Ruby 4.1.0dev (2026-04-28 revision 8d929853d88b5ca4ff72b78b5b539afc1b8b7d71)
prism.c
4
5#include "prism/internal/allocator.h"
6#include "prism/internal/arena.h"
7#include "prism/internal/bit.h"
8#include "prism/internal/buffer.h"
9#include "prism/internal/char.h"
10#include "prism/internal/comments.h"
11#include "prism/internal/constant_pool.h"
12#include "prism/internal/diagnostic.h"
13#include "prism/internal/encoding.h"
14#include "prism/internal/integer.h"
15#include "prism/internal/isinf.h"
16#include "prism/internal/line_offset_list.h"
17#include "prism/internal/list.h"
18#include "prism/internal/magic_comments.h"
19#include "prism/internal/memchr.h"
20#include "prism/internal/node.h"
21#include "prism/internal/options.h"
22#include "prism/internal/parser.h"
23#include "prism/internal/regexp.h"
24#include "prism/internal/serialize.h"
25#include "prism/internal/source.h"
26#include "prism/internal/static_literals.h"
27#include "prism/internal/stringy.h"
28#include "prism/internal/strncasecmp.h"
29#include "prism/internal/strpbrk.h"
30#include "prism/internal/tokens.h"
31
32#include "prism/excludes.h"
33#include "prism/serialize.h"
34#include "prism/stream.h"
35#include "prism/version.h"
36
37#include <assert.h>
38#include <errno.h>
39#include <limits.h>
40#include <locale.h>
41#include <math.h>
42#include <stdio.h>
43#include <stdlib.h>
44
/**
 * Fallback value for PRISM_DEPTH_MAXIMUM. The definition is skipped when the
 * build has already provided its own value (for example on the compiler
 * command line).
 */
#if !defined(PRISM_DEPTH_MAXIMUM)
#define PRISM_DEPTH_MAXIMUM 10000
#endif
53
/**
 * Paste two preprocessor tokens together into a single token.
 */
#define PM_CONCATENATE(left, right) left ## right

/**
 * Compile-time assertion.
 *
 * `line` is only used by the fallback form, where it makes the generated
 * typedef name unique; `condition` must be a constant expression and
 * `message` a string literal.
 *
 * `_Static_assert` is a keyword from C11 onwards, not a macro, so testing
 * `defined(_Static_assert)` alone only detects environments whose headers
 * supply it as a macro. We therefore also accept any compiler that reports
 * C11 or later. Everything else falls back to declaring an array type whose
 * size is negative (and therefore ill-formed) when the condition is false.
 */
#if defined(_Static_assert) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
#   define PM_STATIC_ASSERT(line, condition, message) _Static_assert(condition, message)
#else
#   define PM_STATIC_ASSERT(line, condition, message) typedef char PM_CONCATENATE(static_assert_, line)[(condition) ? 1 : -1]
#endif
70
/**
 * Branch-prediction hints. On GCC and Clang these wrap __builtin_expect
 * (normalising the argument to 0/1 first); on every other compiler they
 * simply evaluate to the argument.
 */
#if !defined(__GNUC__) && !defined(__clang__)
    /** The condition is expected to be true (no hint available). */
    #define PRISM_LIKELY(x) (x)

    /** The condition is expected to be false (no hint available). */
    #define PRISM_UNLIKELY(x) (x)
#else
    /** The condition is expected to be true. */
    #define PRISM_LIKELY(x) __builtin_expect(!!(x), 1)

    /** The condition is expected to be false. */
    #define PRISM_UNLIKELY(x) __builtin_expect(!!(x), 0)
#endif
88
92const char *
93pm_version(void) {
94 return PRISM_VERSION;
95}
96
/**
 * Number of whitespace columns attributed to a tab character.
 */
#define PM_TAB_WHITESPACE_SIZE 8

/**
 * Smaller / larger of two values. Both are plain macros, so each argument
 * may be evaluated twice: do not pass expressions with side effects.
 */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
106
107/******************************************************************************/
108/* Helpful AST-related macros */
109/******************************************************************************/
110
/** Cast a value to an unsigned 32-bit integer. */
#define U32(value_) ((uint32_t) (value_))

/** Short aliases for the node-flags and node-upcast macros. */
#define FL PM_NODE_FLAGS
#define UP PM_NODE_UPCAST

/**
 * A location is a start offset plus a length; these give the offsets of its
 * first byte and of the byte just past its end.
 */
#define PM_LOCATION_START(location_) ((location_)->start)
#define PM_LOCATION_END(location_) ((location_)->start + (location_)->length)

/**
 * Tokens hold raw pointers into the source. The START/END forms convert them
 * to 32-bit offsets relative to the parser's start pointer; the LENGTH forms
 * measure a single token, or the span from the start of `left_` to the end of
 * `right_`.
 */
#define PM_TOKEN_START(parser_, token_) U32((token_)->start - (parser_)->start)
#define PM_TOKEN_END(parser_, token_) U32((token_)->end - (parser_)->start)
#define PM_TOKEN_LENGTH(token_) U32((token_)->end - (token_)->start)
#define PM_TOKENS_LENGTH(left_, right_) U32((right_)->end - (left_)->start)

/**
 * The same accessors for nodes, reading the location embedded in the upcast
 * node. PM_NODES_LENGTH spans from the start of `left_` to the end of
 * `right_`.
 */
#define PM_NODE_START(node_) (UP(node_)->location.start)
#define PM_NODE_LENGTH(node_) (UP(node_)->location.length)
#define PM_NODE_END(node_) (UP(node_)->location.start + UP(node_)->location.length)
#define PM_NODES_LENGTH(left_, right_) (PM_NODE_END(right_) - PM_NODE_START(left_))

/** Lengths of spans that begin at a token and end at a node, or vice versa. */
#define PM_TOKEN_NODE_LENGTH(parser_, token_, node_) (PM_NODE_END(node_) - PM_TOKEN_START(parser_, token_))
#define PM_NODE_TOKEN_LENGTH(parser_, node_, token_) (PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))

/**
 * Mutators for a node's location. The START setters overwrite the start
 * offset only; the LENGTH setters recompute the length so that the node ends
 * where the given node, token or location ends, keeping its current start.
 */
#define PM_NODE_START_SET_NODE(left_, right_) (PM_NODE_START(left_) = PM_NODE_START(right_))
#define PM_NODE_START_SET_TOKEN(parser_, node_, token_) (PM_NODE_START(node_) = PM_TOKEN_START(parser_, token_))
#define PM_NODE_LENGTH_SET_NODE(left_, right_) (PM_NODE_LENGTH(left_) = PM_NODE_END(right_) - PM_NODE_START(left_))
#define PM_NODE_LENGTH_SET_TOKEN(parser_, node_, token_) (PM_NODE_LENGTH(node_) = PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
#define PM_NODE_LENGTH_SET_LOCATION(node_, location_) (PM_NODE_LENGTH(node_) = PM_LOCATION_END(location_) - PM_NODE_START(node_))

/**
 * Constructors for pm_location_t values: from explicit numbers, the "unset"
 * location (start 0, length 0), a single token, or a node's own location.
 */
#define PM_LOCATION_INIT(start_, length_) ((pm_location_t) { .start = (start_), .length = (length_) })
#define PM_LOCATION_INIT_UNSET PM_LOCATION_INIT(0, 0)
#define PM_LOCATION_INIT_TOKEN(parser_, token_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_))
#define PM_LOCATION_INIT_NODE(node_) UP(node_)->location

/** Constructors for locations that span two tokens/nodes, left to right. */
#define PM_LOCATION_INIT_TOKENS(parser_, left_, right_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, left_), PM_TOKENS_LENGTH(left_, right_))
#define PM_LOCATION_INIT_NODES(left_, right_) PM_LOCATION_INIT(PM_NODE_START(left_), PM_NODES_LENGTH(left_, right_))
#define PM_LOCATION_INIT_TOKEN_NODE(parser_, token_, node_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_NODE_LENGTH(parser_, token_, node_))
#define PM_LOCATION_INIT_NODE_TOKEN(parser_, node_, token_) PM_LOCATION_INIT(PM_NODE_START(node_), PM_NODE_TOKEN_LENGTH(parser_, node_, token_))

/**
 * Token-to-location shorthands. TOK2LOC converts a token pointer; NTOK2LOC
 * does the same but yields the unset location for a NULL pointer; NTOK2PTR
 * takes a token by value and yields NULL when its start pointer is NULL,
 * otherwise the token's address.
 */
#define TOK2LOC(parser_, token_) PM_LOCATION_INIT_TOKEN(parser_, token_)
#define NTOK2LOC(parser_, token_) ((token_) == NULL ? PM_LOCATION_INIT_UNSET : TOK2LOC(parser_, token_))
#define NTOK2PTR(token_) ((token_).start == NULL ? NULL : &(token_))
151
152/******************************************************************************/
153/* Lex mode manipulations */
154/******************************************************************************/
155
160static PRISM_INLINE uint8_t
161lex_mode_incrementor(const uint8_t start) {
162 switch (start) {
163 case '(':
164 case '[':
165 case '{':
166 case '<':
167 return start;
168 default:
169 return '\0';
170 }
171}
172
177static PRISM_INLINE uint8_t
178lex_mode_terminator(const uint8_t start) {
179 switch (start) {
180 case '(':
181 return ')';
182 case '[':
183 return ']';
184 case '{':
185 return '}';
186 case '<':
187 return '>';
188 default:
189 return start;
190 }
191}
192
198static bool
199lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
200 lex_mode.prev = parser->lex_modes.current;
201 parser->lex_modes.index++;
202
203 if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
204 parser->lex_modes.current = (pm_lex_mode_t *) xmalloc(sizeof(pm_lex_mode_t));
205 if (parser->lex_modes.current == NULL) return false;
206
207 *parser->lex_modes.current = lex_mode;
208 } else {
209 parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
210 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
211 }
212
213 return true;
214}
215
219static PRISM_INLINE bool
220lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
221 uint8_t incrementor = lex_mode_incrementor(delimiter);
222 uint8_t terminator = lex_mode_terminator(delimiter);
223
224 pm_lex_mode_t lex_mode = {
225 .mode = PM_LEX_LIST,
226 .as.list = {
227 .nesting = 0,
228 .interpolation = interpolation,
229 .incrementor = incrementor,
230 .terminator = terminator
231 }
232 };
233
234 // These are the places where we need to split up the content of the list.
235 // We'll use strpbrk to find the first of these characters.
236 uint8_t *breakpoints = lex_mode.as.list.breakpoints;
237 memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
238 memcpy(breakpoints, "\\ \t\f\r\v\n", sizeof("\\ \t\f\r\v\n") - 1);
239 size_t index = 7;
240
241 // Now we'll add the terminator to the list of breakpoints. If the
242 // terminator is not already a NULL byte, add it to the list.
243 if (terminator != '\0') {
244 breakpoints[index++] = terminator;
245 }
246
247 // If interpolation is allowed, then we're going to check for the #
248 // character. Otherwise we'll only look for escapes and the terminator.
249 if (interpolation) {
250 breakpoints[index++] = '#';
251 }
252
253 // If there is an incrementor, then we'll check for that as well.
254 if (incrementor != '\0') {
255 breakpoints[index++] = incrementor;
256 }
257
258 parser->explicit_encoding = NULL;
259 return lex_mode_push(parser, lex_mode);
260}
261
267static PRISM_INLINE bool
268lex_mode_push_list_eof(pm_parser_t *parser) {
269 return lex_mode_push_list(parser, false, '\0');
270}
271
275static PRISM_INLINE bool
276lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
277 pm_lex_mode_t lex_mode = {
278 .mode = PM_LEX_REGEXP,
279 .as.regexp = {
280 .nesting = 0,
281 .incrementor = incrementor,
282 .terminator = terminator
283 }
284 };
285
286 // These are the places where we need to split up the content of the
287 // regular expression. We'll use strpbrk to find the first of these
288 // characters.
289 uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
290 memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
291 memcpy(breakpoints, "\r\n\\#", sizeof("\r\n\\#") - 1);
292 size_t index = 4;
293
294 // First we'll add the terminator.
295 if (terminator != '\0') {
296 breakpoints[index++] = terminator;
297 }
298
299 // Next, if there is an incrementor, then we'll check for that as well.
300 if (incrementor != '\0') {
301 breakpoints[index++] = incrementor;
302 }
303
304 parser->explicit_encoding = NULL;
305 return lex_mode_push(parser, lex_mode);
306}
307
311static PRISM_INLINE bool
312lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
313 pm_lex_mode_t lex_mode = {
314 .mode = PM_LEX_STRING,
315 .as.string = {
316 .nesting = 0,
317 .interpolation = interpolation,
318 .label_allowed = label_allowed,
319 .incrementor = incrementor,
320 .terminator = terminator
321 }
322 };
323
324 // These are the places where we need to split up the content of the
325 // string. We'll use strpbrk to find the first of these characters.
326 uint8_t *breakpoints = lex_mode.as.string.breakpoints;
327 memset(breakpoints, 0, PM_STRPBRK_CACHE_SIZE);
328 memcpy(breakpoints, "\r\n\\", sizeof("\r\n\\") - 1);
329 size_t index = 3;
330
331 // Now add in the terminator. If the terminator is not already a NULL byte,
332 // then we'll add it.
333 if (terminator != '\0') {
334 breakpoints[index++] = terminator;
335 }
336
337 // If interpolation is allowed, then we're going to check for the #
338 // character. Otherwise we'll only look for escapes and the terminator.
339 if (interpolation) {
340 breakpoints[index++] = '#';
341 }
342
343 // If we have an incrementor, then we'll add that in as a breakpoint as
344 // well.
345 if (incrementor != '\0') {
346 breakpoints[index++] = incrementor;
347 }
348
349 parser->explicit_encoding = NULL;
350 return lex_mode_push(parser, lex_mode);
351}
352
358static PRISM_INLINE bool
359lex_mode_push_string_eof(pm_parser_t *parser) {
360 return lex_mode_push_string(parser, false, false, '\0', '\0');
361}
362
368static void
369lex_mode_pop(pm_parser_t *parser) {
370 if (parser->lex_modes.index == 0) {
371 parser->lex_modes.current->mode = PM_LEX_DEFAULT;
372 } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
373 parser->lex_modes.index--;
374 parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
375 } else {
376 parser->lex_modes.index--;
377 pm_lex_mode_t *prev = parser->lex_modes.current->prev;
378 xfree_sized(parser->lex_modes.current, sizeof(pm_lex_mode_t));
379 parser->lex_modes.current = prev;
380 }
381}
382
386static PRISM_INLINE bool
387lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
388 return parser->lex_state & state;
389}
390
/**
 * The possible results of lex_state_ignored_p.
 */
typedef enum {
    /** Returned when neither of the other two cases applies. */
    PM_IGNORED_NEWLINE_NONE = 0,

    /** Returned for the BEG/CLASS/FNAME/DOT states when LABELED is not set. */
    PM_IGNORED_NEWLINE_ALL = 1,

    /** Returned when the state, ignoring LABEL, is exactly ARG|LABELED. */
    PM_IGNORED_NEWLINE_PATTERN = 2
} pm_ignored_newline_type_t;
396
397static PRISM_INLINE pm_ignored_newline_type_t
398lex_state_ignored_p(pm_parser_t *parser) {
399 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
400
401 if (ignored) {
402 return PM_IGNORED_NEWLINE_ALL;
403 } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
404 return PM_IGNORED_NEWLINE_PATTERN;
405 } else {
406 return PM_IGNORED_NEWLINE_NONE;
407 }
408}
409
410static PRISM_INLINE bool
411lex_state_beg_p(pm_parser_t *parser) {
412 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
413}
414
415static PRISM_INLINE bool
416lex_state_arg_p(pm_parser_t *parser) {
417 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
418}
419
420static PRISM_INLINE bool
421lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
422 if (parser->current.end >= parser->end) {
423 return false;
424 }
425 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
426}
427
428static PRISM_INLINE bool
429lex_state_end_p(pm_parser_t *parser) {
430 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
431}
432
436static PRISM_INLINE bool
437lex_state_operator_p(pm_parser_t *parser) {
438 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
439}
440
445static PRISM_INLINE void
446lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
447 parser->lex_state = state;
448}
449
/**
 * Lex state debug logging is off unless the build defines PM_DEBUG_LOGGING
 * to a non-zero value.
 */
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
457
#if PM_DEBUG_LOGGING
/**
 * Print the parser's lex state to stderr as "STATE: " followed by either
 * "NONE" or the names of the set flags joined with '|', then a newline.
 */
PRISM_UNUSED static void
debug_state(pm_parser_t *parser) {
    fprintf(stderr, "STATE: ");

    if (parser->lex_state == PM_LEX_STATE_NONE) {
        fprintf(stderr, "NONE\n");
        return;
    }

    // Each flag paired with its own spelling, in the order they are printed.
#define DEBUG_STATE_ENTRY(state) { state, #state }
    const struct {
        pm_lex_state_t flag;
        const char *label;
    } entries[] = {
        DEBUG_STATE_ENTRY(PM_LEX_STATE_BEG),
        DEBUG_STATE_ENTRY(PM_LEX_STATE_END),
        DEBUG_STATE_ENTRY(PM_LEX_STATE_ENDARG),
        DEBUG_STATE_ENTRY(PM_LEX_STATE_ENDFN),
        DEBUG_STATE_ENTRY(PM_LEX_STATE_ARG),
        DEBUG_STATE_ENTRY(PM_LEX_STATE_CMDARG),
        DEBUG_STATE_ENTRY(PM_LEX_STATE_MID),
        DEBUG_STATE_ENTRY(PM_LEX_STATE_FNAME),
        DEBUG_STATE_ENTRY(PM_LEX_STATE_DOT),
        DEBUG_STATE_ENTRY(PM_LEX_STATE_CLASS),
        DEBUG_STATE_ENTRY(PM_LEX_STATE_LABEL),
        DEBUG_STATE_ENTRY(PM_LEX_STATE_LABELED),
        DEBUG_STATE_ENTRY(PM_LEX_STATE_FITEM)
    };
#undef DEBUG_STATE_ENTRY

    bool first = true;
    for (size_t index = 0; index < sizeof(entries) / sizeof(entries[0]); index++) {
        if (parser->lex_state & entries[index].flag) {
            if (!first) fprintf(stderr, "|");
            fprintf(stderr, "%s", entries[index].label);
            first = false;
        }
    }

    fprintf(stderr, "\n");
}

/**
 * Logging variant of lex_state_set: prints the calling function and line,
 * the state before the change and the state after it, all to stderr.
 */
static void
debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
    fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
    debug_state(parser);

    lex_state_set(parser, state);

    fprintf(stderr, "Now: ");
    debug_state(parser);
    fprintf(stderr, "\n");
}

/**
 * From here on, every lex_state_set call is logged together with its call
 * site.
 */
#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
#endif
507
508/******************************************************************************/
509/* Command-line macro helpers */
510/******************************************************************************/
511
/**
 * Non-zero when the given option bit is set in the parser's command_line
 * field.
 */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) ((parser)->command_line & (option))

/** Non-zero when PM_OPTIONS_COMMAND_LINE_A is set. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_E is set. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_L is set. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_N is set. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_P is set. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_X is set. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
532
533/******************************************************************************/
534/* Diagnostic-related functions */
535/******************************************************************************/
536
540static PRISM_INLINE void
541pm_parser_err(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
542 pm_diagnostic_list_append(&parser->metadata_arena, &parser->error_list, start, length, diag_id);
543}
544
549static PRISM_INLINE void
550pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
551 pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
552}
553
558static PRISM_INLINE void
559pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
560 pm_parser_err_token(parser, &parser->current, diag_id);
561}
562
567static PRISM_INLINE void
568pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
569 pm_parser_err_token(parser, &parser->previous, diag_id);
570}
571
576static PRISM_INLINE void
577pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
578 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
579}
580
/**
 * Append a formatted error covering `length_` bytes from offset `start_` to
 * the parser's error list. The trailing arguments are forwarded to the
 * diagnostic's format.
 */
#define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \
    pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__)

/**
 * Append a formatted error whose location is the location of the given node.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, ...) \
    PM_PARSER_ERR_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)

/**
 * Append a formatted error located at the given node, passing the node's
 * length (as an int) and a pointer to its source text as the format
 * arguments. The parser argument is parenthesized before dereferencing so
 * that any pointer-valued expression can be passed safely.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser_, node_, diag_id_) \
    PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, (int) PM_NODE_LENGTH(node_), (const char *) ((parser_)->start + PM_NODE_START(node_)))

/**
 * Append a formatted error whose location is the span of the given token.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id, __VA_ARGS__)

/**
 * Append a formatted error located at the given token, passing the token's
 * length (as an int) and a pointer to its source text as the format
 * arguments.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
    PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
614
618static PRISM_INLINE void
619pm_parser_warn(pm_parser_t *parser, uint32_t start, uint32_t length, pm_diagnostic_id_t diag_id) {
620 pm_diagnostic_list_append(&parser->metadata_arena, &parser->warning_list, start, length, diag_id);
621}
622
627static PRISM_INLINE void
628pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
629 pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
630}
631
636static PRISM_INLINE void
637pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
638 pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
639}
640
/**
 * Append a formatted warning covering `length_` bytes from offset `start_`
 * to the parser's warning list. The trailing arguments are forwarded to the
 * diagnostic's format.
 */
#define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \
    pm_diagnostic_list_append_format(&(parser_)->metadata_arena, &(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__)

/**
 * Append a formatted warning whose location is the span of the given token.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
    PM_PARSER_WARN_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__)

/**
 * Append a formatted warning located at the given token, passing the token's
 * length (as an int) and a pointer to its source text as the format
 * arguments.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
    PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)

/**
 * Append a formatted warning whose location is the location of the given
 * node.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser_, node_, diag_id_, ...) \
    PM_PARSER_WARN_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
668
674static void
675pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
676 PM_PARSER_ERR_FORMAT(
677 parser,
678 U32(ident_start - parser->start),
679 U32(ident_length),
680 PM_ERR_HEREDOC_TERM,
681 (int) ident_length,
682 (const char *) ident_start
683 );
684}
685
686/******************************************************************************/
687/* Scope-related functions */
688/******************************************************************************/
689
693static bool
694pm_parser_scope_push(pm_parser_t *parser, bool closed) {
695 pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
696 if (scope == NULL) return false;
697
698 *scope = (pm_scope_t) {
699 .previous = parser->current_scope,
700 .locals = { 0 },
701 .parameters = PM_SCOPE_PARAMETERS_NONE,
702 .implicit_parameters = { 0 },
703 .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
704 .closed = closed
705 };
706
707 parser->current_scope = scope;
708 return true;
709}
710
715static bool
716pm_parser_scope_toplevel_p(pm_parser_t *parser) {
717 pm_scope_t *scope = parser->current_scope;
718
719 do {
720 if (scope->previous == NULL) return true;
721 if (scope->closed) return false;
722 } while ((scope = scope->previous) != NULL);
723
724 assert(false && "unreachable");
725 return true;
726}
727
731static pm_scope_t *
732pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
733 pm_scope_t *scope = parser->current_scope;
734
735 while (depth-- > 0) {
736 assert(scope != NULL);
737 scope = scope->previous;
738 }
739
740 return scope;
741}
742
/**
 * The possible results of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    /** The nearest closed scope has the parameter and no inner scope does. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS = 0,

    /** The nearest closed scope has the parameter, and so does an inner scope. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT = 1,

    /** The nearest closed scope (if any) does not have the parameter. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL = 2
} pm_scope_forwarding_param_check_result_t;
748
749static pm_scope_forwarding_param_check_result_t
750pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
751 pm_scope_t *scope = parser->current_scope;
752 bool conflict = false;
753
754 while (scope != NULL) {
755 if (scope->parameters & mask) {
756 if (scope->closed) {
757 if (conflict) {
758 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
759 } else {
760 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
761 }
762 }
763
764 conflict = true;
765 }
766
767 if (scope->closed) break;
768 scope = scope->previous;
769 }
770
771 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
772}
773
774static void
775pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
776 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
777 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
778 // Pass.
779 break;
780 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
781 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
782 break;
783 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
784 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
785 break;
786 }
787}
788
789static void
790pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
791 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
792 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
793 // Pass.
794 break;
795 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
796 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
797 break;
798 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
799 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
800 break;
801 }
802}
803
804static void
805pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
806 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
807 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
808 // Pass.
809 break;
810 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
811 // This shouldn't happen, because ... is not allowed in the
812 // declaration of blocks. If we get here, we assume we already have
813 // an error for this.
814 break;
815 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
816 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
817 break;
818 }
819}
820
821static void
822pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
823 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
824 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
825 // Pass.
826 break;
827 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
828 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
829 break;
830 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
831 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
832 break;
833 }
834}
835
839static PRISM_INLINE pm_shareable_constant_value_t
840pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
841 return parser->current_scope->shareable_constant;
842}
843
848static void
849pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
850 pm_scope_t *scope = parser->current_scope;
851
852 do {
853 scope->shareable_constant = shareable_constant;
854 } while (!scope->closed && (scope = scope->previous) != NULL);
855}
856
857/******************************************************************************/
858/* Local variable-related functions */
859/******************************************************************************/
860
/**
 * Capacity at which a locals set stops being scanned as a plain list and is
 * treated as an open-addressed hash table instead: capacities strictly below
 * this value use the list form.
 */
#define PM_LOCALS_HASH_THRESHOLD 5
865
866static void
867pm_locals_free(pm_locals_t *locals) {
868 if (locals->capacity > 0) {
869 xfree_sized(locals->locals, locals->capacity * sizeof(pm_local_t));
870 }
871}
872
877static uint32_t
878pm_locals_hash(pm_constant_id_t name) {
879 name = ((name >> 16) ^ name) * 0x45d9f3b;
880 name = ((name >> 16) ^ name) * 0x45d9f3b;
881 name = (name >> 16) ^ name;
882 return name;
883}
884
889static void
890pm_locals_resize(pm_locals_t *locals) {
891 uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
892 assert(next_capacity > locals->capacity);
893
894 pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
895 if (next_locals == NULL) abort();
896
897 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
898 if (locals->size > 0) {
899 memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
900 }
901 } else {
902 // If we just switched from a list to a hash, then we need to fill in
903 // the hash values of all of the locals.
904 bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
905 uint32_t mask = next_capacity - 1;
906
907 for (uint32_t index = 0; index < locals->capacity; index++) {
908 pm_local_t *local = &locals->locals[index];
909
910 if (local->name != PM_CONSTANT_ID_UNSET) {
911 if (hash_needed) local->hash = pm_locals_hash(local->name);
912
913 uint32_t hash = local->hash;
914 while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
915 next_locals[hash & mask] = *local;
916 }
917 }
918 }
919
920 pm_locals_free(locals);
921 locals->locals = next_locals;
922 locals->capacity = next_capacity;
923}
924
940static bool
941pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, uint32_t start, uint32_t length, uint32_t reads) {
942 if (locals->size >= (locals->capacity / 4 * 3)) {
943 pm_locals_resize(locals);
944 }
945
946 locals->bloom |= (1u << (name & 31));
947
948 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
949 for (uint32_t index = 0; index < locals->capacity; index++) {
950 pm_local_t *local = &locals->locals[index];
951
952 if (local->name == PM_CONSTANT_ID_UNSET) {
953 *local = (pm_local_t) {
954 .name = name,
955 .location = { .start = start, .length = length },
956 .index = locals->size++,
957 .reads = reads,
958 .hash = 0
959 };
960 return true;
961 } else if (local->name == name) {
962 return false;
963 }
964 }
965 } else {
966 uint32_t mask = locals->capacity - 1;
967 uint32_t hash = pm_locals_hash(name);
968 uint32_t initial_hash = hash;
969
970 do {
971 pm_local_t *local = &locals->locals[hash & mask];
972
973 if (local->name == PM_CONSTANT_ID_UNSET) {
974 *local = (pm_local_t) {
975 .name = name,
976 .location = { .start = start, .length = length },
977 .index = locals->size++,
978 .reads = reads,
979 .hash = initial_hash
980 };
981 return true;
982 } else if (local->name == name) {
983 return false;
984 } else {
985 hash++;
986 }
987 } while ((hash & mask) != initial_hash);
988 }
989
990 assert(false && "unreachable");
991 return true;
992}
993
998static uint32_t
999pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
1000 if (!(locals->bloom & (1u << (name & 31)))) return UINT32_MAX;
1001
1002 if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
1003 for (uint32_t index = 0; index < locals->size; index++) {
1004 pm_local_t *local = &locals->locals[index];
1005 if (local->name == name) return index;
1006 }
1007 } else {
1008 uint32_t mask = locals->capacity - 1;
1009 uint32_t hash = pm_locals_hash(name);
1010 uint32_t initial_hash = hash & mask;
1011
1012 do {
1013 pm_local_t *local = &locals->locals[hash & mask];
1014
1015 if (local->name == PM_CONSTANT_ID_UNSET) {
1016 return UINT32_MAX;
1017 } else if (local->name == name) {
1018 return hash & mask;
1019 } else {
1020 hash++;
1021 }
1022 } while ((hash & mask) != initial_hash);
1023 }
1024
1025 return UINT32_MAX;
1026}
1027
1032static void
1033pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
1034 uint32_t index = pm_locals_find(locals, name);
1035 assert(index != UINT32_MAX);
1036
1037 pm_local_t *local = &locals->locals[index];
1038 assert(local->reads < UINT32_MAX);
1039
1040 local->reads++;
1041}
1042
1047static void
1048pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
1049 uint32_t index = pm_locals_find(locals, name);
1050 assert(index != UINT32_MAX);
1051
1052 pm_local_t *local = &locals->locals[index];
1053 assert(local->reads > 0);
1054
1055 local->reads--;
1056}
1057
1061static uint32_t
1062pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
1063 uint32_t index = pm_locals_find(locals, name);
1064 assert(index != UINT32_MAX);
1065
1066 return locals->locals[index].reads;
1067}
1068
1077static void
1078pm_locals_order(pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
1079 pm_constant_id_list_init_capacity(parser->arena, list, locals->size);
1080
1081 // If we're still below the threshold for switching to a hash, then we only
1082 // need to loop over the locals until we hit the size because the locals are
1083 // stored in a list.
1084 uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
1085
1086 // We will only warn for unused variables if we're not at the top level, or
1087 // if we're parsing a file outside of eval or -e.
1088 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
1089
1090 for (uint32_t index = 0; index < capacity; index++) {
1091 pm_local_t *local = &locals->locals[index];
1092
1093 if (local->name != PM_CONSTANT_ID_UNSET) {
1094 pm_constant_id_list_insert(list, (size_t) local->index, local->name);
1095
1096 if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_line_offset_list_line(&parser->line_offsets, local->location.start, parser->start_line) >= 0))) {
1097 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
1098
1099 if (constant->length >= 1 && *constant->start != '_') {
1100 PM_PARSER_WARN_FORMAT(
1101 parser,
1102 local->location.start,
1103 local->location.length,
1104 PM_WARN_UNUSED_LOCAL_VARIABLE,
1105 (int) constant->length,
1106 (const char *) constant->start
1107 );
1108 }
1109 }
1110 }
1111 }
1112}
1113
1114/******************************************************************************/
1115/* Node-related functions */
1116/******************************************************************************/
1117
1122pm_parser_constant_id_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1123 /* Fast path: if this is the same token as the last lookup (same pointer
1124 * range), return the cached result. */
1125 if (start == parser->constant_cache.start && end == parser->constant_cache.end) {
1126 return parser->constant_cache.id;
1127 }
1128
1129 pm_constant_id_t id = pm_constant_pool_insert_shared(&parser->metadata_arena, &parser->constant_pool, start, (size_t) (end - start));
1130
1131 parser->constant_cache.start = start;
1132 parser->constant_cache.end = end;
1133 parser->constant_cache.id = id;
1134
1135 return id;
1136}
1137
1142pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
1143 return pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, start, length);
1144}
1145
1150pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1151 return pm_constant_pool_insert_constant(&parser->metadata_arena, &parser->constant_pool, (const uint8_t *) start, length);
1152}
1153
1158pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1159 return pm_parser_constant_id_raw(parser, token->start, token->end);
1160}
1161
/**
 * The node types that pm_check_value_expression reports as void value
 * expressions. This expands to a run of case labels with the leading `case`
 * and the trailing `:` left off, so it is written as
 * `case PM_CASE_VOID_VALUE:` inside a switch over a node type.
 */
#define PM_CASE_VOID_VALUE \
    PM_RETURN_NODE: \
    case PM_BREAK_NODE: \
    case PM_NEXT_NODE: \
    case PM_REDO_NODE: \
    case PM_RETRY_NODE: \
    case PM_MATCH_REQUIRED_NODE
1168
1174static pm_node_t *
1175pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1176 pm_node_t *void_node = NULL;
1177
1178 while (node != NULL) {
1179 switch (PM_NODE_TYPE(node)) {
1180 case PM_CASE_VOID_VALUE:
1181 return void_node != NULL ? void_node : node;
1182 case PM_MATCH_PREDICATE_NODE:
1183 return NULL;
1184 case PM_BEGIN_NODE: {
1185 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1186
1187 if (cast->ensure_clause != NULL) {
1188 if (cast->rescue_clause != NULL) {
1189 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->rescue_clause));
1190 if (vn != NULL) return vn;
1191 }
1192
1193 if (cast->statements != NULL) {
1194 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1195 if (vn != NULL) return vn;
1196 }
1197
1198 node = UP(cast->ensure_clause);
1199 } else if (cast->rescue_clause != NULL) {
1200 // https://bugs.ruby-lang.org/issues/21669
1201 if (cast->else_clause == NULL || parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1202 if (cast->statements == NULL) return NULL;
1203
1204 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1205 if (vn == NULL) return NULL;
1206 if (void_node == NULL) void_node = vn;
1207 }
1208
1209 for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1210 pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
1211
1212 if (vn == NULL) {
1213 // https://bugs.ruby-lang.org/issues/21669
1214 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1215 return NULL;
1216 }
1217 void_node = NULL;
1218 break;
1219 }
1220 }
1221
1222 if (cast->else_clause != NULL) {
1223 node = UP(cast->else_clause);
1224
1225 // https://bugs.ruby-lang.org/issues/21669
1226 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1227 pm_node_t *vn = pm_check_value_expression(parser, node);
1228 if (vn != NULL) return vn;
1229 }
1230 } else {
1231 return void_node;
1232 }
1233 } else {
1234 node = UP(cast->statements);
1235 }
1236
1237 break;
1238 }
1239 case PM_CASE_NODE: {
1240 // https://bugs.ruby-lang.org/issues/21669
1241 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1242 return NULL;
1243 }
1244
1245 pm_case_node_t *cast = (pm_case_node_t *) node;
1246 if (cast->else_clause == NULL) return NULL;
1247
1248 pm_node_t *condition;
1249 PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
1250 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
1251
1252 pm_when_node_t *cast = (pm_when_node_t *) condition;
1253 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1254 if (vn == NULL) return NULL;
1255 if (void_node == NULL) void_node = vn;
1256 }
1257
1258 node = UP(cast->else_clause);
1259 break;
1260 }
1261 case PM_CASE_MATCH_NODE: {
1262 // https://bugs.ruby-lang.org/issues/21669
1263 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
1264 return NULL;
1265 }
1266
1268 if (cast->else_clause == NULL) return NULL;
1269
1270 pm_node_t *condition;
1271 PM_NODE_LIST_FOREACH(&cast->conditions, index, condition) {
1272 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
1273
1274 pm_in_node_t *cast = (pm_in_node_t *) condition;
1275 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1276 if (vn == NULL) return NULL;
1277 if (void_node == NULL) void_node = vn;
1278 }
1279
1280 node = UP(cast->else_clause);
1281 break;
1282 }
1283 case PM_ENSURE_NODE: {
1284 pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1285 node = UP(cast->statements);
1286 break;
1287 }
1288 case PM_PARENTHESES_NODE: {
1290 node = UP(cast->body);
1291 break;
1292 }
1293 case PM_STATEMENTS_NODE: {
1295
1296 // https://bugs.ruby-lang.org/issues/21669
1297 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1) {
1298 pm_node_t *body_part;
1299 PM_NODE_LIST_FOREACH(&cast->body, index, body_part) {
1300 switch (PM_NODE_TYPE(body_part)) {
1301 case PM_CASE_VOID_VALUE:
1302 if (void_node == NULL) {
1303 void_node = body_part;
1304 }
1305 return void_node;
1306 default: break;
1307 }
1308 }
1309 }
1310
1311 node = cast->body.nodes[cast->body.size - 1];
1312 break;
1313 }
1314 case PM_IF_NODE: {
1315 pm_if_node_t *cast = (pm_if_node_t *) node;
1316 if (cast->statements == NULL || cast->subsequent == NULL) {
1317 return NULL;
1318 }
1319 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1320 if (vn == NULL) {
1321 return NULL;
1322 }
1323 if (void_node == NULL) {
1324 void_node = vn;
1325 }
1326 node = cast->subsequent;
1327 break;
1328 }
1329 case PM_UNLESS_NODE: {
1330 pm_unless_node_t *cast = (pm_unless_node_t *) node;
1331 if (cast->statements == NULL || cast->else_clause == NULL) {
1332 return NULL;
1333 }
1334 pm_node_t *vn = pm_check_value_expression(parser, UP(cast->statements));
1335 if (vn == NULL) {
1336 return NULL;
1337 }
1338 if (void_node == NULL) {
1339 void_node = vn;
1340 }
1341 node = UP(cast->else_clause);
1342 break;
1343 }
1344 case PM_ELSE_NODE: {
1345 pm_else_node_t *cast = (pm_else_node_t *) node;
1346 node = UP(cast->statements);
1347 break;
1348 }
1349 case PM_AND_NODE: {
1350 pm_and_node_t *cast = (pm_and_node_t *) node;
1351 node = cast->left;
1352 break;
1353 }
1354 case PM_OR_NODE: {
1355 pm_or_node_t *cast = (pm_or_node_t *) node;
1356 node = cast->left;
1357 break;
1358 }
1359 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1361
1362 pm_scope_t *scope = parser->current_scope;
1363 for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1364
1365 pm_locals_read(&scope->locals, cast->name);
1366 return NULL;
1367 }
1368 default:
1369 return NULL;
1370 }
1371 }
1372
1373 return NULL;
1374}
1375
1376static PRISM_INLINE void
1377pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1378 pm_node_t *void_node = pm_check_value_expression(parser, node);
1379 if (void_node != NULL) {
1380 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1381 }
1382}
1383
1387static void
1388pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1389 const char *type = NULL;
1390 int length = 0;
1391
1392 switch (PM_NODE_TYPE(node)) {
1393 case PM_BACK_REFERENCE_READ_NODE:
1394 case PM_CLASS_VARIABLE_READ_NODE:
1395 case PM_GLOBAL_VARIABLE_READ_NODE:
1396 case PM_INSTANCE_VARIABLE_READ_NODE:
1397 case PM_LOCAL_VARIABLE_READ_NODE:
1398 case PM_NUMBERED_REFERENCE_READ_NODE:
1399 type = "a variable";
1400 length = 10;
1401 break;
1402 case PM_CALL_NODE: {
1403 const pm_call_node_t *cast = (const pm_call_node_t *) node;
1404 if (cast->call_operator_loc.length > 0 || cast->message_loc.length == 0) break;
1405
1406 const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1407 switch (message->length) {
1408 case 1:
1409 switch (message->start[0]) {
1410 case '+':
1411 case '-':
1412 case '*':
1413 case '/':
1414 case '%':
1415 case '|':
1416 case '^':
1417 case '&':
1418 case '>':
1419 case '<':
1420 type = (const char *) message->start;
1421 length = 1;
1422 break;
1423 }
1424 break;
1425 case 2:
1426 switch (message->start[1]) {
1427 case '=':
1428 if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1429 type = (const char *) message->start;
1430 length = 2;
1431 }
1432 break;
1433 case '@':
1434 if (message->start[0] == '+' || message->start[0] == '-') {
1435 type = (const char *) message->start;
1436 length = 2;
1437 }
1438 break;
1439 case '*':
1440 if (message->start[0] == '*') {
1441 type = (const char *) message->start;
1442 length = 2;
1443 }
1444 break;
1445 }
1446 break;
1447 case 3:
1448 if (memcmp(message->start, "<=>", 3) == 0) {
1449 type = "<=>";
1450 length = 3;
1451 }
1452 break;
1453 }
1454
1455 break;
1456 }
1457 case PM_CONSTANT_PATH_NODE:
1458 type = "::";
1459 length = 2;
1460 break;
1461 case PM_CONSTANT_READ_NODE:
1462 type = "a constant";
1463 length = 10;
1464 break;
1465 case PM_DEFINED_NODE:
1466 type = "defined?";
1467 length = 8;
1468 break;
1469 case PM_FALSE_NODE:
1470 type = "false";
1471 length = 5;
1472 break;
1473 case PM_FLOAT_NODE:
1474 case PM_IMAGINARY_NODE:
1475 case PM_INTEGER_NODE:
1476 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1477 case PM_INTERPOLATED_STRING_NODE:
1478 case PM_RATIONAL_NODE:
1479 case PM_REGULAR_EXPRESSION_NODE:
1480 case PM_SOURCE_ENCODING_NODE:
1481 case PM_SOURCE_FILE_NODE:
1482 case PM_SOURCE_LINE_NODE:
1483 case PM_STRING_NODE:
1484 case PM_SYMBOL_NODE:
1485 type = "a literal";
1486 length = 9;
1487 break;
1488 case PM_NIL_NODE:
1489 type = "nil";
1490 length = 3;
1491 break;
1492 case PM_RANGE_NODE: {
1493 const pm_range_node_t *cast = (const pm_range_node_t *) node;
1494
1495 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1496 type = "...";
1497 length = 3;
1498 } else {
1499 type = "..";
1500 length = 2;
1501 }
1502
1503 break;
1504 }
1505 case PM_SELF_NODE:
1506 type = "self";
1507 length = 4;
1508 break;
1509 case PM_TRUE_NODE:
1510 type = "true";
1511 length = 4;
1512 break;
1513 default:
1514 break;
1515 }
1516
1517 if (type != NULL) {
1518 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1519 }
1520}
1521
1526static void
1527pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1528 assert(node->body.size > 0);
1529 const size_t size = node->body.size - (last_value ? 1 : 0);
1530 for (size_t index = 0; index < size; index++) {
1531 pm_void_statement_check(parser, node->body.nodes[index]);
1532 }
1533}
1534
/**
 * The position a predicate occupies while pm_conditional_predicate walks it.
 * pm_parser_warn_conditional_predicate_literal uses it to pick the wording of
 * a literal-in-condition warning, or to suppress the warning.
 */
typedef enum {
    /** Worded as "condition" in warnings. */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,

    /** Worded as "flip-flop" in warnings. */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,

    /** No literal warning is emitted for this type. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT
} pm_conditional_predicate_type_t;
1545
1549static void
1550pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1551 switch (type) {
1552 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1553 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1554 break;
1555 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1556 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1557 break;
1558 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1559 break;
1560 }
1561}
1562
1567static bool
1568pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1569 switch (PM_NODE_TYPE(node)) {
1570 case PM_ARRAY_NODE: {
1571 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1572
1573 const pm_array_node_t *cast = (const pm_array_node_t *) node;
1574 for (size_t index = 0; index < cast->elements.size; index++) {
1575 if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1576 }
1577
1578 return true;
1579 }
1580 case PM_HASH_NODE: {
1581 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1582
1583 const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1584 for (size_t index = 0; index < cast->elements.size; index++) {
1585 const pm_node_t *element = cast->elements.nodes[index];
1586 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1587
1588 const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1589 if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1590 }
1591
1592 return true;
1593 }
1594 case PM_FALSE_NODE:
1595 case PM_FLOAT_NODE:
1596 case PM_IMAGINARY_NODE:
1597 case PM_INTEGER_NODE:
1598 case PM_NIL_NODE:
1599 case PM_RATIONAL_NODE:
1600 case PM_REGULAR_EXPRESSION_NODE:
1601 case PM_SOURCE_ENCODING_NODE:
1602 case PM_SOURCE_FILE_NODE:
1603 case PM_SOURCE_LINE_NODE:
1604 case PM_STRING_NODE:
1605 case PM_SYMBOL_NODE:
1606 case PM_TRUE_NODE:
1607 return true;
1608 default:
1609 return false;
1610 }
1611}
1612
1617static PRISM_INLINE void
1618pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1619 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1620 pm_parser_warn_node(parser, node, parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1621 }
1622}
1623
1636static void
1637pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1638 switch (PM_NODE_TYPE(node)) {
1639 case PM_AND_NODE: {
1640 pm_and_node_t *cast = (pm_and_node_t *) node;
1641 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1642 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1643 break;
1644 }
1645 case PM_OR_NODE: {
1646 pm_or_node_t *cast = (pm_or_node_t *) node;
1647 pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1648 pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1649 break;
1650 }
1651 case PM_PARENTHESES_NODE: {
1653
1654 if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1655 pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1656 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1657 }
1658
1659 break;
1660 }
1661 case PM_BEGIN_NODE: {
1662 pm_begin_node_t *cast = (pm_begin_node_t *) node;
1663 if (cast->statements != NULL) {
1664 pm_statements_node_t *statements = cast->statements;
1665 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1666 }
1667 break;
1668 }
1669 case PM_RANGE_NODE: {
1670 pm_range_node_t *cast = (pm_range_node_t *) node;
1671
1672 if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1673 if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1674
1675 // Here we change the range node into a flip flop node. We can do
1676 // this since the nodes are exactly the same except for the type.
1677 // We're only asserting against the size when we should probably
1678 // assert against the entire layout, but we'll assume tests will
1679 // catch this.
1680 assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1681 node->type = PM_FLIP_FLOP_NODE;
1682
1683 break;
1684 }
1685 case PM_REGULAR_EXPRESSION_NODE:
1686 // Here we change the regular expression node into a match last line
1687 // node. We can do this since the nodes are exactly the same except
1688 // for the type.
1690 node->type = PM_MATCH_LAST_LINE_NODE;
1691
1692 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1693 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1694 }
1695
1696 break;
1697 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1698 // Here we change the interpolated regular expression node into an
1699 // interpolated match last line node. We can do this since the nodes
1700 // are exactly the same except for the type.
1702 node->type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1703
1704 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1705 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1706 }
1707
1708 break;
1709 case PM_INTEGER_NODE:
1710 if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1711 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1712 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1713 }
1714 } else {
1715 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1716 }
1717 break;
1718 case PM_STRING_NODE:
1719 case PM_SOURCE_FILE_NODE:
1720 case PM_INTERPOLATED_STRING_NODE:
1721 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1722 break;
1723 case PM_SYMBOL_NODE:
1724 case PM_INTERPOLATED_SYMBOL_NODE:
1725 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1726 break;
1727 case PM_SOURCE_LINE_NODE:
1728 case PM_SOURCE_ENCODING_NODE:
1729 case PM_FLOAT_NODE:
1730 case PM_RATIONAL_NODE:
1731 case PM_IMAGINARY_NODE:
1732 pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1733 break;
1734 case PM_CLASS_VARIABLE_WRITE_NODE:
1735 pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1736 break;
1737 case PM_CONSTANT_WRITE_NODE:
1738 pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1739 break;
1740 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1741 pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1742 break;
1743 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1744 pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1745 break;
1746 case PM_LOCAL_VARIABLE_WRITE_NODE:
1747 pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1748 break;
1749 case PM_MULTI_WRITE_NODE:
1750 pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1751 break;
1752 default:
1753 break;
1754 }
1755}
1756
1779
1783static PRISM_INLINE const pm_location_t *
1784pm_arguments_end(pm_arguments_t *arguments) {
1785 if (arguments->block != NULL) {
1786 uint32_t end = PM_NODE_END(arguments->block);
1787
1788 if (arguments->closing_loc.length > 0) {
1789 uint32_t arguments_end = PM_LOCATION_END(&arguments->closing_loc);
1790 if (arguments_end > end) {
1791 return &arguments->closing_loc;
1792 }
1793 }
1794 return &arguments->block->location;
1795 }
1796 if (arguments->closing_loc.length > 0) {
1797 return &arguments->closing_loc;
1798 }
1799 if (arguments->arguments != NULL) {
1800 return &arguments->arguments->base.location;
1801 }
1802 if (arguments->opening_loc.length > 0) {
1803 return &arguments->opening_loc;
1804 }
1805 return NULL;
1806}
1807
1812static void
1813pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1814 // First, check that we have arguments and that we don't have a closing
1815 // location for them.
1816 if (arguments->arguments == NULL || arguments->closing_loc.length > 0) {
1817 return;
1818 }
1819
1820 // Next, check that we don't have a single parentheses argument. This would
1821 // look like:
1822 //
1823 // foo (1) {}
1824 //
1825 // In this case, it's actually okay for the block to be attached to the
1826 // call, even though it looks like it's attached to the argument.
1827 if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1828 return;
1829 }
1830
1831 // If we didn't hit a case before this check, then at this point we need to
1832 // add a syntax error.
1833 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1834}
1835
1836/******************************************************************************/
1837/* Basic character checks */
1838/******************************************************************************/
1839
1846static PRISM_INLINE size_t
1847char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
1848 if (n <= 0) return 0;
1849
1850 if (parser->encoding_changed) {
1851 size_t width;
1852
1853 if ((width = parser->encoding->alpha_char(b, n)) != 0) {
1854 return width;
1855 } else if (*b == '_') {
1856 return 1;
1857 } else if (*b >= 0x80) {
1858 return parser->encoding->char_width(b, n);
1859 } else {
1860 return 0;
1861 }
1862 } else if (*b < 0x80) {
1863 return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1864 } else {
1865 return pm_encoding_utf_8_char_width(b, n);
1866 }
1867}
1868
1873static PRISM_INLINE size_t
1874char_is_identifier_utf8(const uint8_t *b, ptrdiff_t n) {
1875 if (n <= 0) {
1876 return 0;
1877 } else if (*b < 0x80) {
1878 return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1879 } else {
1880 return pm_encoding_utf_8_char_width(b, n);
1881 }
1882}
1883
1897#if defined(PRISM_HAS_NEON)
1898#include <arm_neon.h>
1899
1900static PRISM_INLINE size_t
1901scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
1902 const uint8_t *cursor = start;
1903
1904 // Nibble-based lookup tables for classifying [a-zA-Z0-9_].
1905 // Each high nibble is assigned a unique bit; the low nibble table
1906 // contains the OR of bits for all high nibbles that have an
1907 // identifier character at that low nibble position. A byte is an
1908 // identifier character iff (low_lut[lo] & high_lut[hi]) != 0.
1909 static const uint8_t low_lut_data[16] = {
1910 0x15, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F,
1911 0x1F, 0x1F, 0x1E, 0x0A, 0x0A, 0x0A, 0x0A, 0x0E
1912 };
1913 static const uint8_t high_lut_data[16] = {
1914 0x00, 0x00, 0x00, 0x01, 0x02, 0x04, 0x08, 0x10,
1915 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
1916 };
1917 const uint8x16_t low_lut = vld1q_u8(low_lut_data);
1918 const uint8x16_t high_lut = vld1q_u8(high_lut_data);
1919 const uint8x16_t mask_0f = vdupq_n_u8(0x0F);
1920
1921 while (cursor + 16 <= end) {
1922 uint8x16_t v = vld1q_u8(cursor);
1923
1924 uint8x16_t lo_class = vqtbl1q_u8(low_lut, vandq_u8(v, mask_0f));
1925 uint8x16_t hi_class = vqtbl1q_u8(high_lut, vshrq_n_u8(v, 4));
1926 uint8x16_t ident = vandq_u8(lo_class, hi_class);
1927
1928 // Fast check: if the per-byte minimum is nonzero, every byte matched.
1929 if (vminvq_u8(ident) != 0) {
1930 cursor += 16;
1931 continue;
1932 }
1933
1934 // Find the first non-identifier byte (zero in ident).
1935 uint8x16_t is_zero = vceqq_u8(ident, vdupq_n_u8(0));
1936 uint64_t lo = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 0);
1937
1938 if (lo != 0) {
1939 cursor += pm_ctzll(lo) / 8;
1940 } else {
1941 uint64_t hi = vgetq_lane_u64(vreinterpretq_u64_u8(is_zero), 1);
1942 cursor += 8 + pm_ctzll(hi) / 8;
1943 }
1944
1945 return (size_t) (cursor - start);
1946 }
1947
1948 return (size_t) (cursor - start);
1949}
1950
1951#elif defined(PRISM_HAS_SSSE3)
1952#include <tmmintrin.h>
1953
1954static PRISM_INLINE size_t
1955scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
1956 const uint8_t *cursor = start;
1957
1958 while (cursor + 16 <= end) {
1959 __m128i v = _mm_loadu_si128((const __m128i *) cursor);
1960 __m128i zero = _mm_setzero_si128();
1961
1962 // Unsigned range check via saturating subtraction:
1963 // byte >= lo ⟺ saturate(lo - byte) == 0
1964 // byte <= hi ⟺ saturate(byte - hi) == 0
1965
1966 // Fold case: OR with 0x20 maps A-Z to a-z.
1967 __m128i lowered = _mm_or_si128(v, _mm_set1_epi8(0x20));
1968 __m128i letter = _mm_and_si128(
1969 _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x61), lowered), zero),
1970 _mm_cmpeq_epi8(_mm_subs_epu8(lowered, _mm_set1_epi8(0x7A)), zero));
1971
1972 __m128i digit = _mm_and_si128(
1973 _mm_cmpeq_epi8(_mm_subs_epu8(_mm_set1_epi8(0x30), v), zero),
1974 _mm_cmpeq_epi8(_mm_subs_epu8(v, _mm_set1_epi8(0x39)), zero));
1975
1976 __m128i underscore = _mm_cmpeq_epi8(v, _mm_set1_epi8(0x5F));
1977
1978 __m128i ident = _mm_or_si128(_mm_or_si128(letter, digit), underscore);
1979 int mask = _mm_movemask_epi8(ident);
1980
1981 if (mask == 0xFFFF) {
1982 cursor += 16;
1983 continue;
1984 }
1985
1986 cursor += pm_ctzll((uint64_t) (~mask & 0xFFFF));
1987 return (size_t) (cursor - start);
1988 }
1989
1990 return (size_t) (cursor - start);
1991}
1992
1993// The SWAR path uses pm_ctzll to find the first non-matching byte within a
1994// word, which only yields the correct byte index on little-endian targets.
1995// We gate on a positive little-endian check so that unknown-endianness
1996// platforms safely fall through to the no-op fallback.
1997#elif defined(PRISM_HAS_SWAR)
1998
2008static PRISM_INLINE size_t
2009scan_identifier_ascii(const uint8_t *start, const uint8_t *end) {
2010 static const uint64_t ones = 0x0101010101010101ULL;
2011 static const uint64_t highs = 0x8080808080808080ULL;
2012 const uint8_t *cursor = start;
2013
2014 while (cursor + 8 <= end) {
2015 uint64_t word;
2016 memcpy(&word, cursor, 8);
2017
2018 // Bail on any non-ASCII byte.
2019 if (word & highs) break;
2020
2021 uint64_t digit = ((word | highs) - ones * 0x30) & ((ones * 0x39 | highs) - word) & highs;
2022
2023 // Fold upper- and lowercase together by forcing bit 5 (OR 0x20),
2024 // then check the lowercase range once. A-Z maps to a-z; the
2025 // only non-letter byte that could alias into [0x61,0x7A] is one
2026 // whose original value was in [0x41,0x5A] — which is exactly
2027 // the uppercase letters we want to match.
2028 uint64_t lowered = word | (ones * 0x20);
2029 uint64_t letter = ((lowered | highs) - ones * 0x61) & ((ones * 0x7A | highs) - lowered) & highs;
2030
2031 // Standard SWAR "has zero byte" idiom on (word XOR 0x5F) to find
2032 // bytes equal to underscore. Safe from cross-byte borrows because
2033 // the ASCII guard above ensures all bytes are < 0x80.
2034 uint64_t xor_us = word ^ (ones * 0x5F);
2035 uint64_t underscore = (xor_us - ones) & ~xor_us & highs;
2036
2037 uint64_t ident = digit | letter | underscore;
2038
2039 if (ident == highs) {
2040 cursor += 8;
2041 continue;
2042 }
2043
2044 // Find the first non-identifier byte. On little-endian the first
2045 // byte sits in the least-significant position.
2046 uint64_t not_ident = ~ident & highs;
2047 cursor += pm_ctzll(not_ident) / 8;
2048 return (size_t) (cursor - start);
2049 }
2050
2051 return (size_t) (cursor - start);
2052}
2053
2054#else
2055
// Fallback used when none of the accelerated scanners above is available
// (big-endian or otherwise unsupported targets). It always reports zero
// scanned bytes and does not evaluate its arguments, which leaves all of the
// work to the caller's byte-at-a-time loop.
#define scan_identifier_ascii(start, end) ((size_t) 0)
2059
2060#endif
2061
2067static PRISM_INLINE size_t
2068char_is_identifier(const pm_parser_t *parser, const uint8_t *b, ptrdiff_t n) {
2069 if (n <= 0) {
2070 return 0;
2071 } else if (parser->encoding_changed) {
2072 size_t width;
2073
2074 if ((width = parser->encoding->alnum_char(b, n)) != 0) {
2075 return width;
2076 } else if (*b == '_') {
2077 return 1;
2078 } else if (*b >= 0x80) {
2079 return parser->encoding->char_width(b, n);
2080 } else {
2081 return 0;
2082 }
2083 } else {
2084 return char_is_identifier_utf8(b, n);
2085 }
2086}
2087
// A bitmap of the punctuation characters that may follow a $ in a global
// variable name. The printable ASCII range is split into three 32-character
// groups (0x20-0x3f, 0x40-0x5f, 0x60-0x7f); word `idx` of the table holds one
// bit per character of group `idx`, at bit position (character % 32).
#define PUNCT_BIT(c, idx) ((((c) / 32 - 1) == (idx)) ? (1U << ((c) % 32)) : 0)
#define PUNCT_WORD(idx) ( \
    PUNCT_BIT('~', idx)  | \
    PUNCT_BIT('*', idx)  | \
    PUNCT_BIT('$', idx)  | \
    PUNCT_BIT('?', idx)  | \
    PUNCT_BIT('!', idx)  | \
    PUNCT_BIT('@', idx)  | \
    PUNCT_BIT('/', idx)  | \
    PUNCT_BIT('\\', idx) | \
    PUNCT_BIT(';', idx)  | \
    PUNCT_BIT(',', idx)  | \
    PUNCT_BIT('.', idx)  | \
    PUNCT_BIT('=', idx)  | \
    PUNCT_BIT(':', idx)  | \
    PUNCT_BIT('<', idx)  | \
    PUNCT_BIT('>', idx)  | \
    PUNCT_BIT('\"', idx) | \
    PUNCT_BIT('&', idx)  | \
    PUNCT_BIT('`', idx)  | \
    PUNCT_BIT('\'', idx) | \
    PUNCT_BIT('+', idx)  | \
    PUNCT_BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
    PUNCT_WORD(0),
    PUNCT_WORD(1),
    PUNCT_WORD(2)
};

#undef PUNCT_BIT
#undef PUNCT_WORD
2104
2105static PRISM_INLINE bool
2106char_is_global_name_punctuation(const uint8_t b) {
2107 const unsigned int i = (const unsigned int) b;
2108 if (i <= 0x20 || 0x7e < i) return false;
2109
2110 return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
2111}
2112
2113static PRISM_INLINE bool
2114token_is_setter_name(pm_token_t *token) {
2115 return (
2116 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
2117 ((token->type == PM_TOKEN_IDENTIFIER) &&
2118 (token->end - token->start >= 2) &&
2119 (token->end[-1] == '='))
2120 );
2121}
2122
/**
 * Return true if the `length` bytes at `source` spell exactly one of the
 * keywords in the table below.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
#define KEYWORD(name) { name, sizeof(name) - 1 }

    static const struct {
        const char *name;
        size_t length;
    } keywords[] = {
        KEYWORD("do"), KEYWORD("if"), KEYWORD("in"), KEYWORD("or"),
        KEYWORD("and"), KEYWORD("def"), KEYWORD("end"), KEYWORD("for"),
        KEYWORD("nil"), KEYWORD("not"),
        KEYWORD("case"), KEYWORD("else"), KEYWORD("next"), KEYWORD("redo"),
        KEYWORD("self"), KEYWORD("then"), KEYWORD("true"), KEYWORD("when"),
        KEYWORD("alias"), KEYWORD("begin"), KEYWORD("break"), KEYWORD("class"),
        KEYWORD("elsif"), KEYWORD("false"), KEYWORD("retry"), KEYWORD("super"),
        KEYWORD("undef"), KEYWORD("until"), KEYWORD("while"), KEYWORD("yield"),
        KEYWORD("ensure"), KEYWORD("module"), KEYWORD("rescue"), KEYWORD("return"),
        KEYWORD("unless"),
        KEYWORD("__LINE__"), KEYWORD("__FILE__"),
        KEYWORD("__ENCODING__")
    };

#undef KEYWORD

    for (size_t position = 0; position < sizeof(keywords) / sizeof(keywords[0]); position++) {
        // Compare lengths first so that source is only read when it is
        // known to hold enough bytes.
        if (keywords[position].length != length) continue;
        if (memcmp(source, keywords[position].name, length) == 0) return true;
    }

    return false;
}
2193
2194/******************************************************************************/
2195/* Node flag handling functions */
2196/******************************************************************************/
2197
2201static PRISM_INLINE void
2202pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
2203 node->flags |= flag;
2204}
2205
2209static PRISM_INLINE void
2210pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
2211 node->flags &= (pm_node_flags_t) ~flag;
2212}
2213
2217static PRISM_INLINE void
2218pm_node_flag_set_repeated_parameter(pm_node_t *node) {
2219 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
2220 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
2221 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
2222 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
2223 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
2224 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
2225 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
2226 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
2227
2228 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
2229}
2230
2231/******************************************************************************/
2232/* Node creation functions */
2233/******************************************************************************/
2234
2240#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
2241
2245static PRISM_INLINE pm_node_flags_t
2246pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
2247 pm_node_flags_t flags = 0;
2248
2249 if (closing->type == PM_TOKEN_REGEXP_END) {
2250 pm_buffer_t unknown_flags = { 0 };
2251
2252 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
2253 switch (*flag) {
2254 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
2255 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
2256 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
2257 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
2258
2259 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
2260 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
2261 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
2262 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
2263
2264 default: pm_buffer_append_byte(&unknown_flags, *flag);
2265 }
2266 }
2267
2268 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
2269 if (unknown_flags_length != 0) {
2270 const char *word = unknown_flags_length >= 2 ? "options" : "option";
2271 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
2272 }
2273 pm_buffer_cleanup(&unknown_flags);
2274 }
2275
2276 return flags;
2277}
2278
2279#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
2280
// Forward declarations for the statements node helpers. Only the prototypes
// appear here; the definitions are not part of this section of the file.

/** Create a statements node (see the definition for details). */
static pm_statements_node_t *
pm_statements_node_create(pm_parser_t *parser);

/** Append a statement to the body of a statements node. */
static void
pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);

/** Return the length of the body of a statements node. */
static size_t
pm_statements_node_body_length(pm_statements_node_t *node);
2289
2294static PRISM_INLINE void
2295pm_integer_arena_move(pm_arena_t *arena, pm_integer_t *integer) {
2296 if (integer->values != NULL) {
2297 size_t byte_size = integer->length * sizeof(uint32_t);
2298 uint32_t *old_values = integer->values;
2299 integer->values = (uint32_t *) pm_arena_memdup(arena, old_values, byte_size, PRISM_ALIGNOF(uint32_t));
2300 xfree(old_values);
2301 }
2302}
2303
2307static pm_error_recovery_node_t *
2308pm_error_recovery_node_create(pm_parser_t *parser, uint32_t start, uint32_t length) {
2309 return pm_error_recovery_node_new(
2310 parser->arena,
2311 ++parser->node_id,
2312 0,
2313 ((pm_location_t) { .start = start, .length = length }),
2314 NULL
2315 );
2316}
2317
2321static pm_error_recovery_node_t *
2322pm_error_recovery_node_create_unexpected(pm_parser_t *parser, pm_node_t *unexpected) {
2323 return pm_error_recovery_node_new(
2324 parser->arena,
2325 ++parser->node_id,
2326 0,
2327 unexpected->location,
2328 unexpected
2329 );
2330}
2331
2335static pm_alias_global_variable_node_t *
2336pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2337 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2338
2339 return pm_alias_global_variable_node_new(
2340 parser->arena,
2341 ++parser->node_id,
2342 0,
2343 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
2344 new_name,
2345 old_name,
2346 TOK2LOC(parser, keyword)
2347 );
2348}
2349
2353static pm_alias_method_node_t *
2354pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2355 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2356
2357 return pm_alias_method_node_new(
2358 parser->arena,
2359 ++parser->node_id,
2360 0,
2361 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
2362 new_name,
2363 old_name,
2364 TOK2LOC(parser, keyword)
2365 );
2366}
2367
2371static pm_alternation_pattern_node_t *
2372pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2373 return pm_alternation_pattern_node_new(
2374 parser->arena,
2375 ++parser->node_id,
2376 0,
2377 PM_LOCATION_INIT_NODES(left, right),
2378 left,
2379 right,
2380 TOK2LOC(parser, operator)
2381 );
2382}
2383
2387static pm_and_node_t *
2388pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2389 pm_assert_value_expression(parser, left);
2390
2391 return pm_and_node_new(
2392 parser->arena,
2393 ++parser->node_id,
2394 0,
2395 PM_LOCATION_INIT_NODES(left, right),
2396 left,
2397 right,
2398 TOK2LOC(parser, operator)
2399 );
2400}
2401
2405static pm_arguments_node_t *
2406pm_arguments_node_create(pm_parser_t *parser) {
2407 return pm_arguments_node_new(
2408 parser->arena,
2409 ++parser->node_id,
2410 0,
2411 PM_LOCATION_INIT_UNSET,
2412 ((pm_node_list_t) { 0 })
2413 );
2414}
2415
2419static size_t
2420pm_arguments_node_size(pm_arguments_node_t *node) {
2421 return node->arguments.size;
2422}
2423
2427static void
2428pm_arguments_node_arguments_append(pm_arena_t *arena, pm_arguments_node_t *node, pm_node_t *argument) {
2429 if (pm_arguments_node_size(node) == 0) {
2430 PM_NODE_START_SET_NODE(node, argument);
2431 }
2432
2433 if (PM_NODE_END(node) < PM_NODE_END(argument)) {
2434 PM_NODE_LENGTH_SET_NODE(node, argument);
2435 }
2436
2437 pm_node_list_append(arena, &node->arguments, argument);
2438
2439 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2440 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2441 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2442 } else {
2443 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2444 }
2445 }
2446}
2447
2451static pm_array_node_t *
2452pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2453 if (opening == NULL) {
2454 return pm_array_node_new(
2455 parser->arena,
2456 ++parser->node_id,
2457 PM_NODE_FLAG_STATIC_LITERAL,
2458 PM_LOCATION_INIT_UNSET,
2459 ((pm_node_list_t) { 0 }),
2460 ((pm_location_t) { 0 }),
2461 ((pm_location_t) { 0 })
2462 );
2463 } else {
2464 return pm_array_node_new(
2465 parser->arena,
2466 ++parser->node_id,
2467 PM_NODE_FLAG_STATIC_LITERAL,
2468 PM_LOCATION_INIT_TOKEN(parser, opening),
2469 ((pm_node_list_t) { 0 }),
2470 TOK2LOC(parser, opening),
2471 TOK2LOC(parser, opening)
2472 );
2473 }
2474}
2475
2479static PRISM_INLINE void
2480pm_array_node_elements_append(pm_arena_t *arena, pm_array_node_t *node, pm_node_t *element) {
2481 if (!node->elements.size && !node->opening_loc.length) {
2482 PM_NODE_START_SET_NODE(node, element);
2483 }
2484
2485 pm_node_list_append(arena, &node->elements, element);
2486 PM_NODE_LENGTH_SET_NODE(node, element);
2487
2488 // If the element is not a static literal, then the array is not a static
2489 // literal. Turn that flag off.
2490 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2491 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
2492 }
2493
2494 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2495 pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2496 }
2497}
2498
2502static void
2503pm_array_node_close_set(const pm_parser_t *parser, pm_array_node_t *node, const pm_token_t *closing) {
2504 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == 0);
2505 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2506 node->closing_loc = TOK2LOC(parser, closing);
2507}
2508
2513static pm_array_pattern_node_t *
2514pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2515 pm_array_pattern_node_t *node = pm_array_pattern_node_new(
2516 parser->arena,
2517 ++parser->node_id,
2518 0,
2519 PM_LOCATION_INIT_NODES(nodes->nodes[0], nodes->nodes[nodes->size - 1]),
2520 NULL,
2521 ((pm_node_list_t) { 0 }),
2522 NULL,
2523 ((pm_node_list_t) { 0 }),
2524 ((pm_location_t) { 0 }),
2525 ((pm_location_t) { 0 })
2526 );
2527
2528 // For now we're going to just copy over each pointer manually. This could be
2529 // much more efficient, as we could instead resize the node list.
2530 bool found_rest = false;
2531 pm_node_t *child;
2532
2533 PM_NODE_LIST_FOREACH(nodes, index, child) {
2534 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2535 node->rest = child;
2536 found_rest = true;
2537 } else if (found_rest) {
2538 pm_node_list_append(parser->arena, &node->posts, child);
2539 } else {
2540 pm_node_list_append(parser->arena, &node->requireds, child);
2541 }
2542 }
2543
2544 return node;
2545}
2546
2550static pm_array_pattern_node_t *
2551pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2552 return pm_array_pattern_node_new(
2553 parser->arena,
2554 ++parser->node_id,
2555 0,
2556 PM_LOCATION_INIT_NODE(rest),
2557 NULL,
2558 ((pm_node_list_t) { 0 }),
2559 rest,
2560 ((pm_node_list_t) { 0 }),
2561 ((pm_location_t) { 0 }),
2562 ((pm_location_t) { 0 })
2563 );
2564}
2565
2570static pm_array_pattern_node_t *
2571pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2572 return pm_array_pattern_node_new(
2573 parser->arena,
2574 ++parser->node_id,
2575 0,
2576 PM_LOCATION_INIT_NODE_TOKEN(parser, constant, closing),
2577 constant,
2578 ((pm_node_list_t) { 0 }),
2579 NULL,
2580 ((pm_node_list_t) { 0 }),
2581 TOK2LOC(parser, opening),
2582 TOK2LOC(parser, closing)
2583 );
2584}
2585
2590static pm_array_pattern_node_t *
2591pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2592 return pm_array_pattern_node_new(
2593 parser->arena,
2594 ++parser->node_id,
2595 0,
2596 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
2597 NULL,
2598 ((pm_node_list_t) { 0 }),
2599 NULL,
2600 ((pm_node_list_t) { 0 }),
2601 TOK2LOC(parser, opening),
2602 TOK2LOC(parser, closing)
2603 );
2604}
2605
2606static PRISM_INLINE void
2607pm_array_pattern_node_requireds_append(pm_arena_t *arena, pm_array_pattern_node_t *node, pm_node_t *inner) {
2608 pm_node_list_append(arena, &node->requireds, inner);
2609}
2610
2614static pm_assoc_node_t *
2615pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2616 uint32_t end;
2617
2618 if (value != NULL && PM_NODE_END(value) > PM_NODE_END(key)) {
2619 end = PM_NODE_END(value);
2620 } else if (operator != NULL) {
2621 end = PM_TOKEN_END(parser, operator);
2622 } else {
2623 end = PM_NODE_END(key);
2624 }
2625
2626 // Hash string keys will be frozen, so we can mark them as frozen here so
2627 // that the compiler picks them up and also when we check for static literal
2628 // on the keys it gets factored in.
2629 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2630 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2631 }
2632
2633 // If the key and value of this assoc node are both static literals, then
2634 // we can mark this node as a static literal.
2635 pm_node_flags_t flags = 0;
2636 if (
2637 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2638 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2639 ) {
2640 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2641 }
2642
2643 return pm_assoc_node_new(
2644 parser->arena,
2645 ++parser->node_id,
2646 flags,
2647 ((pm_location_t) { .start = PM_NODE_START(key), .length = U32(end - PM_NODE_START(key)) }),
2648 key,
2649 value,
2650 NTOK2LOC(parser, operator)
2651 );
2652}
2653
2657static pm_assoc_splat_node_t *
2658pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2659 assert(operator->type == PM_TOKEN_USTAR_STAR);
2660
2661 return pm_assoc_splat_node_new(
2662 parser->arena,
2663 ++parser->node_id,
2664 0,
2665 (value == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, value),
2666 value,
2667 TOK2LOC(parser, operator)
2668 );
2669}
2670
2674static pm_back_reference_read_node_t *
2675pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2676 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2677
2678 return pm_back_reference_read_node_new(
2679 parser->arena,
2680 ++parser->node_id,
2681 0,
2682 PM_LOCATION_INIT_TOKEN(parser, name),
2683 pm_parser_constant_id_token(parser, name)
2684 );
2685}
2686
2690static pm_begin_node_t *
2691pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2692 uint32_t start = begin_keyword == NULL ? 0 : PM_TOKEN_START(parser, begin_keyword);
2693 uint32_t end = statements == NULL ? (begin_keyword == NULL ? 0 : PM_TOKEN_END(parser, begin_keyword)) : PM_NODE_END(statements);
2694
2695 return pm_begin_node_new(
2696 parser->arena,
2697 ++parser->node_id,
2698 0,
2699 ((pm_location_t) { .start = start, .length = U32(end - start) }),
2700 NTOK2LOC(parser, begin_keyword),
2701 statements,
2702 NULL,
2703 NULL,
2704 NULL,
2705 ((pm_location_t) { 0 })
2706 );
2707}
2708
2712static void
2713pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2714 if (node->begin_keyword_loc.length == 0) {
2715 PM_NODE_START_SET_NODE(node, rescue_clause);
2716 }
2717 PM_NODE_LENGTH_SET_NODE(node, rescue_clause);
2718 node->rescue_clause = rescue_clause;
2719}
2720
2724static void
2725pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2726 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2727 PM_NODE_START_SET_NODE(node, else_clause);
2728 }
2729 PM_NODE_LENGTH_SET_NODE(node, else_clause);
2730 node->else_clause = else_clause;
2731}
2732
2736static void
2737pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2738 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2739 PM_NODE_START_SET_NODE(node, ensure_clause);
2740 }
2741 PM_NODE_LENGTH_SET_NODE(node, ensure_clause);
2742 node->ensure_clause = ensure_clause;
2743}
2744
2748static void
2749pm_begin_node_end_keyword_set(const pm_parser_t *parser, pm_begin_node_t *node, const pm_token_t *end_keyword) {
2750 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == 0);
2751 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
2752 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
2753}
2754
2758static pm_block_argument_node_t *
2759pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2760 assert(operator->type == PM_TOKEN_UAMPERSAND);
2761
2762 return pm_block_argument_node_new(
2763 parser->arena,
2764 ++parser->node_id,
2765 0,
2766 (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
2767 expression,
2768 TOK2LOC(parser, operator)
2769 );
2770}
2771
2775static pm_block_node_t *
2776pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2777 return pm_block_node_new(
2778 parser->arena,
2779 ++parser->node_id,
2780 0,
2781 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
2782 *locals,
2783 parameters,
2784 body,
2785 TOK2LOC(parser, opening),
2786 TOK2LOC(parser, closing)
2787 );
2788}
2789
2793static pm_block_parameter_node_t *
2794pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2795 assert(operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2796
2797 return pm_block_parameter_node_new(
2798 parser->arena,
2799 ++parser->node_id,
2800 0,
2801 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
2802 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
2803 NTOK2LOC(parser, name),
2804 TOK2LOC(parser, operator)
2805 );
2806}
2807
2811static pm_block_parameters_node_t *
2812pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2813 uint32_t start;
2814 if (opening != NULL) {
2815 start = PM_TOKEN_START(parser, opening);
2816 } else if (parameters != NULL) {
2817 start = PM_NODE_START(parameters);
2818 } else {
2819 start = 0;
2820 }
2821
2822 uint32_t end;
2823 if (parameters != NULL) {
2824 end = PM_NODE_END(parameters);
2825 } else if (opening != NULL) {
2826 end = PM_TOKEN_END(parser, opening);
2827 } else {
2828 end = 0;
2829 }
2830
2831 return pm_block_parameters_node_new(
2832 parser->arena,
2833 ++parser->node_id,
2834 0,
2835 ((pm_location_t) { .start = start, .length = U32(end - start) }),
2836 parameters,
2837 ((pm_node_list_t) { 0 }),
2838 NTOK2LOC(parser, opening),
2839 ((pm_location_t) { 0 })
2840 );
2841}
2842
2846static void
2847pm_block_parameters_node_closing_set(const pm_parser_t *parser, pm_block_parameters_node_t *node, const pm_token_t *closing) {
2848 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == 0);
2849 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2850 node->closing_loc = TOK2LOC(parser, closing);
2851}
2852
2856static pm_block_local_variable_node_t *
2857pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2858 return pm_block_local_variable_node_new(
2859 parser->arena,
2860 ++parser->node_id,
2861 0,
2862 PM_LOCATION_INIT_TOKEN(parser, name),
2863 pm_parser_constant_id_token(parser, name)
2864 );
2865}
2866
2870static void
2871pm_block_parameters_node_append_local(pm_arena_t *arena, pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2872 pm_node_list_append(arena, &node->locals, UP(local));
2873
2874 if (PM_NODE_LENGTH(node) == 0) {
2875 PM_NODE_START_SET_NODE(node, local);
2876 }
2877
2878 PM_NODE_LENGTH_SET_NODE(node, local);
2879}
2880
2884static pm_break_node_t *
2885pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2886 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2887
2888 return pm_break_node_new(
2889 parser->arena,
2890 ++parser->node_id,
2891 0,
2892 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
2893 arguments,
2894 TOK2LOC(parser, keyword)
2895 );
2896}
2897
// There are certain flags that we want to use internally but don't want to
// expose because they are not relevant beyond parsing. Therefore we'll define
// them here and not define them in config.yml/a header file.

// Internal flag for write nodes; occupies bit 2.
static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);

// Internal flags for call nodes. Each is (PM_CALL_NODE_FLAGS_LAST - 1)
// shifted left by a different amount.
// NOTE(review): presumably this places them in the bits directly above the
// public call node flags; confirm against the flag enum definition.
static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
2906
2912static pm_call_node_t *
2913pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2914 return pm_call_node_new(
2915 parser->arena,
2916 ++parser->node_id,
2917 flags,
2918 PM_LOCATION_INIT_UNSET,
2919 NULL,
2920 ((pm_location_t) { 0 }),
2921 0,
2922 ((pm_location_t) { 0 }),
2923 ((pm_location_t) { 0 }),
2924 NULL,
2925 ((pm_location_t) { 0 }),
2926 ((pm_location_t) { 0 }),
2927 NULL
2928 );
2929}
2930
2935static PRISM_INLINE pm_node_flags_t
2936pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2937 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2938}
2939
2944static pm_call_node_t *
2945pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2946 pm_assert_value_expression(parser, receiver);
2947
2948 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2949 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2950 flags |= PM_CALL_NODE_FLAGS_INDEX;
2951 }
2952
2953 pm_call_node_t *node = pm_call_node_create(parser, flags);
2954
2955 PM_NODE_START_SET_NODE(node, receiver);
2956
2957 const pm_location_t *end = pm_arguments_end(arguments);
2958 assert(end != NULL && "unreachable");
2959 PM_NODE_LENGTH_SET_LOCATION(node, end);
2960
2961 node->receiver = receiver;
2962 node->message_loc.start = arguments->opening_loc.start;
2963 node->message_loc.length = (arguments->closing_loc.start + arguments->closing_loc.length) - arguments->opening_loc.start;
2964
2965 node->opening_loc = arguments->opening_loc;
2966 node->arguments = arguments->arguments;
2967 node->closing_loc = arguments->closing_loc;
2968 node->block = arguments->block;
2969
2970 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2971 return node;
2972}
2973
2977static pm_call_node_t *
2978pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2979 pm_assert_value_expression(parser, receiver);
2980 pm_assert_value_expression(parser, argument);
2981
2982 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2983
2984 PM_NODE_START_SET_NODE(node, PM_NODE_START(receiver) < PM_NODE_START(argument) ? receiver : argument);
2985 PM_NODE_LENGTH_SET_NODE(node, PM_NODE_END(receiver) > PM_NODE_END(argument) ? receiver : argument);
2986
2987 node->receiver = receiver;
2988 node->message_loc = TOK2LOC(parser, operator);
2989
2990 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2991 pm_arguments_node_arguments_append(parser->arena, arguments, argument);
2992 node->arguments = arguments;
2993
2994 node->name = pm_parser_constant_id_token(parser, operator);
2995 return node;
2996}
2997
2998static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
2999
3003static pm_call_node_t *
3004pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
3005 pm_assert_value_expression(parser, receiver);
3006
3007 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
3008
3009 PM_NODE_START_SET_NODE(node, receiver);
3010 const pm_location_t *end = pm_arguments_end(arguments);
3011 if (end == NULL) {
3012 PM_NODE_LENGTH_SET_TOKEN(parser, node, message);
3013 } else {
3014 PM_NODE_LENGTH_SET_LOCATION(node, end);
3015 }
3016
3017 node->receiver = receiver;
3018 node->call_operator_loc = TOK2LOC(parser, operator);
3019 node->message_loc = TOK2LOC(parser, message);
3020 node->opening_loc = arguments->opening_loc;
3021 node->arguments = arguments->arguments;
3022 node->closing_loc = arguments->closing_loc;
3023 node->block = arguments->block;
3024
3025 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
3026 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
3027 }
3028
3033 node->name = pm_parser_constant_id_raw(parser, message->start, parse_operator_symbol_name(message));
3034 return node;
3035}
3036
3040static pm_call_node_t *
3041pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
3042 pm_call_node_t *node = pm_call_node_create(parser, 0);
3043 node->base.location = (pm_location_t) { .start = 0, .length = U32(parser->end - parser->start) };
3044
3045 node->receiver = receiver;
3046 node->arguments = arguments;
3047
3048 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
3049 return node;
3050}
3051
3056static pm_call_node_t *
3057pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
3058 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
3059
3060 PM_NODE_START_SET_TOKEN(parser, node, message);
3061 const pm_location_t *end = pm_arguments_end(arguments);
3062 assert(end != NULL && "unreachable");
3063 PM_NODE_LENGTH_SET_LOCATION(node, end);
3064
3065 node->message_loc = TOK2LOC(parser, message);
3066 node->opening_loc = arguments->opening_loc;
3067 node->arguments = arguments->arguments;
3068 node->closing_loc = arguments->closing_loc;
3069 node->block = arguments->block;
3070
3071 node->name = pm_parser_constant_id_token(parser, message);
3072 return node;
3073}
3074
3079static pm_call_node_t *
3080pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
3081 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
3082
3083 node->base.location = (pm_location_t) { 0 };
3084 node->arguments = arguments;
3085
3086 node->name = name;
3087 return node;
3088}
3089
3093static pm_call_node_t *
3094pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
3095 pm_assert_value_expression(parser, receiver);
3096 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
3097
3098 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
3099
3100 PM_NODE_START_SET_TOKEN(parser, node, message);
3101 if (arguments->closing_loc.length > 0) {
3102 PM_NODE_LENGTH_SET_LOCATION(node, &arguments->closing_loc);
3103 } else {
3104 assert(receiver != NULL);
3105 PM_NODE_LENGTH_SET_NODE(node, receiver);
3106 }
3107
3108 node->receiver = receiver;
3109 node->message_loc = TOK2LOC(parser, message);
3110 node->opening_loc = arguments->opening_loc;
3111 node->arguments = arguments->arguments;
3112 node->closing_loc = arguments->closing_loc;
3113
3114 node->name = pm_parser_constant_id_constant(parser, "!", 1);
3115 return node;
3116}
3117
3121static pm_call_node_t *
3122pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
3123 pm_assert_value_expression(parser, receiver);
3124
3125 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
3126
3127 PM_NODE_START_SET_NODE(node, receiver);
3128 const pm_location_t *end = pm_arguments_end(arguments);
3129 assert(end != NULL && "unreachable");
3130 PM_NODE_LENGTH_SET_LOCATION(node, end);
3131
3132 node->receiver = receiver;
3133 node->call_operator_loc = TOK2LOC(parser, operator);
3134 node->opening_loc = arguments->opening_loc;
3135 node->arguments = arguments->arguments;
3136 node->closing_loc = arguments->closing_loc;
3137 node->block = arguments->block;
3138
3139 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
3140 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
3141 }
3142
3143 node->name = pm_parser_constant_id_constant(parser, "call", 4);
3144 return node;
3145}
3146
3150static pm_call_node_t *
3151pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
3152 pm_assert_value_expression(parser, receiver);
3153
3154 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
3155
3156 PM_NODE_START_SET_TOKEN(parser, node, operator);
3157 PM_NODE_LENGTH_SET_NODE(node, receiver);
3158
3159 node->receiver = receiver;
3160 node->message_loc = TOK2LOC(parser, operator);
3161
3162 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
3163 return node;
3164}
3165
3170static pm_call_node_t *
3171pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
3172 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
3173
3174 node->base.location = TOK2LOC(parser, message);
3175 node->message_loc = TOK2LOC(parser, message);
3176
3177 node->name = pm_parser_constant_id_token(parser, message);
3178 return node;
3179}
3180
3185static PRISM_INLINE bool
3186pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
3187 return (
3188 (node->message_loc.length > 0) &&
3189 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '!') &&
3190 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '?') &&
3191 char_is_identifier_start(parser, parser->start + node->message_loc.start, (ptrdiff_t) node->message_loc.length) &&
3192 (node->opening_loc.length == 0) &&
3193 (node->arguments == NULL) &&
3194 (node->block == NULL)
3195 );
3196}
3197
3201static void
3202pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
3203 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
3204
3205 if (write_constant->length > 0) {
3206 size_t length = write_constant->length - 1;
3207
3208 uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
3209 memcpy(memory, write_constant->start, length);
3210
3211 *read_name = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, memory, length);
3212 } else {
3213 // We can get here if the message was missing because of a syntax error.
3214 *read_name = pm_parser_constant_id_constant(parser, "", 0);
3215 }
3216}
3217
3221static pm_call_and_write_node_t *
3222pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3223 assert(target->block == NULL);
3224 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3225
3226 pm_call_and_write_node_t *node = pm_call_and_write_node_new(
3227 parser->arena,
3228 ++parser->node_id,
3229 FL(target),
3230 PM_LOCATION_INIT_NODES(target, value),
3231 target->receiver,
3232 target->call_operator_loc,
3233 target->message_loc,
3234 0,
3235 target->name,
3236 TOK2LOC(parser, operator),
3237 value
3238 );
3239
3240 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3241
3242 // The target is no longer necessary because we've reused its children.
3243 // It is arena-allocated so no explicit free is needed.
3244
3245 return node;
3246}
3247
3252static void
3253pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
3254 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
3255 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
3256 pm_node_t *node;
3257 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
3258 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
3259 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
3260 break;
3261 }
3262 }
3263 }
3264
3265 if (block != NULL) {
3266 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
3267 }
3268 }
3269}
3270
3274static pm_index_and_write_node_t *
3275pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3276 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3277
3278 pm_index_arguments_check(parser, target->arguments, target->block);
3279
3280 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3281
3282 pm_index_and_write_node_t *node = pm_index_and_write_node_new(
3283 parser->arena,
3284 ++parser->node_id,
3285 FL(target),
3286 PM_LOCATION_INIT_NODES(target, value),
3287 target->receiver,
3288 target->call_operator_loc,
3289 target->opening_loc,
3290 target->arguments,
3291 target->closing_loc,
3292 (pm_block_argument_node_t *) target->block,
3293 TOK2LOC(parser, operator),
3294 value
3295 );
3296
3297 // The target is no longer necessary because we've reused its children.
3298 // It is arena-allocated so no explicit free is needed.
3299
3300 return node;
3301}
3302
3306static pm_call_operator_write_node_t *
3307pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3308 assert(target->block == NULL);
3309
3310 pm_call_operator_write_node_t *node = pm_call_operator_write_node_new(
3311 parser->arena,
3312 ++parser->node_id,
3313 FL(target),
3314 PM_LOCATION_INIT_NODES(target, value),
3315 target->receiver,
3316 target->call_operator_loc,
3317 target->message_loc,
3318 0,
3319 target->name,
3320 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3321 TOK2LOC(parser, operator),
3322 value
3323 );
3324
3325 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3326
3327 // The target is no longer necessary because we've reused its children.
3328 // It is arena-allocated so no explicit free is needed.
3329
3330 return node;
3331}
3332
3336static pm_index_operator_write_node_t *
3337pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3338 pm_index_arguments_check(parser, target->arguments, target->block);
3339
3340 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3341
3342 pm_index_operator_write_node_t *node = pm_index_operator_write_node_new(
3343 parser->arena,
3344 ++parser->node_id,
3345 FL(target),
3346 PM_LOCATION_INIT_NODES(target, value),
3347 target->receiver,
3348 target->call_operator_loc,
3349 target->opening_loc,
3350 target->arguments,
3351 target->closing_loc,
3352 (pm_block_argument_node_t *) target->block,
3353 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3354 TOK2LOC(parser, operator),
3355 value
3356 );
3357
3358 // The target is no longer necessary because we've reused its children.
3359 // It is arena-allocated so no explicit free is needed.
3360
3361 return node;
3362}
3363
3367static pm_call_or_write_node_t *
3368pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3369 assert(target->block == NULL);
3370 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3371
3372 pm_call_or_write_node_t *node = pm_call_or_write_node_new(
3373 parser->arena,
3374 ++parser->node_id,
3375 FL(target),
3376 PM_LOCATION_INIT_NODES(target, value),
3377 target->receiver,
3378 target->call_operator_loc,
3379 target->message_loc,
3380 0,
3381 target->name,
3382 TOK2LOC(parser, operator),
3383 value
3384 );
3385
3386 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3387
3388 // The target is no longer necessary because we've reused its children.
3389 // It is arena-allocated so no explicit free is needed.
3390
3391 return node;
3392}
3393
3397static pm_index_or_write_node_t *
3398pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3399 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3400
3401 pm_index_arguments_check(parser, target->arguments, target->block);
3402
3403 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3404
3405 pm_index_or_write_node_t *node = pm_index_or_write_node_new(
3406 parser->arena,
3407 ++parser->node_id,
3408 FL(target),
3409 PM_LOCATION_INIT_NODES(target, value),
3410 target->receiver,
3411 target->call_operator_loc,
3412 target->opening_loc,
3413 target->arguments,
3414 target->closing_loc,
3415 (pm_block_argument_node_t *) target->block,
3416 TOK2LOC(parser, operator),
3417 value
3418 );
3419
3420 // The target is no longer necessary because we've reused its children.
3421 // It is arena-allocated so no explicit free is needed.
3422
3423 return node;
3424}
3425
3430static pm_call_target_node_t *
3431pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3432 pm_call_target_node_t *node = pm_call_target_node_new(
3433 parser->arena,
3434 ++parser->node_id,
3435 FL(target),
3436 PM_LOCATION_INIT_NODE(target),
3437 target->receiver,
3438 target->call_operator_loc,
3439 target->name,
3440 target->message_loc
3441 );
3442
3443 /* It is possible to get here where we have parsed an invalid syntax tree
3444 * where the call operator was not present. In that case we will have a
3445 * problem because it is a required location. In this case we need to fill
3446 * it in with a fake location so that the syntax tree remains valid. */
3447 if (node->call_operator_loc.length == 0) {
3448 node->call_operator_loc = target->base.location;
3449 }
3450
3451 // The target is no longer necessary because we've reused its children.
3452 // It is arena-allocated so no explicit free is needed.
3453
3454 return node;
3455}
3456
3461static pm_index_target_node_t *
3462pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3463 pm_index_arguments_check(parser, target->arguments, target->block);
3464 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3465
3466 pm_index_target_node_t *node = pm_index_target_node_new(
3467 parser->arena,
3468 ++parser->node_id,
3469 FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3470 PM_LOCATION_INIT_NODE(target),
3471 target->receiver,
3472 target->opening_loc,
3473 target->arguments,
3474 target->closing_loc,
3475 (pm_block_argument_node_t *) target->block
3476 );
3477
3478 // The target is no longer necessary because we've reused its children.
3479 // It is arena-allocated so no explicit free is needed.
3480
3481 return node;
3482}
3483
3487static pm_capture_pattern_node_t *
3488pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3489 return pm_capture_pattern_node_new(
3490 parser->arena,
3491 ++parser->node_id,
3492 0,
3493 PM_LOCATION_INIT_NODES(value, target),
3494 value,
3495 target,
3496 TOK2LOC(parser, operator)
3497 );
3498}
3499
3503static pm_case_node_t *
3504pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3505 return pm_case_node_new(
3506 parser->arena,
3507 ++parser->node_id,
3508 0,
3509 PM_LOCATION_INIT_TOKENS(parser, case_keyword, end_keyword == NULL ? case_keyword : end_keyword),
3510 predicate,
3511 ((pm_node_list_t) { 0 }),
3512 NULL,
3513 TOK2LOC(parser, case_keyword),
3514 NTOK2LOC(parser, end_keyword)
3515 );
3516}
3517
3521static void
3522pm_case_node_condition_append(pm_arena_t *arena, pm_case_node_t *node, pm_node_t *condition) {
3523 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3524
3525 pm_node_list_append(arena, &node->conditions, condition);
3526 PM_NODE_LENGTH_SET_NODE(node, condition);
3527}
3528
/**
 * Set the else clause for a CaseNode node and update the node's length based
 * on the given else clause.
 */
static void
pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
    node->else_clause = else_clause;
    PM_NODE_LENGTH_SET_NODE(node, else_clause);
}
3537
3541static void
3542pm_case_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_node_t *node, const pm_token_t *end_keyword) {
3543 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3544 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
3545}
3546
3550static pm_case_match_node_t *
3551pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate) {
3552 return pm_case_match_node_new(
3553 parser->arena,
3554 ++parser->node_id,
3555 0,
3556 PM_LOCATION_INIT_TOKEN(parser, case_keyword),
3557 predicate,
3558 ((pm_node_list_t) { 0 }),
3559 NULL,
3560 TOK2LOC(parser, case_keyword),
3561 ((pm_location_t) { 0 })
3562 );
3563}
3564
3568static void
3569pm_case_match_node_condition_append(pm_arena_t *arena, pm_case_match_node_t *node, pm_node_t *condition) {
3570 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3571
3572 pm_node_list_append(arena, &node->conditions, condition);
3573 PM_NODE_LENGTH_SET_NODE(node, condition);
3574}
3575
/**
 * Set the else clause for a CaseMatchNode node and update the node's length
 * based on the given else clause.
 */
static void
pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
    node->else_clause = else_clause;
    PM_NODE_LENGTH_SET_NODE(node, else_clause);
}
3584
3588static void
3589pm_case_match_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3590 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3591 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
3592}
3593
3597static pm_class_node_t *
3598pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3599 return pm_class_node_new(
3600 parser->arena,
3601 ++parser->node_id,
3602 0,
3603 PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
3604 *locals,
3605 TOK2LOC(parser, class_keyword),
3606 constant_path,
3607 NTOK2LOC(parser, inheritance_operator),
3608 superclass,
3609 body,
3610 TOK2LOC(parser, end_keyword),
3611 pm_parser_constant_id_token(parser, name)
3612 );
3613}
3614
3618static pm_class_variable_and_write_node_t *
3619pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3620 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3621
3622 return pm_class_variable_and_write_node_new(
3623 parser->arena,
3624 ++parser->node_id,
3625 0,
3626 PM_LOCATION_INIT_NODES(target, value),
3627 target->name,
3628 target->base.location,
3629 TOK2LOC(parser, operator),
3630 value
3631 );
3632}
3633
3637static pm_class_variable_operator_write_node_t *
3638pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3639 return pm_class_variable_operator_write_node_new(
3640 parser->arena,
3641 ++parser->node_id,
3642 0,
3643 PM_LOCATION_INIT_NODES(target, value),
3644 target->name,
3645 target->base.location,
3646 TOK2LOC(parser, operator),
3647 value,
3648 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3649 );
3650}
3651
3655static pm_class_variable_or_write_node_t *
3656pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3657 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3658
3659 return pm_class_variable_or_write_node_new(
3660 parser->arena,
3661 ++parser->node_id,
3662 0,
3663 PM_LOCATION_INIT_NODES(target, value),
3664 target->name,
3665 target->base.location,
3666 TOK2LOC(parser, operator),
3667 value
3668 );
3669}
3670
3674static pm_class_variable_read_node_t *
3675pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3676 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3677
3678 return pm_class_variable_read_node_new(
3679 parser->arena,
3680 ++parser->node_id,
3681 0,
3682 PM_LOCATION_INIT_TOKEN(parser, token),
3683 pm_parser_constant_id_token(parser, token)
3684 );
3685}
3686
3693static PRISM_INLINE pm_node_flags_t
3694pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3695 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) {
3696 return flags;
3697 }
3698 return 0;
3699}
3700
3704static pm_class_variable_write_node_t *
3705pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3706 return pm_class_variable_write_node_new(
3707 parser->arena,
3708 ++parser->node_id,
3709 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3710 PM_LOCATION_INIT_NODES(read_node, value),
3711 read_node->name,
3712 read_node->base.location,
3713 value,
3714 TOK2LOC(parser, operator)
3715 );
3716}
3717
3721static pm_constant_path_and_write_node_t *
3722pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3723 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3724
3725 return pm_constant_path_and_write_node_new(
3726 parser->arena,
3727 ++parser->node_id,
3728 0,
3729 PM_LOCATION_INIT_NODES(target, value),
3730 target,
3731 TOK2LOC(parser, operator),
3732 value
3733 );
3734}
3735
3739static pm_constant_path_operator_write_node_t *
3740pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3741 return pm_constant_path_operator_write_node_new(
3742 parser->arena,
3743 ++parser->node_id,
3744 0,
3745 PM_LOCATION_INIT_NODES(target, value),
3746 target,
3747 TOK2LOC(parser, operator),
3748 value,
3749 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3750 );
3751}
3752
3756static pm_constant_path_or_write_node_t *
3757pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3758 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3759
3760 return pm_constant_path_or_write_node_new(
3761 parser->arena,
3762 ++parser->node_id,
3763 0,
3764 PM_LOCATION_INIT_NODES(target, value),
3765 target,
3766 TOK2LOC(parser, operator),
3767 value
3768 );
3769}
3770
3774static pm_constant_path_node_t *
3775pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3776 pm_assert_value_expression(parser, parent);
3777
3778 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3779 if (name_token->type == PM_TOKEN_CONSTANT) {
3780 name = pm_parser_constant_id_token(parser, name_token);
3781 }
3782
3783 return pm_constant_path_node_new(
3784 parser->arena,
3785 ++parser->node_id,
3786 0,
3787 (parent == NULL) ? PM_LOCATION_INIT_TOKENS(parser, delimiter, name_token) : PM_LOCATION_INIT_NODE_TOKEN(parser, parent, name_token),
3788 parent,
3789 name,
3790 TOK2LOC(parser, delimiter),
3791 TOK2LOC(parser, name_token)
3792 );
3793}
3794
3798static pm_constant_path_write_node_t *
3799pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3800 return pm_constant_path_write_node_new(
3801 parser->arena,
3802 ++parser->node_id,
3803 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3804 PM_LOCATION_INIT_NODES(target, value),
3805 target,
3806 TOK2LOC(parser, operator),
3807 value
3808 );
3809}
3810
3814static pm_constant_and_write_node_t *
3815pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3816 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3817
3818 return pm_constant_and_write_node_new(
3819 parser->arena,
3820 ++parser->node_id,
3821 0,
3822 PM_LOCATION_INIT_NODES(target, value),
3823 target->name,
3824 target->base.location,
3825 TOK2LOC(parser, operator),
3826 value
3827 );
3828}
3829
3833static pm_constant_operator_write_node_t *
3834pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3835 return pm_constant_operator_write_node_new(
3836 parser->arena,
3837 ++parser->node_id,
3838 0,
3839 PM_LOCATION_INIT_NODES(target, value),
3840 target->name,
3841 target->base.location,
3842 TOK2LOC(parser, operator),
3843 value,
3844 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
3845 );
3846}
3847
3851static pm_constant_or_write_node_t *
3852pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3853 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3854
3855 return pm_constant_or_write_node_new(
3856 parser->arena,
3857 ++parser->node_id,
3858 0,
3859 PM_LOCATION_INIT_NODES(target, value),
3860 target->name,
3861 target->base.location,
3862 TOK2LOC(parser, operator),
3863 value
3864 );
3865}
3866
3870static pm_constant_read_node_t *
3871pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3872 assert(name->type == PM_TOKEN_CONSTANT || name->type == 0);
3873
3874 return pm_constant_read_node_new(
3875 parser->arena,
3876 ++parser->node_id,
3877 0,
3878 PM_LOCATION_INIT_TOKEN(parser, name),
3879 pm_parser_constant_id_token(parser, name)
3880 );
3881}
3882
3886static pm_constant_write_node_t *
3887pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3888 return pm_constant_write_node_new(
3889 parser->arena,
3890 ++parser->node_id,
3891 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3892 PM_LOCATION_INIT_NODES(target, value),
3893 target->name,
3894 target->base.location,
3895 value,
3896 TOK2LOC(parser, operator)
3897 );
3898}
3899
3903static void
3904pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3905 switch (PM_NODE_TYPE(node)) {
3906 case PM_BEGIN_NODE: {
3907 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3908 if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
3909 break;
3910 }
3911 case PM_PARENTHESES_NODE: {
3912 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3913 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3914 break;
3915 }
3916 case PM_STATEMENTS_NODE: {
3917 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3918 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3919 break;
3920 }
3921 case PM_ARRAY_NODE:
3922 case PM_FLOAT_NODE:
3923 case PM_IMAGINARY_NODE:
3924 case PM_INTEGER_NODE:
3925 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3926 case PM_INTERPOLATED_STRING_NODE:
3927 case PM_INTERPOLATED_SYMBOL_NODE:
3928 case PM_INTERPOLATED_X_STRING_NODE:
3929 case PM_RATIONAL_NODE:
3930 case PM_REGULAR_EXPRESSION_NODE:
3931 case PM_SOURCE_ENCODING_NODE:
3932 case PM_SOURCE_FILE_NODE:
3933 case PM_SOURCE_LINE_NODE:
3934 case PM_STRING_NODE:
3935 case PM_SYMBOL_NODE:
3936 case PM_X_STRING_NODE:
3937 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3938 break;
3939 default:
3940 break;
3941 }
3942}
3943
3947static pm_def_node_t *
3948pm_def_node_create(
3949 pm_parser_t *parser,
3950 pm_constant_id_t name,
3951 const pm_token_t *name_loc,
3952 pm_node_t *receiver,
3953 pm_parameters_node_t *parameters,
3954 pm_node_t *body,
3955 pm_constant_id_list_t *locals,
3956 const pm_token_t *def_keyword,
3957 const pm_token_t *operator,
3958 const pm_token_t *lparen,
3959 const pm_token_t *rparen,
3960 const pm_token_t *equal,
3961 const pm_token_t *end_keyword
3962) {
3963 if (receiver != NULL) {
3964 pm_def_node_receiver_check(parser, receiver);
3965 }
3966
3967 return pm_def_node_new(
3968 parser->arena,
3969 ++parser->node_id,
3970 0,
3971 (end_keyword == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, def_keyword, body) : PM_LOCATION_INIT_TOKENS(parser, def_keyword, end_keyword),
3972 name,
3973 TOK2LOC(parser, name_loc),
3974 receiver,
3975 parameters,
3976 body,
3977 *locals,
3978 TOK2LOC(parser, def_keyword),
3979 NTOK2LOC(parser, operator),
3980 NTOK2LOC(parser, lparen),
3981 NTOK2LOC(parser, rparen),
3982 NTOK2LOC(parser, equal),
3983 NTOK2LOC(parser, end_keyword)
3984 );
3985}
3986
3990static pm_defined_node_t *
3991pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
3992 return pm_defined_node_new(
3993 parser->arena,
3994 ++parser->node_id,
3995 0,
3996 (rparen == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, value) : PM_LOCATION_INIT_TOKENS(parser, keyword, rparen),
3997 NTOK2LOC(parser, lparen),
3998 value,
3999 NTOK2LOC(parser, rparen),
4000 TOK2LOC(parser, keyword)
4001 );
4002}
4003
4007static pm_else_node_t *
4008pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4009 return pm_else_node_new(
4010 parser->arena,
4011 ++parser->node_id,
4012 0,
4013 ((end_keyword == NULL) && (statements != NULL)) ? PM_LOCATION_INIT_TOKEN_NODE(parser, else_keyword, statements) : PM_LOCATION_INIT_TOKENS(parser, else_keyword, end_keyword),
4014 TOK2LOC(parser, else_keyword),
4015 statements,
4016 NTOK2LOC(parser, end_keyword)
4017 );
4018}
4019
4023static pm_embedded_statements_node_t *
4024pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
4025 return pm_embedded_statements_node_new(
4026 parser->arena,
4027 ++parser->node_id,
4028 0,
4029 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
4030 TOK2LOC(parser, opening),
4031 statements,
4032 TOK2LOC(parser, closing)
4033 );
4034}
4035
4039static pm_embedded_variable_node_t *
4040pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
4041 return pm_embedded_variable_node_new(
4042 parser->arena,
4043 ++parser->node_id,
4044 0,
4045 PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
4046 TOK2LOC(parser, operator),
4047 variable
4048 );
4049}
4050
4054static pm_ensure_node_t *
4055pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4056 return pm_ensure_node_new(
4057 parser->arena,
4058 ++parser->node_id,
4059 0,
4060 PM_LOCATION_INIT_TOKENS(parser, ensure_keyword, end_keyword),
4061 TOK2LOC(parser, ensure_keyword),
4062 statements,
4063 TOK2LOC(parser, end_keyword)
4064 );
4065}
4066
4070static pm_false_node_t *
4071pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4072 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4073
4074 return pm_false_node_new(
4075 parser->arena,
4076 ++parser->node_id,
4077 PM_NODE_FLAG_STATIC_LITERAL,
4078 PM_LOCATION_INIT_TOKEN(parser, token)
4079 );
4080}
4081
4086static pm_find_pattern_node_t *
4087pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4088 assert(nodes->size >= 2);
4089 pm_node_t *left = nodes->nodes[0];
4090 pm_node_t *right = nodes->nodes[nodes->size - 1];
4091
4092 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4093 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4094
4095 pm_find_pattern_node_t *node = pm_find_pattern_node_new(
4096 parser->arena,
4097 ++parser->node_id,
4098 0,
4099 PM_LOCATION_INIT_NODES(left, right),
4100 NULL,
4101 (pm_splat_node_t *) left,
4102 ((pm_node_list_t) { 0 }),
4103 (pm_splat_node_t *) right,
4104 ((pm_location_t) { 0 }),
4105 ((pm_location_t) { 0 })
4106 );
4107
4108 // For now we're going to just copy over each pointer manually. This could be
4109 // much more efficient, as we could instead resize the node list to only point
4110 // to 1...-1.
4111 for (size_t index = 1; index < nodes->size - 1; index++) {
4112 pm_node_list_append(parser->arena, &node->requireds, nodes->nodes[index]);
4113 }
4114
4115 return node;
4116}
4117
4122static double
4123pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4124 ptrdiff_t diff = token->end - token->start;
4125 if (diff <= 0) return 0.0;
4126
4127 // First, get a buffer of the content.
4128 size_t length = (size_t) diff;
4129 const size_t buffer_size = sizeof(char) * (length + 1);
4130 char *buffer = xmalloc(buffer_size);
4131 memcpy((void *) buffer, token->start, length);
4132
4133 // Next, determine if we need to replace the decimal point because of
4134 // locale-specific options, and then normalize them if we have to.
4135 char decimal_point = *localeconv()->decimal_point;
4136 if (decimal_point != '.') {
4137 for (size_t index = 0; index < length; index++) {
4138 if (buffer[index] == '.') buffer[index] = decimal_point;
4139 }
4140 }
4141
4142 // Next, handle underscores by removing them from the buffer.
4143 for (size_t index = 0; index < length; index++) {
4144 if (buffer[index] == '_') {
4145 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4146 length--;
4147 }
4148 }
4149
4150 // Null-terminate the buffer so that strtod cannot read off the end.
4151 buffer[length] = '\0';
4152
4153 // Now, call strtod to parse the value. Note that CRuby has their own
4154 // version of strtod which avoids locales. We're okay using the locale-aware
4155 // version because we've already validated through the parser that the token
4156 // is in a valid format.
4157 errno = 0;
4158 char *eptr;
4159 double value = strtod(buffer, &eptr);
4160
4161 // This should never happen, because we've already checked that the token
4162 // is in a valid format. However it's good to be safe.
4163 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4164 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, PM_ERR_FLOAT_PARSE);
4165 xfree_sized(buffer, buffer_size);
4166 return 0.0;
4167 }
4168
4169 // If errno is set, then it should only be ERANGE. At this point we need to
4170 // check if it's infinity (it should be).
4171 if (errno == ERANGE && PRISM_ISINF(value)) {
4172 int warn_width;
4173 const char *ellipsis;
4174
4175 if (length > 20) {
4176 warn_width = 20;
4177 ellipsis = "...";
4178 } else {
4179 warn_width = (int) length;
4180 ellipsis = "";
4181 }
4182
4183 pm_diagnostic_list_append_format(&parser->metadata_arena, &parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4184 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4185 }
4186
4187 // Finally we can free the buffer and return the value.
4188 xfree_sized(buffer, buffer_size);
4189 return value;
4190}
4191
4195static pm_float_node_t *
4196pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4197 assert(token->type == PM_TOKEN_FLOAT);
4198
4199 return pm_float_node_new(
4200 parser->arena,
4201 ++parser->node_id,
4202 PM_NODE_FLAG_STATIC_LITERAL,
4203 PM_LOCATION_INIT_TOKEN(parser, token),
4204 pm_double_parse(parser, token)
4205 );
4206}
4207
/**
 * Allocate and initialize a new ImaginaryNode node from a float-imaginary
 * token. The numeric child is a FloatNode built from the same text with the
 * final byte of the token dropped. The node is flagged as a static literal.
 */
static pm_imaginary_node_t *
pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
    assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);

    // NOTE(review): the node id increment below and the nested create call are
    // both arguments of the same call, so their relative order is unspecified.
    return pm_imaginary_node_new(
        parser->arena,
        ++parser->node_id,
        PM_NODE_FLAG_STATIC_LITERAL,
        PM_LOCATION_INIT_TOKEN(parser, token),
        UP(pm_float_node_create(parser, &((pm_token_t) {
            .type = PM_TOKEN_FLOAT,
            .start = token->start,
            .end = token->end - 1
        })))
    );
}
4227
4231static pm_rational_node_t *
4232pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4233 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4234
4235 pm_rational_node_t *node = pm_rational_node_new(
4236 parser->arena,
4237 ++parser->node_id,
4238 PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4239 PM_LOCATION_INIT_TOKEN(parser, token),
4240 ((pm_integer_t) { 0 }),
4241 ((pm_integer_t) { 0 })
4242 );
4243
4244 const uint8_t *start = token->start;
4245 const uint8_t *end = token->end - 1; // r
4246
4247 while (start < end && *start == '0') start++; // 0.1 -> .1
4248 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4249
4250 size_t length = (size_t) (end - start);
4251 if (length == 1) {
4252 node->denominator.value = 1;
4253 return node;
4254 }
4255
4256 const uint8_t *point = memchr(start, '.', length);
4257 assert(point && "should have a decimal point");
4258
4259 uint8_t *digits = xmalloc(length);
4260 if (digits == NULL) {
4261 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4262 abort();
4263 }
4264
4265 memcpy(digits, start, (unsigned long) (point - start));
4266 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4267 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4268
4269 size_t fract_length = 0;
4270 for (const uint8_t *fract = point; fract < end; ++fract) {
4271 if (*fract != '_') ++fract_length;
4272 }
4273 digits[0] = '1';
4274 if (fract_length > 1) memset(digits + 1, '0', fract_length - 1);
4275 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length);
4276 xfree_sized(digits, length);
4277
4278 pm_integers_reduce(&node->numerator, &node->denominator);
4279 pm_integer_arena_move(parser->arena, &node->numerator);
4280 pm_integer_arena_move(parser->arena, &node->denominator);
4281 return node;
4282}
4283
/**
 * Allocate and initialize a new ImaginaryNode node from a
 * float-rational-imaginary token. The numeric child is a RationalNode built
 * from the same text with the final byte of the token dropped. The node is
 * flagged as a static literal.
 */
static pm_imaginary_node_t *
pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
    assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);

    // NOTE(review): the node id increment below and the nested create call are
    // both arguments of the same call, so their relative order is unspecified.
    return pm_imaginary_node_new(
        parser->arena,
        ++parser->node_id,
        PM_NODE_FLAG_STATIC_LITERAL,
        PM_LOCATION_INIT_TOKEN(parser, token),
        UP(pm_float_node_rational_create(parser, &((pm_token_t) {
            .type = PM_TOKEN_FLOAT_RATIONAL,
            .start = token->start,
            .end = token->end - 1
        })))
    );
}
4304
4308static pm_for_node_t *
4309pm_for_node_create(
4310 pm_parser_t *parser,
4311 pm_node_t *index,
4312 pm_node_t *collection,
4313 pm_statements_node_t *statements,
4314 const pm_token_t *for_keyword,
4315 const pm_token_t *in_keyword,
4316 const pm_token_t *do_keyword,
4317 const pm_token_t *end_keyword
4318) {
4319 return pm_for_node_new(
4320 parser->arena,
4321 ++parser->node_id,
4322 0,
4323 PM_LOCATION_INIT_TOKENS(parser, for_keyword, end_keyword),
4324 index,
4325 collection,
4326 statements,
4327 TOK2LOC(parser, for_keyword),
4328 TOK2LOC(parser, in_keyword),
4329 NTOK2LOC(parser, do_keyword),
4330 TOK2LOC(parser, end_keyword)
4331 );
4332}
4333
4337static pm_forwarding_arguments_node_t *
4338pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4339 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4340
4341 return pm_forwarding_arguments_node_new(
4342 parser->arena,
4343 ++parser->node_id,
4344 0,
4345 PM_LOCATION_INIT_TOKEN(parser, token)
4346 );
4347}
4348
4352static pm_forwarding_parameter_node_t *
4353pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4354 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4355
4356 return pm_forwarding_parameter_node_new(
4357 parser->arena,
4358 ++parser->node_id,
4359 0,
4360 PM_LOCATION_INIT_TOKEN(parser, token)
4361 );
4362}
4363
4367static pm_forwarding_super_node_t *
4368pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4369 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4370 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4371
4372 pm_block_node_t *block = NULL;
4373 if (arguments->block != NULL) {
4374 block = (pm_block_node_t *) arguments->block;
4375 }
4376
4377 return pm_forwarding_super_node_new(
4378 parser->arena,
4379 ++parser->node_id,
4380 0,
4381 (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block),
4382 PM_LOCATION_INIT_TOKEN(parser, token),
4383 block
4384 );
4385}
4386
4391static pm_hash_pattern_node_t *
4392pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4393 return pm_hash_pattern_node_new(
4394 parser->arena,
4395 ++parser->node_id,
4396 0,
4397 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
4398 NULL,
4399 ((pm_node_list_t) { 0 }),
4400 NULL,
4401 TOK2LOC(parser, opening),
4402 TOK2LOC(parser, closing)
4403 );
4404}
4405
4409static pm_hash_pattern_node_t *
4410pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4411 uint32_t start;
4412 uint32_t end;
4413
4414 if (elements->size > 0) {
4415 if (rest) {
4416 start = MIN(PM_NODE_START(rest), PM_NODE_START(elements->nodes[0]));
4417 end = MAX(PM_NODE_END(rest), PM_NODE_END(elements->nodes[elements->size - 1]));
4418 } else {
4419 start = PM_NODE_START(elements->nodes[0]);
4420 end = PM_NODE_END(elements->nodes[elements->size - 1]);
4421 }
4422 } else {
4423 assert(rest != NULL);
4424 start = PM_NODE_START(rest);
4425 end = PM_NODE_END(rest);
4426 }
4427
4428 pm_hash_pattern_node_t *node = pm_hash_pattern_node_new(
4429 parser->arena,
4430 ++parser->node_id,
4431 0,
4432 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4433 NULL,
4434 ((pm_node_list_t) { 0 }),
4435 rest,
4436 ((pm_location_t) { 0 }),
4437 ((pm_location_t) { 0 })
4438 );
4439
4440 pm_node_list_concat(parser->arena, &node->elements, elements);
4441 return node;
4442}
4443
4447static pm_constant_id_t
4448pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4449 switch (PM_NODE_TYPE(target)) {
4450 case PM_GLOBAL_VARIABLE_READ_NODE:
4451 return ((pm_global_variable_read_node_t *) target)->name;
4452 case PM_BACK_REFERENCE_READ_NODE:
4453 return ((pm_back_reference_read_node_t *) target)->name;
4454 case PM_NUMBERED_REFERENCE_READ_NODE:
4455 // This will only ever happen in the event of a syntax error, but we
4456 // still need to provide something for the node.
4457 return pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
4458 default:
4459 assert(false && "unreachable");
4460 return (pm_constant_id_t) -1;
4461 }
4462}
4463
4467static pm_global_variable_and_write_node_t *
4468pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4469 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4470
4471 return pm_global_variable_and_write_node_new(
4472 parser->arena,
4473 ++parser->node_id,
4474 0,
4475 PM_LOCATION_INIT_NODES(target, value),
4476 pm_global_variable_write_name(parser, target),
4477 target->location,
4478 TOK2LOC(parser, operator),
4479 value
4480 );
4481}
4482
4486static pm_global_variable_operator_write_node_t *
4487pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4488 return pm_global_variable_operator_write_node_new(
4489 parser->arena,
4490 ++parser->node_id,
4491 0,
4492 PM_LOCATION_INIT_NODES(target, value),
4493 pm_global_variable_write_name(parser, target),
4494 target->location,
4495 TOK2LOC(parser, operator),
4496 value,
4497 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
4498 );
4499}
4500
4504static pm_global_variable_or_write_node_t *
4505pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4506 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4507
4508 return pm_global_variable_or_write_node_new(
4509 parser->arena,
4510 ++parser->node_id,
4511 0,
4512 PM_LOCATION_INIT_NODES(target, value),
4513 pm_global_variable_write_name(parser, target),
4514 target->location,
4515 TOK2LOC(parser, operator),
4516 value
4517 );
4518}
4519
4523static pm_global_variable_read_node_t *
4524pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4525 return pm_global_variable_read_node_new(
4526 parser->arena,
4527 ++parser->node_id,
4528 0,
4529 PM_LOCATION_INIT_TOKEN(parser, name),
4530 pm_parser_constant_id_token(parser, name)
4531 );
4532}
4533
4537static pm_global_variable_read_node_t *
4538pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4539 return pm_global_variable_read_node_new(
4540 parser->arena,
4541 ++parser->node_id,
4542 0,
4543 PM_LOCATION_INIT_UNSET,
4544 name
4545 );
4546}
4547
4551static pm_global_variable_write_node_t *
4552pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4553 return pm_global_variable_write_node_new(
4554 parser->arena,
4555 ++parser->node_id,
4556 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4557 PM_LOCATION_INIT_NODES(target, value),
4558 pm_global_variable_write_name(parser, target),
4559 target->location,
4560 value,
4561 TOK2LOC(parser, operator)
4562 );
4563}
4564
4568static pm_global_variable_write_node_t *
4569pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4570 return pm_global_variable_write_node_new(
4571 parser->arena,
4572 ++parser->node_id,
4573 0,
4574 PM_LOCATION_INIT_UNSET,
4575 name,
4576 ((pm_location_t) { 0 }),
4577 value,
4578 ((pm_location_t) { 0 })
4579 );
4580}
4581
4585static pm_hash_node_t *
4586pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4587 assert(opening != NULL);
4588
4589 return pm_hash_node_new(
4590 parser->arena,
4591 ++parser->node_id,
4592 PM_NODE_FLAG_STATIC_LITERAL,
4593 PM_LOCATION_INIT_TOKEN(parser, opening),
4594 TOK2LOC(parser, opening),
4595 ((pm_node_list_t) { 0 }),
4596 ((pm_location_t) { 0 })
4597 );
4598}
4599
4603static PRISM_INLINE void
4604pm_hash_node_elements_append(pm_arena_t *arena, pm_hash_node_t *hash, pm_node_t *element) {
4605 pm_node_list_append(arena, &hash->elements, element);
4606
4607 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4608 if (static_literal) {
4609 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4610 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4611 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4612 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4613 }
4614
4615 if (!static_literal) {
4616 pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
4617 }
4618}
4619
4620static PRISM_INLINE void
4621pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) {
4622 PM_NODE_LENGTH_SET_TOKEN(parser, hash, token);
4623 hash->closing_loc = TOK2LOC(parser, token);
4624}
4625
4629static pm_if_node_t *
4630pm_if_node_create(pm_parser_t *parser,
4631 const pm_token_t *if_keyword,
4632 pm_node_t *predicate,
4633 const pm_token_t *then_keyword,
4634 pm_statements_node_t *statements,
4635 pm_node_t *subsequent,
4636 const pm_token_t *end_keyword
4637) {
4638 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4639
4640 uint32_t start = PM_TOKEN_START(parser, if_keyword);
4641 uint32_t end;
4642
4643 if (end_keyword != NULL) {
4644 end = PM_TOKEN_END(parser, end_keyword);
4645 } else if (subsequent != NULL) {
4646 end = PM_NODE_END(subsequent);
4647 } else if (pm_statements_node_body_length(statements) != 0) {
4648 end = PM_NODE_END(statements);
4649 } else {
4650 end = PM_NODE_END(predicate);
4651 }
4652
4653 return pm_if_node_new(
4654 parser->arena,
4655 ++parser->node_id,
4656 PM_NODE_FLAG_NEWLINE,
4657 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4658 TOK2LOC(parser, if_keyword),
4659 predicate,
4660 NTOK2LOC(parser, then_keyword),
4661 statements,
4662 subsequent,
4663 NTOK2LOC(parser, end_keyword)
4664 );
4665}
4666
4670static pm_if_node_t *
4671pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4672 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4673
4674 pm_statements_node_t *statements = pm_statements_node_create(parser);
4675 pm_statements_node_body_append(parser, statements, statement, true);
4676
4677 return pm_if_node_new(
4678 parser->arena,
4679 ++parser->node_id,
4680 PM_NODE_FLAG_NEWLINE,
4681 PM_LOCATION_INIT_NODES(statement, predicate),
4682 TOK2LOC(parser, if_keyword),
4683 predicate,
4684 ((pm_location_t) { 0 }),
4685 statements,
4686 NULL,
4687 ((pm_location_t) { 0 })
4688 );
4689}
4690
4694static pm_if_node_t *
4695pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4696 pm_assert_value_expression(parser, predicate);
4697 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4698
4699 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4700 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4701
4702 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4703 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4704
4705 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, NULL);
4706 return pm_if_node_new(
4707 parser->arena,
4708 ++parser->node_id,
4709 PM_NODE_FLAG_NEWLINE,
4710 PM_LOCATION_INIT_NODES(predicate, false_expression),
4711 ((pm_location_t) { 0 }),
4712 predicate,
4713 TOK2LOC(parser, qmark),
4714 if_statements,
4715 UP(else_node),
4716 ((pm_location_t) { 0 })
4717 );
4718}
4719
4720static PRISM_INLINE void
4721pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) {
4722 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4723 node->end_keyword_loc = TOK2LOC(parser, keyword);
4724}
4725
4726static PRISM_INLINE void
4727pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) {
4728 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4729 node->end_keyword_loc = TOK2LOC(parser, keyword);
4730}
4731
4735static pm_implicit_node_t *
4736pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4737 return pm_implicit_node_new(
4738 parser->arena,
4739 ++parser->node_id,
4740 0,
4741 PM_LOCATION_INIT_NODE(value),
4742 value
4743 );
4744}
4745
4749static pm_implicit_rest_node_t *
4750pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4751 assert(token->type == PM_TOKEN_COMMA);
4752
4753 return pm_implicit_rest_node_new(
4754 parser->arena,
4755 ++parser->node_id,
4756 0,
4757 PM_LOCATION_INIT_TOKEN(parser, token)
4758 );
4759}
4760
4764static pm_integer_node_t *
4765pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4766 assert(token->type == PM_TOKEN_INTEGER);
4767
4768 pm_integer_node_t *node = pm_integer_node_new(
4769 parser->arena,
4770 ++parser->node_id,
4771 base | PM_NODE_FLAG_STATIC_LITERAL,
4772 PM_LOCATION_INIT_TOKEN(parser, token),
4773 ((pm_integer_t) { 0 })
4774 );
4775
4776 if (parser->integer.lexed) {
4777 // The value was already computed during lexing.
4778 node->value.value = parser->integer.value;
4779 parser->integer.lexed = false;
4780 } else {
4781 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4782 switch (base) {
4783 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4784 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4785 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4786 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4787 default: assert(false && "unreachable"); break;
4788 }
4789
4790 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4791 pm_integer_arena_move(parser->arena, &node->value);
4792 }
4793
4794 return node;
4795}
4796
4801static pm_imaginary_node_t *
4802pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4803 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4804
4805 return pm_imaginary_node_new(
4806 parser->arena,
4807 ++parser->node_id,
4808 PM_NODE_FLAG_STATIC_LITERAL,
4809 PM_LOCATION_INIT_TOKEN(parser, token),
4810 UP(pm_integer_node_create(parser, base, &((pm_token_t) {
4811 .type = PM_TOKEN_INTEGER,
4812 .start = token->start,
4813 .end = token->end - 1
4814 })))
4815 );
4816}
4817
4822static pm_rational_node_t *
4823pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4824 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4825
4826 pm_rational_node_t *node = pm_rational_node_new(
4827 parser->arena,
4828 ++parser->node_id,
4829 base | PM_NODE_FLAG_STATIC_LITERAL,
4830 PM_LOCATION_INIT_TOKEN(parser, token),
4831 ((pm_integer_t) { 0 }),
4832 ((pm_integer_t) { .value = 1 })
4833 );
4834
4835 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4836 switch (base) {
4837 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4838 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4839 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4840 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4841 default: assert(false && "unreachable"); break;
4842 }
4843
4844 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4845 pm_integer_arena_move(parser->arena, &node->numerator);
4846
4847 return node;
4848}
4849
4854static pm_imaginary_node_t *
4855pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4856 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4857
4858 return pm_imaginary_node_new(
4859 parser->arena,
4860 ++parser->node_id,
4861 PM_NODE_FLAG_STATIC_LITERAL,
4862 PM_LOCATION_INIT_TOKEN(parser, token),
4863 UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4864 .type = PM_TOKEN_INTEGER_RATIONAL,
4865 .start = token->start,
4866 .end = token->end - 1
4867 })))
4868 );
4869}
4870
4874static pm_in_node_t *
4875pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4876 uint32_t start = PM_TOKEN_START(parser, in_keyword);
4877 uint32_t end;
4878
4879 if (statements != NULL) {
4880 end = PM_NODE_END(statements);
4881 } else if (then_keyword != NULL) {
4882 end = PM_TOKEN_END(parser, then_keyword);
4883 } else {
4884 end = PM_NODE_END(pattern);
4885 }
4886
4887 return pm_in_node_new(
4888 parser->arena,
4889 ++parser->node_id,
4890 0,
4891 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4892 pattern,
4893 statements,
4894 TOK2LOC(parser, in_keyword),
4895 NTOK2LOC(parser, then_keyword)
4896 );
4897}
4898
4902static pm_instance_variable_and_write_node_t *
4903pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4904 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4905
4906 return pm_instance_variable_and_write_node_new(
4907 parser->arena,
4908 ++parser->node_id,
4909 0,
4910 PM_LOCATION_INIT_NODES(target, value),
4911 target->name,
4912 target->base.location,
4913 TOK2LOC(parser, operator),
4914 value
4915 );
4916}
4917
4921static pm_instance_variable_operator_write_node_t *
4922pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4923 return pm_instance_variable_operator_write_node_new(
4924 parser->arena,
4925 ++parser->node_id,
4926 0,
4927 PM_LOCATION_INIT_NODES(target, value),
4928 target->name,
4929 target->base.location,
4930 TOK2LOC(parser, operator),
4931 value,
4932 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
4933 );
4934}
4935
4939static pm_instance_variable_or_write_node_t *
4940pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4941 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4942
4943 return pm_instance_variable_or_write_node_new(
4944 parser->arena,
4945 ++parser->node_id,
4946 0,
4947 PM_LOCATION_INIT_NODES(target, value),
4948 target->name,
4949 target->base.location,
4950 TOK2LOC(parser, operator),
4951 value
4952 );
4953}
4954
4958static pm_instance_variable_read_node_t *
4959pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
4960 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
4961
4962 return pm_instance_variable_read_node_new(
4963 parser->arena,
4964 ++parser->node_id,
4965 0,
4966 PM_LOCATION_INIT_TOKEN(parser, token),
4967 pm_parser_constant_id_token(parser, token)
4968 );
4969}
4970
4975static pm_instance_variable_write_node_t *
4976pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
4977 return pm_instance_variable_write_node_new(
4978 parser->arena,
4979 ++parser->node_id,
4980 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4981 PM_LOCATION_INIT_NODES(read_node, value),
4982 read_node->name,
4983 read_node->base.location,
4984 value,
4985 TOK2LOC(parser, operator)
4986 );
4987}
4988
4994static void
4995pm_interpolated_node_append(pm_arena_t *arena, pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
4996 switch (PM_NODE_TYPE(part)) {
4997 case PM_STRING_NODE:
4998 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4999 break;
5000 case PM_EMBEDDED_STATEMENTS_NODE: {
5001 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5002 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5003
5004 if (embedded == NULL) {
5005 // If there are no statements or more than one statement, then
5006 // we lose the static literal flag.
5007 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5008 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5009 // If the embedded statement is a string, then we can keep the
5010 // static literal flag and mark the string as frozen.
5011 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5012 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5013 // If the embedded statement is an interpolated string and it's
5014 // a static literal, then we can keep the static literal flag.
5015 } else {
5016 // Otherwise we lose the static literal flag.
5017 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5018 }
5019
5020 break;
5021 }
5022 case PM_EMBEDDED_VARIABLE_NODE:
5023 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
5024 break;
5025 default:
5026 assert(false && "unexpected node type");
5027 break;
5028 }
5029
5030 pm_node_list_append(arena, parts, part);
5031}
5032
5036static pm_interpolated_regular_expression_node_t *
5037pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5038 return pm_interpolated_regular_expression_node_new(
5039 parser->arena,
5040 ++parser->node_id,
5041 PM_NODE_FLAG_STATIC_LITERAL,
5042 PM_LOCATION_INIT_TOKEN(parser, opening),
5043 TOK2LOC(parser, opening),
5044 ((pm_node_list_t) { 0 }),
5045 TOK2LOC(parser, opening)
5046 );
5047}
5048
5049static PRISM_INLINE void
5050pm_interpolated_regular_expression_node_append(pm_arena_t *arena, pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5051 if (PM_NODE_START(node) > PM_NODE_START(part)) {
5052 PM_NODE_START_SET_NODE(node, part);
5053 }
5054 if (PM_NODE_END(node) < PM_NODE_END(part)) {
5055 PM_NODE_LENGTH_SET_NODE(node, part);
5056 }
5057
5058 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
5059}
5060
5061static PRISM_INLINE void
5062pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5063 node->closing_loc = TOK2LOC(parser, closing);
5064 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5065 pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
5066}
5067
5091static PRISM_INLINE void
5092pm_interpolated_string_node_append(pm_parser_t *parser, pm_interpolated_string_node_t *node, pm_node_t *part) {
5093 pm_arena_t *arena = parser->arena;
5094#define CLEAR_FLAGS(node) \
5095 node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5096
5097#define MUTABLE_FLAGS(node) \
5098 node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5099
5100 if (node->parts.size == 0 && node->opening_loc.length == 0) {
5101 PM_NODE_START_SET_NODE(node, part);
5102 }
5103
5104 if (PM_NODE_END(part) > PM_NODE_END(node)) {
5105 PM_NODE_LENGTH_SET_NODE(node, part);
5106 }
5107
5108 switch (PM_NODE_TYPE(part)) {
5109 case PM_STRING_NODE:
5110 // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
5111 // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
5112 // as long as this interpolation only consists of other string literals.
5113 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
5114 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
5115 }
5116 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5117 break;
5118 case PM_INTERPOLATED_STRING_NODE:
5119 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5120 // If the string that we're concatenating is a static literal,
5121 // then we can keep the static literal flag for this string.
5122 } else {
5123 // Otherwise, we lose the static literal flag here and we should
5124 // also clear the mutability flags.
5125 CLEAR_FLAGS(node);
5126 }
5127 break;
5128 case PM_EMBEDDED_STATEMENTS_NODE: {
5129 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5130 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5131
5132 if (embedded == NULL) {
5133 // If we're embedding multiple statements or no statements, then
5134 // the string is not longer a static literal.
5135 CLEAR_FLAGS(node);
5136 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5137 // If the embedded statement is a string, then we can make that
5138 // string as frozen and static literal, and not touch the static
5139 // literal status of this string.
5140 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5141
5142 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5143 MUTABLE_FLAGS(node);
5144 }
5145 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5146 // If the embedded statement is an interpolated string, but that
5147 // string is marked as static literal, then we can keep our
5148 // static literal status for this string.
5149 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5150 MUTABLE_FLAGS(node);
5151 }
5152 } else {
5153 // In all other cases, we lose the static literal flag here and
5154 // become mutable.
5155 CLEAR_FLAGS(node);
5156 }
5157
5158 break;
5159 }
5160 case PM_EMBEDDED_VARIABLE_NODE:
5161 // Embedded variables clear static literal, which means we also
5162 // should clear the mutability flags.
5163 CLEAR_FLAGS(node);
5164 break;
5165 case PM_X_STRING_NODE:
5166 case PM_INTERPOLATED_X_STRING_NODE:
5167 case PM_SYMBOL_NODE:
5168 case PM_INTERPOLATED_SYMBOL_NODE:
5169 // These will only happen in error cases. But we want to handle it
5170 // here so that we don't fail the assertion.
5171 CLEAR_FLAGS(node);
5172 pm_node_list_append(arena, &node->parts, UP(pm_error_recovery_node_create_unexpected(parser, part)));
5173 return;
5174 case PM_ERROR_RECOVERY_NODE:
5175 CLEAR_FLAGS(node);
5176 break;
5177 default:
5178 assert(false && "unexpected node type");
5179 break;
5180 }
5181
5182 pm_node_list_append(arena, &node->parts, part);
5183
5184#undef CLEAR_FLAGS
5185#undef MUTABLE_FLAGS
5186}
5187
5191static pm_interpolated_string_node_t *
5192pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5193 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5194
5195 switch (parser->frozen_string_literal) {
5196 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5197 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5198 break;
5199 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5200 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5201 break;
5202 }
5203
5204 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
5205 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
5206
5207 pm_interpolated_string_node_t *node = pm_interpolated_string_node_new(
5208 parser->arena,
5209 ++parser->node_id,
5210 flags,
5211 ((pm_location_t) { .start = start, .length = U32(end - start) }),
5212 NTOK2LOC(parser, opening),
5213 ((pm_node_list_t) { 0 }),
5214 NTOK2LOC(parser, closing)
5215 );
5216
5217 if (parts != NULL) {
5218 pm_node_t *part;
5219 PM_NODE_LIST_FOREACH(parts, index, part) {
5220 pm_interpolated_string_node_append(parser, node, part);
5221 }
5222 }
5223
5224 return node;
5225}
5226
5230static void
5231pm_interpolated_string_node_closing_set(const pm_parser_t *parser, pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5232 node->closing_loc = TOK2LOC(parser, closing);
5233 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5234}
5235
5236static void
5237pm_interpolated_symbol_node_append(pm_arena_t *arena, pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5238 if (node->parts.size == 0 && node->opening_loc.length == 0) {
5239 PM_NODE_START_SET_NODE(node, part);
5240 }
5241
5242 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
5243
5244 if (PM_NODE_END(part) > PM_NODE_END(node)) {
5245 PM_NODE_LENGTH_SET_NODE(node, part);
5246 }
5247}
5248
5249static void
5250pm_interpolated_symbol_node_closing_loc_set(const pm_parser_t *parser, pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5251 node->closing_loc = TOK2LOC(parser, closing);
5252 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5253}
5254
5258static pm_interpolated_symbol_node_t *
5259pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5260 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
5261 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
5262
5263 pm_interpolated_symbol_node_t *node = pm_interpolated_symbol_node_new(
5264 parser->arena,
5265 ++parser->node_id,
5266 PM_NODE_FLAG_STATIC_LITERAL,
5267 ((pm_location_t) { .start = start, .length = U32(end - start) }),
5268 NTOK2LOC(parser, opening),
5269 ((pm_node_list_t) { 0 }),
5270 NTOK2LOC(parser, closing)
5271 );
5272
5273 if (parts != NULL) {
5274 pm_node_t *part;
5275 PM_NODE_LIST_FOREACH(parts, index, part) {
5276 pm_interpolated_symbol_node_append(parser->arena, node, part);
5277 }
5278 }
5279
5280 return node;
5281}
5282
5286static pm_interpolated_x_string_node_t *
5287pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5288 return pm_interpolated_x_string_node_new(
5289 parser->arena,
5290 ++parser->node_id,
5291 0,
5292 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5293 TOK2LOC(parser, opening),
5294 ((pm_node_list_t) { 0 }),
5295 TOK2LOC(parser, closing)
5296 );
5297}
5298
5299static PRISM_INLINE void
5300pm_interpolated_xstring_node_append(pm_arena_t *arena, pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5301 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
5302 PM_NODE_LENGTH_SET_NODE(node, part);
5303}
5304
5305static PRISM_INLINE void
5306pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5307 node->closing_loc = TOK2LOC(parser, closing);
5308 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
5309}
5310
5314static pm_it_local_variable_read_node_t *
5315pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5316 return pm_it_local_variable_read_node_new(
5317 parser->arena,
5318 ++parser->node_id,
5319 0,
5320 PM_LOCATION_INIT_TOKEN(parser, name)
5321 );
5322}
5323
5327static pm_it_parameters_node_t *
5328pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5329 return pm_it_parameters_node_new(
5330 parser->arena,
5331 ++parser->node_id,
5332 0,
5333 PM_LOCATION_INIT_TOKENS(parser, opening, closing)
5334 );
5335}
5336
5340static pm_keyword_hash_node_t *
5341pm_keyword_hash_node_create(pm_parser_t *parser) {
5342 return pm_keyword_hash_node_new(
5343 parser->arena,
5344 ++parser->node_id,
5345 PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5346 PM_LOCATION_INIT_UNSET,
5347 ((pm_node_list_t) { 0 })
5348 );
5349}
5350
5354static void
5355pm_keyword_hash_node_elements_append(pm_arena_t *arena, pm_keyword_hash_node_t *hash, pm_node_t *element) {
5356 // If the element being added is not an AssocNode or does not have a symbol
5357 // key, then we want to turn the SYMBOL_KEYS flag off.
5358 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5359 pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5360 }
5361
5362 pm_node_list_append(arena, &hash->elements, element);
5363 if (PM_NODE_LENGTH(hash) == 0) {
5364 PM_NODE_START_SET_NODE(hash, element);
5365 }
5366 PM_NODE_LENGTH_SET_NODE(hash, element);
5367}
5368
5372static pm_required_keyword_parameter_node_t *
5373pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5374 return pm_required_keyword_parameter_node_new(
5375 parser->arena,
5376 ++parser->node_id,
5377 0,
5378 PM_LOCATION_INIT_TOKEN(parser, name),
5379 pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5380 TOK2LOC(parser, name)
5381 );
5382}
5383
5387static pm_optional_keyword_parameter_node_t *
5388pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5389 return pm_optional_keyword_parameter_node_new(
5390 parser->arena,
5391 ++parser->node_id,
5392 0,
5393 PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
5394 pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5395 TOK2LOC(parser, name),
5396 value
5397 );
5398}
5399
5403static pm_keyword_rest_parameter_node_t *
5404pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5405 return pm_keyword_rest_parameter_node_new(
5406 parser->arena,
5407 ++parser->node_id,
5408 0,
5409 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
5410 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
5411 NTOK2LOC(parser, name),
5412 TOK2LOC(parser, operator)
5413 );
5414}
5415
5419static pm_lambda_node_t *
5420pm_lambda_node_create(
5421 pm_parser_t *parser,
5422 pm_constant_id_list_t *locals,
5423 const pm_token_t *operator,
5424 const pm_token_t *opening,
5425 const pm_token_t *closing,
5426 pm_node_t *parameters,
5427 pm_node_t *body
5428) {
5429 return pm_lambda_node_new(
5430 parser->arena,
5431 ++parser->node_id,
5432 0,
5433 PM_LOCATION_INIT_TOKENS(parser, operator, closing),
5434 *locals,
5435 TOK2LOC(parser, operator),
5436 TOK2LOC(parser, opening),
5437 TOK2LOC(parser, closing),
5438 parameters,
5439 body
5440 );
5441}
5442
5446static pm_local_variable_and_write_node_t *
5447pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5448 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5449 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5450
5451 return pm_local_variable_and_write_node_new(
5452 parser->arena,
5453 ++parser->node_id,
5454 0,
5455 PM_LOCATION_INIT_NODES(target, value),
5456 target->location,
5457 TOK2LOC(parser, operator),
5458 value,
5459 name,
5460 depth
5461 );
5462}
5463
5467static pm_local_variable_operator_write_node_t *
5468pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5469 return pm_local_variable_operator_write_node_new(
5470 parser->arena,
5471 ++parser->node_id,
5472 0,
5473 PM_LOCATION_INIT_NODES(target, value),
5474 target->location,
5475 TOK2LOC(parser, operator),
5476 value,
5477 name,
5478 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
5479 depth
5480 );
5481}
5482
5486static pm_local_variable_or_write_node_t *
5487pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5488 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5489 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5490
5491 return pm_local_variable_or_write_node_new(
5492 parser->arena,
5493 ++parser->node_id,
5494 0,
5495 PM_LOCATION_INIT_NODES(target, value),
5496 target->location,
5497 TOK2LOC(parser, operator),
5498 value,
5499 name,
5500 depth
5501 );
5502}
5503
5507static pm_local_variable_read_node_t *
5508pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5509 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5510
5511 return pm_local_variable_read_node_new(
5512 parser->arena,
5513 ++parser->node_id,
5514 0,
5515 PM_LOCATION_INIT_TOKEN(parser, name),
5516 name_id,
5517 depth
5518 );
5519}
5520
5524static pm_local_variable_read_node_t *
5525pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5526 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5527 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5528}
5529
5534static pm_local_variable_read_node_t *
5535pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5536 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5537 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5538}
5539
5543static pm_local_variable_write_node_t *
5544pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5545 return pm_local_variable_write_node_new(
5546 parser->arena,
5547 ++parser->node_id,
5548 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5549 ((pm_location_t) { .start = name_loc->start, .length = PM_NODE_END(value) - name_loc->start }),
5550 name,
5551 depth,
5552 *name_loc,
5553 value,
5554 TOK2LOC(parser, operator)
5555 );
5556}
5557
5561static PRISM_INLINE bool
5562pm_token_is_it(const uint8_t *start, const uint8_t *end) {
5563 return (end - start == 2) && (start[0] == 'i') && (start[1] == 't');
5564}
5565
5570static PRISM_INLINE bool
5571pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) {
5572 return (
5573 (length == 2) &&
5574 (parser->start[start] == '_') &&
5575 (parser->start[start + 1] != '0') &&
5576 pm_char_is_decimal_digit(parser->start[start + 1])
5577 );
5578}
5579
5584static PRISM_INLINE void
5585pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) {
5586 if (pm_token_is_numbered_parameter(parser, start, length)) {
5587 PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start);
5588 }
5589}
5590
5595static pm_local_variable_target_node_t *
5596pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5597 pm_refute_numbered_parameter(parser, location->start, location->length);
5598
5599 return pm_local_variable_target_node_new(
5600 parser->arena,
5601 ++parser->node_id,
5602 0,
5603 ((pm_location_t) { .start = location->start, .length = location->length }),
5604 name,
5605 depth
5606 );
5607}
5608
5612static pm_match_predicate_node_t *
5613pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5614 pm_assert_value_expression(parser, value);
5615
5616 return pm_match_predicate_node_new(
5617 parser->arena,
5618 ++parser->node_id,
5619 0,
5620 PM_LOCATION_INIT_NODES(value, pattern),
5621 value,
5622 pattern,
5623 TOK2LOC(parser, operator)
5624 );
5625}
5626
5630static pm_match_required_node_t *
5631pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5632 pm_assert_value_expression(parser, value);
5633
5634 return pm_match_required_node_new(
5635 parser->arena,
5636 ++parser->node_id,
5637 0,
5638 PM_LOCATION_INIT_NODES(value, pattern),
5639 value,
5640 pattern,
5641 TOK2LOC(parser, operator)
5642 );
5643}
5644
5648static pm_match_write_node_t *
5649pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5650 return pm_match_write_node_new(
5651 parser->arena,
5652 ++parser->node_id,
5653 0,
5654 PM_LOCATION_INIT_NODE(call),
5655 call,
5656 ((pm_node_list_t) { 0 })
5657 );
5658}
5659
5663static pm_module_node_t *
5664pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5665 return pm_module_node_new(
5666 parser->arena,
5667 ++parser->node_id,
5668 0,
5669 PM_LOCATION_INIT_TOKENS(parser, module_keyword, end_keyword),
5670 (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5671 TOK2LOC(parser, module_keyword),
5672 constant_path,
5673 body,
5674 TOK2LOC(parser, end_keyword),
5675 pm_parser_constant_id_token(parser, name)
5676 );
5677}
5678
5682static pm_multi_target_node_t *
5683pm_multi_target_node_create(pm_parser_t *parser) {
5684 return pm_multi_target_node_new(
5685 parser->arena,
5686 ++parser->node_id,
5687 0,
5688 PM_LOCATION_INIT_UNSET,
5689 ((pm_node_list_t) { 0 }),
5690 NULL,
5691 ((pm_node_list_t) { 0 }),
5692 ((pm_location_t) { 0 }),
5693 ((pm_location_t) { 0 })
5694 );
5695}
5696
5700static void
5701pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5702 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5703 if (node->rest == NULL) {
5704 node->rest = target;
5705 } else {
5706 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5707 pm_node_list_append(parser->arena, &node->rights, target);
5708 }
5709 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5710 if (node->rest == NULL) {
5711 node->rest = target;
5712 } else {
5713 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5714 pm_node_list_append(parser->arena, &node->rights, target);
5715 }
5716 } else if (node->rest == NULL) {
5717 pm_node_list_append(parser->arena, &node->lefts, target);
5718 } else {
5719 pm_node_list_append(parser->arena, &node->rights, target);
5720 }
5721
5722 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_START(node) > PM_NODE_START(target))) {
5723 PM_NODE_START_SET_NODE(node, target);
5724 }
5725
5726 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_END(node) < PM_NODE_END(target))) {
5727 PM_NODE_LENGTH_SET_NODE(node, target);
5728 }
5729}
5730
5734static void
5735pm_multi_target_node_opening_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *lparen) {
5736 PM_NODE_START_SET_TOKEN(parser, node, lparen);
5737 PM_NODE_LENGTH_SET_TOKEN(parser, node, lparen);
5738 node->lparen_loc = TOK2LOC(parser, lparen);
5739}
5740
5744static void
5745pm_multi_target_node_closing_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *rparen) {
5746 PM_NODE_LENGTH_SET_TOKEN(parser, node, rparen);
5747 node->rparen_loc = TOK2LOC(parser, rparen);
5748}
5749
5753static pm_multi_write_node_t *
5754pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5755 /* The target is no longer necessary because we have reused its children. It
5756 * is arena-allocated so no explicit free is needed. */
5757 return pm_multi_write_node_new(
5758 parser->arena,
5759 ++parser->node_id,
5760 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5761 PM_LOCATION_INIT_NODES(target, value),
5762 target->lefts,
5763 target->rest,
5764 target->rights,
5765 target->lparen_loc,
5766 target->rparen_loc,
5767 TOK2LOC(parser, operator),
5768 value
5769 );
5770}
5771
5775static pm_next_node_t *
5776pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
5777 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
5778
5779 return pm_next_node_new(
5780 parser->arena,
5781 ++parser->node_id,
5782 0,
5783 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
5784 arguments,
5785 TOK2LOC(parser, keyword)
5786 );
5787}
5788
5792static pm_nil_node_t *
5793pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
5794 assert(token->type == PM_TOKEN_KEYWORD_NIL);
5795
5796 return pm_nil_node_new(
5797 parser->arena,
5798 ++parser->node_id,
5799 PM_NODE_FLAG_STATIC_LITERAL,
5800 PM_LOCATION_INIT_TOKEN(parser, token)
5801 );
5802}
5803
5807static pm_no_block_parameter_node_t *
5808pm_no_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5809 assert(operator->type == PM_TOKEN_AMPERSAND || operator->type == PM_TOKEN_UAMPERSAND);
5810 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5811
5812 return pm_no_block_parameter_node_new(
5813 parser->arena,
5814 ++parser->node_id,
5815 0,
5816 PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
5817 TOK2LOC(parser, operator),
5818 TOK2LOC(parser, keyword)
5819 );
5820}
5821
5825static pm_no_keywords_parameter_node_t *
5826pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5827 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
5828 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5829
5830 return pm_no_keywords_parameter_node_new(
5831 parser->arena,
5832 ++parser->node_id,
5833 0,
5834 PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
5835 TOK2LOC(parser, operator),
5836 TOK2LOC(parser, keyword)
5837 );
5838}
5839
5843static pm_numbered_parameters_node_t *
5844pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing, uint8_t maximum) {
5845 return pm_numbered_parameters_node_new(
5846 parser->arena,
5847 ++parser->node_id,
5848 0,
5849 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5850 maximum
5851 );
5852}
5853
5858#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
5859
5866static uint32_t
5867pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
5868 const uint8_t *start = token->start + 1;
5869 const uint8_t *end = token->end;
5870
5871 ptrdiff_t diff = end - start;
5872 assert(diff > 0);
5873#if PTRDIFF_MAX > SIZE_MAX
5874 assert(diff < (ptrdiff_t) SIZE_MAX);
5875#endif
5876 size_t length = (size_t) diff;
5877
5878 char *digits = xcalloc(length + 1, sizeof(char));
5879 memcpy(digits, start, length);
5880 digits[length] = '\0';
5881
5882 char *endptr;
5883 errno = 0;
5884 unsigned long value = strtoul(digits, &endptr, 10);
5885
5886 if ((digits == endptr) || (*endptr != '\0')) {
5887 pm_parser_err(parser, U32(start - parser->start), U32(length), PM_ERR_INVALID_NUMBER_DECIMAL);
5888 value = 0;
5889 }
5890
5891 xfree_sized(digits, sizeof(char) * (length + 1));
5892
5893 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
5894 PM_PARSER_WARN_FORMAT(parser, U32(start - parser->start), U32(length), PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
5895 value = 0;
5896 }
5897
5898 return (uint32_t) value;
5899}
5900
5901#undef NTH_REF_MAX
5902
5906static pm_numbered_reference_read_node_t *
5907pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5908 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
5909
5910 return pm_numbered_reference_read_node_new(
5911 parser->arena,
5912 ++parser->node_id,
5913 0,
5914 PM_LOCATION_INIT_TOKEN(parser, name),
5915 pm_numbered_reference_read_node_number(parser, name)
5916 );
5917}
5918
5922static pm_optional_parameter_node_t *
5923pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
5924 return pm_optional_parameter_node_new(
5925 parser->arena,
5926 ++parser->node_id,
5927 0,
5928 PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
5929 pm_parser_constant_id_token(parser, name),
5930 TOK2LOC(parser, name),
5931 TOK2LOC(parser, operator),
5932 value
5933 );
5934}
5935
5939static pm_or_node_t *
5940pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5941 pm_assert_value_expression(parser, left);
5942
5943 return pm_or_node_new(
5944 parser->arena,
5945 ++parser->node_id,
5946 0,
5947 PM_LOCATION_INIT_NODES(left, right),
5948 left,
5949 right,
5950 TOK2LOC(parser, operator)
5951 );
5952}
5953
5957static pm_parameters_node_t *
5958pm_parameters_node_create(pm_parser_t *parser) {
5959 return pm_parameters_node_new(
5960 parser->arena,
5961 ++parser->node_id,
5962 0,
5963 PM_LOCATION_INIT_UNSET,
5964 ((pm_node_list_t) { 0 }),
5965 ((pm_node_list_t) { 0 }),
5966 NULL,
5967 ((pm_node_list_t) { 0 }),
5968 ((pm_node_list_t) { 0 }),
5969 NULL,
5970 NULL
5971 );
5972}
5973
5977static void
5978pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
5979 if ((params->base.location.length == 0) || PM_NODE_START(params) > PM_NODE_START(param)) {
5980 PM_NODE_START_SET_NODE(params, param);
5981 }
5982
5983 if ((params->base.location.length == 0) || (PM_NODE_END(params) < PM_NODE_END(param))) {
5984 PM_NODE_LENGTH_SET_NODE(params, param);
5985 }
5986}
5987
5991static void
5992pm_parameters_node_requireds_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
5993 pm_parameters_node_location_set(params, param);
5994 pm_node_list_append(arena, &params->requireds, param);
5995}
5996
6000static void
6001pm_parameters_node_optionals_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6002 pm_parameters_node_location_set(params, UP(param));
6003 pm_node_list_append(arena, &params->optionals, UP(param));
6004}
6005
6009static void
6010pm_parameters_node_posts_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
6011 pm_parameters_node_location_set(params, param);
6012 pm_node_list_append(arena, &params->posts, param);
6013}
6014
6018static void
6019pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6020 pm_parameters_node_location_set(params, param);
6021 params->rest = param;
6022}
6023
6027static void
6028pm_parameters_node_keywords_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
6029 pm_parameters_node_location_set(params, param);
6030 pm_node_list_append(arena, &params->keywords, param);
6031}
6032
6036static void
6037pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6038 assert(params->keyword_rest == NULL);
6039 pm_parameters_node_location_set(params, param);
6040 params->keyword_rest = param;
6041}
6042
6046static void
6047pm_parameters_node_block_set(pm_parameters_node_t *params, pm_node_t *param) {
6048 assert(params->block == NULL);
6049 pm_parameters_node_location_set(params, param);
6050 params->block = param;
6051}
6052
6056static pm_program_node_t *
6057pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6058 return pm_program_node_new(
6059 parser->arena,
6060 ++parser->node_id,
6061 0,
6062 PM_LOCATION_INIT_NODE(statements),
6063 *locals,
6064 statements
6065 );
6066}
6067
6071static pm_parentheses_node_t *
6072pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
6073 return pm_parentheses_node_new(
6074 parser->arena,
6075 ++parser->node_id,
6076 flags,
6077 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
6078 body,
6079 TOK2LOC(parser, opening),
6080 TOK2LOC(parser, closing)
6081 );
6082}
6083
6087static pm_pinned_expression_node_t *
6088pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6089 return pm_pinned_expression_node_new(
6090 parser->arena,
6091 ++parser->node_id,
6092 0,
6093 PM_LOCATION_INIT_TOKENS(parser, operator, rparen),
6094 expression,
6095 TOK2LOC(parser, operator),
6096 TOK2LOC(parser, lparen),
6097 TOK2LOC(parser, rparen)
6098 );
6099}
6100
6104static pm_pinned_variable_node_t *
6105pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6106 return pm_pinned_variable_node_new(
6107 parser->arena,
6108 ++parser->node_id,
6109 0,
6110 PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
6111 variable,
6112 TOK2LOC(parser, operator)
6113 );
6114}
6115
6119static pm_post_execution_node_t *
6120pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6121 return pm_post_execution_node_new(
6122 parser->arena,
6123 ++parser->node_id,
6124 0,
6125 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
6126 statements,
6127 TOK2LOC(parser, keyword),
6128 TOK2LOC(parser, opening),
6129 TOK2LOC(parser, closing)
6130 );
6131}
6132
6136static pm_pre_execution_node_t *
6137pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6138 return pm_pre_execution_node_new(
6139 parser->arena,
6140 ++parser->node_id,
6141 0,
6142 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
6143 statements,
6144 TOK2LOC(parser, keyword),
6145 TOK2LOC(parser, opening),
6146 TOK2LOC(parser, closing)
6147 );
6148}
6149
6153static pm_range_node_t *
6154pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6155 pm_assert_value_expression(parser, left);
6156 pm_assert_value_expression(parser, right);
6157 pm_node_flags_t flags = 0;
6158
6159 // Indicate that this node is an exclusive range if the operator is `...`.
6160 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6161 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6162 }
6163
6164 // Indicate that this node is a static literal (i.e., can be compiled with
6165 // a putobject in CRuby) if the left and right are implicit nil, explicit
6166 // nil, or integers.
6167 if (
6168 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6169 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6170 ) {
6171 flags |= PM_NODE_FLAG_STATIC_LITERAL;
6172 }
6173
6174 uint32_t start = left == NULL ? PM_TOKEN_START(parser, operator) : PM_NODE_START(left);
6175 uint32_t end = right == NULL ? PM_TOKEN_END(parser, operator) : PM_NODE_END(right);
6176
6177 return pm_range_node_new(
6178 parser->arena,
6179 ++parser->node_id,
6180 flags,
6181 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6182 left,
6183 right,
6184 TOK2LOC(parser, operator)
6185 );
6186}
6187
6191static pm_redo_node_t *
6192pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6193 assert(token->type == PM_TOKEN_KEYWORD_REDO);
6194
6195 return pm_redo_node_new(
6196 parser->arena,
6197 ++parser->node_id,
6198 0,
6199 PM_LOCATION_INIT_TOKEN(parser, token)
6200 );
6201}
6202
6207static pm_regular_expression_node_t *
6208pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6209 return pm_regular_expression_node_new(
6210 parser->arena,
6211 ++parser->node_id,
6212 pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6213 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
6214 TOK2LOC(parser, opening),
6215 TOK2LOC(parser, content),
6216 TOK2LOC(parser, closing),
6217 *unescaped
6218 );
6219}
6220
6224static PRISM_INLINE pm_regular_expression_node_t *
6225pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6226 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6227}
6228
6232static pm_required_parameter_node_t *
6233pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6234 return pm_required_parameter_node_new(
6235 parser->arena,
6236 ++parser->node_id,
6237 0,
6238 PM_LOCATION_INIT_TOKEN(parser, token),
6239 pm_parser_constant_id_token(parser, token)
6240 );
6241}
6242
6246static pm_rescue_modifier_node_t *
6247pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6248 return pm_rescue_modifier_node_new(
6249 parser->arena,
6250 ++parser->node_id,
6251 0,
6252 PM_LOCATION_INIT_NODES(expression, rescue_expression),
6253 expression,
6254 TOK2LOC(parser, keyword),
6255 rescue_expression
6256 );
6257}
6258
6262static pm_rescue_node_t *
6263pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6264 return pm_rescue_node_new(
6265 parser->arena,
6266 ++parser->node_id,
6267 0,
6268 PM_LOCATION_INIT_TOKEN(parser, keyword),
6269 TOK2LOC(parser, keyword),
6270 ((pm_node_list_t) { 0 }),
6271 ((pm_location_t) { 0 }),
6272 NULL,
6273 ((pm_location_t) { 0 }),
6274 NULL,
6275 NULL
6276 );
6277}
6278
6279static PRISM_INLINE void
6280pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) {
6281 node->operator_loc = TOK2LOC(parser, operator);
6282}
6283
6287static void
6288pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6289 node->reference = reference;
6290 PM_NODE_LENGTH_SET_NODE(node, reference);
6291}
6292
6296static void
6297pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6298 node->statements = statements;
6299 if (pm_statements_node_body_length(statements) > 0) {
6300 PM_NODE_LENGTH_SET_NODE(node, statements);
6301 }
6302}
6303
6307static void
6308pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6309 node->subsequent = subsequent;
6310 PM_NODE_LENGTH_SET_NODE(node, subsequent);
6311}
6312
6316static void
6317pm_rescue_node_exceptions_append(pm_arena_t *arena, pm_rescue_node_t *node, pm_node_t *exception) {
6318 pm_node_list_append(arena, &node->exceptions, exception);
6319 PM_NODE_LENGTH_SET_NODE(node, exception);
6320}
6321
6325static pm_rest_parameter_node_t *
6326pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6327 return pm_rest_parameter_node_new(
6328 parser->arena,
6329 ++parser->node_id,
6330 0,
6331 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
6332 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
6333 NTOK2LOC(parser, name),
6334 TOK2LOC(parser, operator)
6335 );
6336}
6337
6341static pm_retry_node_t *
6342pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6343 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6344
6345 return pm_retry_node_new(
6346 parser->arena,
6347 ++parser->node_id,
6348 0,
6349 PM_LOCATION_INIT_TOKEN(parser, token)
6350 );
6351}
6352
6356static pm_return_node_t *
6357pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6358 return pm_return_node_new(
6359 parser->arena,
6360 ++parser->node_id,
6361 0,
6362 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
6363 TOK2LOC(parser, keyword),
6364 arguments
6365 );
6366}
6367
6371static pm_self_node_t *
6372pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6373 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6374
6375 return pm_self_node_new(
6376 parser->arena,
6377 ++parser->node_id,
6378 0,
6379 PM_LOCATION_INIT_TOKEN(parser, token)
6380 );
6381}
6382
6386static pm_shareable_constant_node_t *
6387pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6388 return pm_shareable_constant_node_new(
6389 parser->arena,
6390 ++parser->node_id,
6391 (pm_node_flags_t) value,
6392 PM_LOCATION_INIT_NODE(write),
6393 write
6394 );
6395}
6396
6400static pm_singleton_class_node_t *
6401pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6402 return pm_singleton_class_node_new(
6403 parser->arena,
6404 ++parser->node_id,
6405 0,
6406 PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
6407 *locals,
6408 TOK2LOC(parser, class_keyword),
6409 TOK2LOC(parser, operator),
6410 expression,
6411 body,
6412 TOK2LOC(parser, end_keyword)
6413 );
6414}
6415
6419static pm_source_encoding_node_t *
6420pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6421 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6422
6423 return pm_source_encoding_node_new(
6424 parser->arena,
6425 ++parser->node_id,
6426 PM_NODE_FLAG_STATIC_LITERAL,
6427 PM_LOCATION_INIT_TOKEN(parser, token)
6428 );
6429}
6430
6434static pm_source_file_node_t*
6435pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6436 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6437
6438 pm_node_flags_t flags = 0;
6439
6440 switch (parser->frozen_string_literal) {
6441 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6442 flags |= PM_STRING_FLAGS_MUTABLE;
6443 break;
6444 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6445 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6446 break;
6447 }
6448
6449 return pm_source_file_node_new(
6450 parser->arena,
6451 ++parser->node_id,
6452 flags,
6453 PM_LOCATION_INIT_TOKEN(parser, file_keyword),
6454 parser->filepath
6455 );
6456}
6457
6461static pm_source_line_node_t *
6462pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6463 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6464
6465 return pm_source_line_node_new(
6466 parser->arena,
6467 ++parser->node_id,
6468 PM_NODE_FLAG_STATIC_LITERAL,
6469 PM_LOCATION_INIT_TOKEN(parser, token)
6470 );
6471}
6472
6476static pm_splat_node_t *
6477pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6478 return pm_splat_node_new(
6479 parser->arena,
6480 ++parser->node_id,
6481 0,
6482 (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
6483 TOK2LOC(parser, operator),
6484 expression
6485 );
6486}
6487
6491static pm_statements_node_t *
6492pm_statements_node_create(pm_parser_t *parser) {
6493 return pm_statements_node_new(
6494 parser->arena,
6495 ++parser->node_id,
6496 0,
6497 PM_LOCATION_INIT_UNSET,
6498 ((pm_node_list_t) { 0 })
6499 );
6500}
6501
6505static size_t
6506pm_statements_node_body_length(pm_statements_node_t *node) {
6507 return node && node->body.size;
6508}
6509
6514static PRISM_INLINE void
6515pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6516 if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) {
6517 PM_NODE_START_SET_NODE(node, statement);
6518 }
6519
6520 if (PM_NODE_END(statement) > PM_NODE_END(node)) {
6521 PM_NODE_LENGTH_SET_NODE(node, statement);
6522 }
6523}
6524
6528static void
6529pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6530 pm_statements_node_body_update(node, statement);
6531
6532 if (node->body.size > 0) {
6533 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6534
6535 switch (PM_NODE_TYPE(previous)) {
6536 case PM_BREAK_NODE:
6537 case PM_NEXT_NODE:
6538 case PM_REDO_NODE:
6539 case PM_RETRY_NODE:
6540 case PM_RETURN_NODE:
6541 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6542 break;
6543 default:
6544 break;
6545 }
6546 }
6547
6548 pm_node_list_append(parser->arena, &node->body, statement);
6549 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6550}
6551
6555static void
6556pm_statements_node_body_prepend(pm_arena_t *arena, pm_statements_node_t *node, pm_node_t *statement) {
6557 pm_statements_node_body_update(node, statement);
6558 pm_node_list_prepend(arena, &node->body, statement);
6559 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
6560}
6561
6565static PRISM_INLINE pm_string_node_t *
6566pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
6567 pm_node_flags_t flags = 0;
6568
6569 switch (parser->frozen_string_literal) {
6570 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6571 flags = PM_STRING_FLAGS_MUTABLE;
6572 break;
6573 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6574 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6575 break;
6576 }
6577
6578 uint32_t start = PM_TOKEN_START(parser, opening == NULL ? content : opening);
6579 uint32_t end = PM_TOKEN_END(parser, closing == NULL ? content : closing);
6580
6581 return pm_string_node_new(
6582 parser->arena,
6583 ++parser->node_id,
6584 flags,
6585 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6586 NTOK2LOC(parser, opening),
6587 TOK2LOC(parser, content),
6588 NTOK2LOC(parser, closing),
6589 *string
6590 );
6591}
6592
6596static pm_string_node_t *
6597pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6598 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6599}
6600
6605static pm_string_node_t *
6606pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6607 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
6608 parser->current_string = PM_STRING_EMPTY;
6609 return node;
6610}
6611
6615static pm_super_node_t *
6616pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
6617 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
6618
6619 const pm_location_t *end = pm_arguments_end(arguments);
6620 assert(end != NULL && "unreachable");
6621
6622 return pm_super_node_new(
6623 parser->arena,
6624 ++parser->node_id,
6625 0,
6626 ((pm_location_t) { .start = PM_TOKEN_START(parser, keyword), .length = PM_LOCATION_END(end) - PM_TOKEN_START(parser, keyword) }),
6627 TOK2LOC(parser, keyword),
6628 arguments->opening_loc,
6629 arguments->arguments,
6630 arguments->closing_loc,
6631 arguments->block
6632 );
6633}
6634
6639static bool
6640pm_ascii_only_p(const pm_string_t *contents) {
6641 const size_t length = pm_string_length(contents);
6642 const uint8_t *source = pm_string_source(contents);
6643
6644 for (size_t index = 0; index < length; index++) {
6645 if (source[index] & 0x80) return false;
6646 }
6647
6648 return true;
6649}
6650
6654static void
6655parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6656 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6657 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
6658
6659 if (width == 0) {
6660 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
6661 break;
6662 }
6663
6664 cursor += width;
6665 }
6666}
6667
6672static void
6673parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6674 const pm_encoding_t *encoding = parser->encoding;
6675
6676 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6677 size_t width = encoding->char_width(cursor, end - cursor);
6678
6679 if (width == 0) {
6680 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
6681 break;
6682 }
6683
6684 cursor += width;
6685 }
6686}
6687
6697static PRISM_INLINE pm_node_flags_t
6698parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6699 if (parser->explicit_encoding != NULL) {
6700 // A Symbol may optionally have its encoding explicitly set. This will
6701 // happen if an escape sequence results in a non-ASCII code point.
6702 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6703 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6704 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6705 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6706 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
6707 } else if (validate) {
6708 parse_symbol_encoding_validate_other(parser, location, contents);
6709 }
6710 } else if (pm_ascii_only_p(contents)) {
6711 // Ruby stipulates that all source files must use an ASCII-compatible
6712 // encoding. Thus, all symbols appearing in source are eligible for
6713 // "downgrading" to US-ASCII.
6714 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
6715 } else if (validate) {
6716 parse_symbol_encoding_validate_other(parser, location, contents);
6717 }
6718
6719 return 0;
6720}
6721
6726static pm_symbol_node_t *
6727pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
6728 uint32_t start = opening == NULL ? PM_TOKEN_START(parser, value) : PM_TOKEN_START(parser, opening);
6729 uint32_t end = closing == NULL ? PM_TOKEN_END(parser, value) : PM_TOKEN_END(parser, closing);
6730
6731 return pm_symbol_node_new(
6732 parser->arena,
6733 ++parser->node_id,
6734 PM_NODE_FLAG_STATIC_LITERAL | flags,
6735 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6736 NTOK2LOC(parser, opening),
6737 NTOK2LOC(parser, value),
6738 NTOK2LOC(parser, closing),
6739 *unescaped
6740 );
6741}
6742
6746static PRISM_INLINE pm_symbol_node_t *
6747pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6748 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
6749}
6750
6754static pm_symbol_node_t *
6755pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6756 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6757 parser->current_string = PM_STRING_EMPTY;
6758 return node;
6759}
6760
6764static pm_symbol_node_t *
6765pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6766 assert(token->type == PM_TOKEN_LABEL);
6767
6768 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
6769 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
6770 pm_symbol_node_t *node = pm_symbol_node_create(parser, NULL, &label, &closing);
6771
6772 assert((label.end - label.start) >= 0);
6773 pm_string_shared_init(&node->unescaped, label.start, label.end);
6774 pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
6775
6776 return node;
6777}
6778
6782static pm_symbol_node_t *
6783pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
6784 pm_symbol_node_t *node = pm_symbol_node_new(
6785 parser->arena,
6786 ++parser->node_id,
6787 PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
6788 PM_LOCATION_INIT_UNSET,
6789 ((pm_location_t) { 0 }),
6790 ((pm_location_t) { 0 }),
6791 ((pm_location_t) { 0 }),
6792 ((pm_string_t) { 0 })
6793 );
6794
6795 pm_string_constant_init(&node->unescaped, content, strlen(content));
6796 return node;
6797}
6798
6802static bool
6803pm_symbol_node_label_p(const pm_parser_t *parser, const pm_node_t *node) {
6804 const pm_location_t *location = NULL;
6805
6806 switch (PM_NODE_TYPE(node)) {
6807 case PM_SYMBOL_NODE: {
6808 const pm_symbol_node_t *cast = (pm_symbol_node_t *) node;
6809 if (cast->closing_loc.length > 0) {
6810 location = &cast->closing_loc;
6811 }
6812 break;
6813 }
6814 case PM_INTERPOLATED_SYMBOL_NODE: {
6815 const pm_interpolated_symbol_node_t *cast = (pm_interpolated_symbol_node_t *) node;
6816 if (cast->closing_loc.length > 0) {
6817 location = &cast->closing_loc;
6818 }
6819 break;
6820 }
6821 default:
6822 return false;
6823 }
6824
6825 return (location != NULL) && (parser->start[PM_LOCATION_END(location) - 1] == ':');
6826}
6827
6831static pm_symbol_node_t *
6832pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
6833 pm_symbol_node_t *new_node = pm_symbol_node_new(
6834 parser->arena,
6835 ++parser->node_id,
6836 PM_NODE_FLAG_STATIC_LITERAL,
6837 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
6838 TOK2LOC(parser, opening),
6839 node->content_loc,
6840 TOK2LOC(parser, closing),
6841 node->unescaped
6842 );
6843
6844 pm_token_t content = {
6845 .type = PM_TOKEN_IDENTIFIER,
6846 .start = parser->start + node->content_loc.start,
6847 .end = parser->start + node->content_loc.start + node->content_loc.length
6848 };
6849
6850 pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
6851
6852 /* The old node is arena-allocated so no explicit free is needed. */
6853 return new_node;
6854}
6855
6859static pm_string_node_t *
6860pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
6861 pm_node_flags_t flags = 0;
6862
6863 switch (parser->frozen_string_literal) {
6864 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6865 flags = PM_STRING_FLAGS_MUTABLE;
6866 break;
6867 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6868 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6869 break;
6870 }
6871
6872 pm_string_node_t *new_node = pm_string_node_new(
6873 parser->arena,
6874 ++parser->node_id,
6875 flags,
6876 PM_LOCATION_INIT_NODE(node),
6877 node->opening_loc,
6878 node->value_loc,
6879 node->closing_loc,
6880 node->unescaped
6881 );
6882
6883 /* The old node is arena-allocated so no explicit free is needed. */
6884 return new_node;
6885}
6886
6890static pm_true_node_t *
6891pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
6892 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
6893
6894 return pm_true_node_new(
6895 parser->arena,
6896 ++parser->node_id,
6897 PM_NODE_FLAG_STATIC_LITERAL,
6898 PM_LOCATION_INIT_TOKEN(parser, token)
6899 );
6900}
6901
6905static pm_true_node_t *
6906pm_true_node_synthesized_create(pm_parser_t *parser) {
6907 return pm_true_node_new(
6908 parser->arena,
6909 ++parser->node_id,
6910 PM_NODE_FLAG_STATIC_LITERAL,
6911 PM_LOCATION_INIT_UNSET
6912 );
6913}
6914
6918static pm_undef_node_t *
6919pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
6920 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
6921
6922 return pm_undef_node_new(
6923 parser->arena,
6924 ++parser->node_id,
6925 0,
6926 PM_LOCATION_INIT_TOKEN(parser, token),
6927 ((pm_node_list_t) { 0 }),
6928 TOK2LOC(parser, token)
6929 );
6930}
6931
6935static void
6936pm_undef_node_append(pm_arena_t *arena, pm_undef_node_t *node, pm_node_t *name) {
6937 PM_NODE_LENGTH_SET_NODE(node, name);
6938 pm_node_list_append(arena, &node->names, name);
6939}
6940
6944static pm_unless_node_t *
6945pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
6946 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6947 pm_node_t *end = statements == NULL ? predicate : UP(statements);
6948
6949 return pm_unless_node_new(
6950 parser->arena,
6951 ++parser->node_id,
6952 PM_NODE_FLAG_NEWLINE,
6953 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, end),
6954 TOK2LOC(parser, keyword),
6955 predicate,
6956 NTOK2LOC(parser, then_keyword),
6957 statements,
6958 NULL,
6959 ((pm_location_t) { 0 })
6960 );
6961}
6962
6966static pm_unless_node_t *
6967pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
6968 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6969
6970 pm_statements_node_t *statements = pm_statements_node_create(parser);
6971 pm_statements_node_body_append(parser, statements, statement, true);
6972
6973 return pm_unless_node_new(
6974 parser->arena,
6975 ++parser->node_id,
6976 PM_NODE_FLAG_NEWLINE,
6977 PM_LOCATION_INIT_NODES(statement, predicate),
6978 TOK2LOC(parser, unless_keyword),
6979 predicate,
6980 ((pm_location_t) { 0 }),
6981 statements,
6982 NULL,
6983 ((pm_location_t) { 0 })
6984 );
6985}
6986
6987static PRISM_INLINE void
6988pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) {
6989 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
6990 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
6991}
6992
6998static void
6999pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7000 assert(parser->current_block_exits != NULL);
7001
7002 // All of the block exits that we want to remove should be within the
7003 // statements, and since we are modifying the statements, we shouldn't have
7004 // to check the end location.
7005 uint32_t start = statements->base.location.start;
7006
7007 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7008 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7009 if (block_exit->location.start < start) break;
7010
7011 // Implicitly remove from the list by lowering the size.
7012 parser->current_block_exits->size--;
7013 }
7014}
7015
7019static pm_until_node_t *
7020pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7021 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7022
7023 return pm_until_node_new(
7024 parser->arena,
7025 ++parser->node_id,
7026 flags,
7027 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
7028 TOK2LOC(parser, keyword),
7029 NTOK2LOC(parser, do_keyword),
7030 TOK2LOC(parser, closing),
7031 predicate,
7032 statements
7033 );
7034}
7035
7039static pm_until_node_t *
7040pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7041 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7042 pm_loop_modifier_block_exits(parser, statements);
7043
7044 return pm_until_node_new(
7045 parser->arena,
7046 ++parser->node_id,
7047 flags,
7048 PM_LOCATION_INIT_NODES(statements, predicate),
7049 TOK2LOC(parser, keyword),
7050 ((pm_location_t) { 0 }),
7051 ((pm_location_t) { 0 }),
7052 predicate,
7053 statements
7054 );
7055}
7056
7060static pm_when_node_t *
7061pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7062 return pm_when_node_new(
7063 parser->arena,
7064 ++parser->node_id,
7065 0,
7066 PM_LOCATION_INIT_TOKEN(parser, keyword),
7067 TOK2LOC(parser, keyword),
7068 ((pm_node_list_t) { 0 }),
7069 ((pm_location_t) { 0 }),
7070 NULL
7071 );
7072}
7073
7077static void
7078pm_when_node_conditions_append(pm_arena_t *arena, pm_when_node_t *node, pm_node_t *condition) {
7079 PM_NODE_LENGTH_SET_NODE(node, condition);
7080 pm_node_list_append(arena, &node->conditions, condition);
7081}
7082
7086static PRISM_INLINE void
7087pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) {
7088 PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword);
7089 node->then_keyword_loc = TOK2LOC(parser, then_keyword);
7090}
7091
7095static void
7096pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7097 if (PM_NODE_END(statements) > PM_NODE_END(node)) {
7098 PM_NODE_LENGTH_SET_NODE(node, statements);
7099 }
7100
7101 node->statements = statements;
7102}
7103
7107static pm_while_node_t *
7108pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7109 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7110
7111 return pm_while_node_new(
7112 parser->arena,
7113 ++parser->node_id,
7114 flags,
7115 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
7116 TOK2LOC(parser, keyword),
7117 NTOK2LOC(parser, do_keyword),
7118 TOK2LOC(parser, closing),
7119 predicate,
7120 statements
7121 );
7122}
7123
7127static pm_while_node_t *
7128pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7129 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7130 pm_loop_modifier_block_exits(parser, statements);
7131
7132 return pm_while_node_new(
7133 parser->arena,
7134 ++parser->node_id,
7135 flags,
7136 PM_LOCATION_INIT_NODES(statements, predicate),
7137 TOK2LOC(parser, keyword),
7138 ((pm_location_t) { 0 }),
7139 ((pm_location_t) { 0 }),
7140 predicate,
7141 statements
7142 );
7143}
7144
7148static pm_while_node_t *
7149pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7150 return pm_while_node_new(
7151 parser->arena,
7152 ++parser->node_id,
7153 0,
7154 PM_LOCATION_INIT_UNSET,
7155 ((pm_location_t) { 0 }),
7156 ((pm_location_t) { 0 }),
7157 ((pm_location_t) { 0 }),
7158 predicate,
7159 statements
7160 );
7161}
7162
7167static pm_x_string_node_t *
7168pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7169 return pm_x_string_node_new(
7170 parser->arena,
7171 ++parser->node_id,
7172 PM_STRING_FLAGS_FROZEN,
7173 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
7174 TOK2LOC(parser, opening),
7175 TOK2LOC(parser, content),
7176 TOK2LOC(parser, closing),
7177 *unescaped
7178 );
7179}
7180
7184static PRISM_INLINE pm_x_string_node_t *
7185pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7186 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7187}
7188
7192static pm_yield_node_t *
7193pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7194 uint32_t start = PM_TOKEN_START(parser, keyword);
7195 uint32_t end;
7196
7197 if (rparen_loc->length > 0) {
7198 end = PM_LOCATION_END(rparen_loc);
7199 } else if (arguments != NULL) {
7200 end = PM_NODE_END(arguments);
7201 } else if (lparen_loc->length > 0) {
7202 end = PM_LOCATION_END(lparen_loc);
7203 } else {
7204 end = PM_TOKEN_END(parser, keyword);
7205 }
7206
7207 return pm_yield_node_new(
7208 parser->arena,
7209 ++parser->node_id,
7210 0,
7211 ((pm_location_t) { .start = start, .length = U32(end - start) }),
7212 TOK2LOC(parser, keyword),
7213 *lparen_loc,
7214 arguments,
7215 *rparen_loc
7216 );
7217}
7218
7223static int
7224pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7225 pm_scope_t *scope = parser->current_scope;
7226 int depth = 0;
7227
7228 while (scope != NULL) {
7229 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7230 if (scope->closed) break;
7231
7232 scope = scope->previous;
7233 depth++;
7234 }
7235
7236 return -1;
7237}
7238
7244static PRISM_INLINE int
7245pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7246 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7247}
7248
7252static PRISM_INLINE void
7253pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7254 pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - start), reads);
7255}
7256
7260static pm_constant_id_t
7261pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7262 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
7263 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7264 return constant_id;
7265}
7266
7270static PRISM_INLINE pm_constant_id_t
7271pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) {
7272 return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads);
7273}
7274
7278static PRISM_INLINE pm_constant_id_t
7279pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
7280 return pm_parser_local_add_raw(parser, token->start, token->end, reads);
7281}
7282
7286static pm_constant_id_t
7287pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
7288 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
7289 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7290 return constant_id;
7291}
7292
7296static pm_constant_id_t
7297pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
7298 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
7299 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
7300 return constant_id;
7301}
7302
7310static bool
7311pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
7312 // We want to check whether the parameter name is a numbered parameter or
7313 // not.
7314 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, name), PM_TOKEN_LENGTH(name));
7315
7316 // Otherwise we'll fetch the constant id for the parameter name and check
7317 // whether it's already in the current scope.
7318 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
7319
7320 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
7321 // Add an error if the parameter doesn't start with _ and has been seen before
7322 if ((name->start < name->end) && (*name->start != '_')) {
7323 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
7324 }
7325 return true;
7326 }
7327 return false;
7328}
7329
7333static void
7334pm_parser_scope_pop(pm_parser_t *parser) {
7335 pm_scope_t *scope = parser->current_scope;
7336 parser->current_scope = scope->previous;
7337 pm_locals_free(&scope->locals);
7338 xfree_sized(scope, sizeof(pm_scope_t));
7339}
7340
/******************************************************************************/
/* Stack helpers                                                              */
/******************************************************************************/
7344
7348static PRISM_INLINE void
7349pm_state_stack_push(pm_state_stack_t *stack, bool value) {
7350 *stack = (*stack << 1) | (value & 1);
7351}
7352
7356static PRISM_INLINE void
7357pm_state_stack_pop(pm_state_stack_t *stack) {
7358 *stack >>= 1;
7359}
7360
7364static PRISM_INLINE bool
7365pm_state_stack_p(const pm_state_stack_t *stack) {
7366 return *stack & 1;
7367}
7368
7369static PRISM_INLINE void
7370pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
7371 // Use the negation of the value to prevent stack overflow.
7372 pm_state_stack_push(&parser->accepts_block_stack, !value);
7373}
7374
7375static PRISM_INLINE void
7376pm_accepts_block_stack_pop(pm_parser_t *parser) {
7377 pm_state_stack_pop(&parser->accepts_block_stack);
7378}
7379
7380static PRISM_INLINE bool
7381pm_accepts_block_stack_p(pm_parser_t *parser) {
7382 return !pm_state_stack_p(&parser->accepts_block_stack);
7383}
7384
7385static PRISM_INLINE void
7386pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
7387 pm_state_stack_push(&parser->do_loop_stack, value);
7388}
7389
7390static PRISM_INLINE void
7391pm_do_loop_stack_pop(pm_parser_t *parser) {
7392 pm_state_stack_pop(&parser->do_loop_stack);
7393}
7394
7395static PRISM_INLINE bool
7396pm_do_loop_stack_p(pm_parser_t *parser) {
7397 return pm_state_stack_p(&parser->do_loop_stack);
7398}
7399
/******************************************************************************/
/* Lexer check helpers                                                        */
/******************************************************************************/
7403
7408static PRISM_INLINE uint8_t
7409peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
7410 if (cursor < parser->end) {
7411 return *cursor;
7412 } else {
7413 return '\0';
7414 }
7415}
7416
7422static PRISM_INLINE uint8_t
7423peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
7424 return peek_at(parser, parser->current.end + offset);
7425}
7426
7431static PRISM_INLINE uint8_t
7432peek(const pm_parser_t *parser) {
7433 return peek_at(parser, parser->current.end);
7434}
7435
7440static PRISM_INLINE bool
7441match(pm_parser_t *parser, uint8_t value) {
7442 if (peek(parser) == value) {
7443 parser->current.end++;
7444 return true;
7445 }
7446 return false;
7447}
7448
7453static PRISM_INLINE size_t
7454match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
7455 if (peek_at(parser, cursor) == '\n') {
7456 return 1;
7457 }
7458 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
7459 return 2;
7460 }
7461 return 0;
7462}
7463
7469static PRISM_INLINE size_t
7470match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
7471 return match_eol_at(parser, parser->current.end + offset);
7472}
7473
7479static PRISM_INLINE size_t
7480match_eol(pm_parser_t *parser) {
7481 return match_eol_at(parser, parser->current.end);
7482}
7483
7487static PRISM_INLINE const uint8_t *
7488next_newline(const uint8_t *cursor, ptrdiff_t length) {
7489 assert(length >= 0);
7490
7491 // Note that it's okay for us to use memchr here to look for \n because none
7492 // of the encodings that we support have \n as a component of a multi-byte
7493 // character.
7494 return memchr(cursor, '\n', (size_t) length);
7495}
7496
7500static PRISM_INLINE bool
7501ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
7502 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
7503}
7504
7509static bool
7510parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
7511 const pm_encoding_t *encoding = pm_encoding_find(start, end);
7512
7513 if (encoding != NULL) {
7514 if (parser->encoding != encoding) {
7515 parser->encoding = encoding;
7516 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
7517 }
7518
7519 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
7520 return true;
7521 }
7522
7523 return false;
7524}
7525
7530static void
7531parser_lex_magic_comment_encoding(pm_parser_t *parser) {
7532 const uint8_t *cursor = parser->current.start + 1;
7533 const uint8_t *end = parser->current.end;
7534
7535 bool separator = false;
7536 while (true) {
7537 if (end - cursor <= 6) return;
7538 switch (cursor[6]) {
7539 case 'C': case 'c': cursor += 6; continue;
7540 case 'O': case 'o': cursor += 5; continue;
7541 case 'D': case 'd': cursor += 4; continue;
7542 case 'I': case 'i': cursor += 3; continue;
7543 case 'N': case 'n': cursor += 2; continue;
7544 case 'G': case 'g': cursor += 1; continue;
7545 case '=': case ':':
7546 separator = true;
7547 cursor += 6;
7548 break;
7549 default:
7550 cursor += 6;
7551 if (pm_char_is_whitespace(*cursor)) break;
7552 continue;
7553 }
7554 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
7555 separator = false;
7556 }
7557
7558 while (true) {
7559 do {
7560 if (++cursor >= end) return;
7561 } while (pm_char_is_whitespace(*cursor));
7562
7563 if (separator) break;
7564 if (*cursor != '=' && *cursor != ':') return;
7565
7566 separator = true;
7567 cursor++;
7568 }
7569
7570 const uint8_t *value_start = cursor;
7571 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
7572
7573 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
7574 // If we were unable to parse the encoding value, then we've got an
7575 // issue because we didn't understand the encoding that the user was
7576 // trying to use. In this case we'll keep using the default encoding but
7577 // add an error to the parser to indicate an unsuccessful parse.
7578 pm_parser_err(parser, U32(value_start - parser->start), U32(cursor - value_start), PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
7579 }
7580}
7581
/**
 * The possible outcomes of interpreting the value of a boolean magic comment.
 */
typedef enum {
    /** The value was recognized as "true". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE = 0,

    /** The value was recognized as "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE = 1,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID = 2
} pm_magic_comment_boolean_value_t;
7587
7592static pm_magic_comment_boolean_value_t
7593parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
7594 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
7595 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
7596 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
7597 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
7598 } else {
7599 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
7600 }
7601}
7602
7603static PRISM_INLINE bool
7604pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
7605 return b == '\'' || b == '"' || b == ':' || b == ';';
7606}
7607
7613static PRISM_INLINE const uint8_t *
7614parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
7615 // Scan for '*' as the middle character, since it is rarer than '-' in
7616 // typical comments and avoids repeated memchr calls for '-' that hit
7617 // dashes in words like "foo-bar".
7618 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor + 1, '*', (size_t) (end - cursor - 1), parser->encoding_changed, parser->encoding)) != NULL) {
7619 if (cursor[-1] == '-' && cursor + 1 < end && cursor[1] == '-') {
7620 return cursor - 1;
7621 }
7622 }
7623 return NULL;
7624}
7625
7636static PRISM_INLINE bool
7637parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
7638 bool result = true;
7639
7640 const uint8_t *start = parser->current.start + 1;
7641 const uint8_t *end = parser->current.end;
7642 if (end - start <= 7) return false;
7643
7644 const uint8_t *cursor;
7645 bool indicator = false;
7646
7647 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7648 start = cursor + 3;
7649
7650 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7651 end = cursor;
7652 indicator = true;
7653 } else {
7654 // If we have a start marker but not an end marker, then we cannot
7655 // have a magic comment.
7656 return false;
7657 }
7658 } else {
7659 // Non-emacs magic comments must contain a colon for `key: value`.
7660 // Reject early if there is no colon to avoid scanning the entire
7661 // comment character-by-character.
7662 if (pm_memchr(start, ':', (size_t) (end - start), parser->encoding_changed, parser->encoding) == NULL) {
7663 return false;
7664 }
7665
7666 // Advance start past leading whitespace so the main loop begins
7667 // directly at the key, avoiding a redundant whitespace scan.
7668 start += pm_strspn_whitespace(start, end - start);
7669 }
7670
7671 cursor = start;
7672 while (cursor < end) {
7673 if (indicator) {
7674 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
7675 }
7676
7677 const uint8_t *key_start = cursor;
7678 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
7679
7680 const uint8_t *key_end = cursor;
7681 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7682 if (cursor == end) break;
7683
7684 if (*cursor == ':') {
7685 cursor++;
7686 } else {
7687 if (!indicator) return false;
7688 continue;
7689 }
7690
7691 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7692 if (cursor == end) break;
7693
7694 const uint8_t *value_start;
7695 const uint8_t *value_end;
7696
7697 if (*cursor == '"') {
7698 value_start = ++cursor;
7699 for (; cursor < end && *cursor != '"'; cursor++) {
7700 if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
7701 }
7702 value_end = cursor;
7703 if (cursor < end && *cursor == '"') cursor++;
7704 } else {
7705 value_start = cursor;
7706 while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
7707 value_end = cursor;
7708 }
7709
7710 if (indicator) {
7711 while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
7712 } else {
7713 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7714 if (cursor != end) return false;
7715 }
7716
7717 // Here, we need to do some processing on the key to swap out dashes for
7718 // underscores. We only need to do this if there _is_ a dash in the key.
7719 pm_string_t key;
7720 const size_t key_length = (size_t) (key_end - key_start);
7721 const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
7722
7723 if (dash == NULL) {
7724 pm_string_shared_init(&key, key_start, key_end);
7725 } else {
7726 uint8_t *buffer = xmalloc(key_length);
7727 if (buffer == NULL) break;
7728
7729 memcpy(buffer, key_start, key_length);
7730 buffer[dash - key_start] = '_';
7731
7732 while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
7733 buffer[dash - key_start] = '_';
7734 }
7735
7736 pm_string_owned_init(&key, buffer, key_length);
7737 }
7738
7739 // Finally, we can start checking the key against the list of known
7740 // magic comment keys, and potentially change state based on that.
7741 const uint8_t *key_source = pm_string_source(&key);
7742 uint32_t value_length = (uint32_t) (value_end - value_start);
7743
7744 // We only want to attempt to compare against encoding comments if it's
7745 // the first line in the file (or the second in the case of a shebang).
7746 if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
7747 if (
7748 (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
7749 (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
7750 ) {
7751 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
7752 }
7753 }
7754
7755 if (key_length == 11) {
7756 if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
7757 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7758 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7759 PM_PARSER_WARN_TOKEN_FORMAT(
7760 parser,
7761 &parser->current,
7762 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7763 (int) key_length,
7764 (const char *) key_source,
7765 (int) value_length,
7766 (const char *) value_start
7767 );
7768 break;
7769 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7770 parser->warn_mismatched_indentation = false;
7771 break;
7772 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7773 parser->warn_mismatched_indentation = true;
7774 break;
7775 }
7776 }
7777 } else if (key_length == 21) {
7778 if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
7779 // We only want to handle frozen string literal comments if it's
7780 // before any semantic tokens have been seen.
7781 if (semantic_token_seen) {
7782 pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
7783 } else {
7784 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7785 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7786 PM_PARSER_WARN_TOKEN_FORMAT(
7787 parser,
7788 &parser->current,
7789 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7790 (int) key_length,
7791 (const char *) key_source,
7792 (int) value_length,
7793 (const char *) value_start
7794 );
7795 break;
7796 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7797 parser->frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED;
7798 break;
7799 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7800 parser->frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED;
7801 break;
7802 }
7803 }
7804 }
7805 } else if (key_length == 24) {
7806 if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
7807 const uint8_t *cursor = parser->current.start;
7808 while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
7809
7810 if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
7811 pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
7812 } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
7813 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
7814 } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
7815 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
7816 } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
7817 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
7818 } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
7819 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
7820 } else {
7821 PM_PARSER_WARN_TOKEN_FORMAT(
7822 parser,
7823 &parser->current,
7824 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7825 (int) key_length,
7826 (const char *) key_source,
7827 (int) value_length,
7828 (const char *) value_start
7829 );
7830 }
7831 }
7832 }
7833
7834 // When we're done, we want to free the string in case we had to
7835 // allocate memory for it.
7836 pm_string_cleanup(&key);
7837
7838 // Allocate a new magic comment node to append to the parser's list.
7839 pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_magic_comment_t), PRISM_ALIGNOF(pm_magic_comment_t));
7840 magic_comment->node.next = NULL;
7841 magic_comment->key = (pm_location_t) { .start = U32(key_start - parser->start), .length = U32(key_length) };
7842 magic_comment->value = (pm_location_t) { .start = U32(value_start - parser->start), .length = value_length };
7843 pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
7844 }
7845
7846 return result;
7847}
7848
7849/******************************************************************************/
7850/* Context manipulations */
7851/******************************************************************************/
7852
// Lookup table indexed by pm_context_t. Each entry is a bitmask in which bit
// (1U << token_type) is set for every token type that closes that context.
// PM_CONTEXT_NONE has no terminators. The table is consulted by
// context_terminator(), which only tests token types below 32, so every token
// type named here must have an enum value that fits in a 32-bit mask.
static const uint32_t context_terminators[] = {
    [PM_CONTEXT_NONE] = 0,
    [PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_BEGIN_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_BEGIN_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_BLOCK_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
    [PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
    [PM_CONTEXT_BLOCK_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_BLOCK_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_BLOCK_PARAMETERS] = (1U << PM_TOKEN_PIPE),
    [PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
    [PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
    [PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
    [PM_CONTEXT_CLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_CLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
    [PM_CONTEXT_DEF_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_DEF_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_DEF_PARAMS] = (1U << PM_TOKEN_EOF),
    [PM_CONTEXT_DEFINED] = (1U << PM_TOKEN_EOF),
    [PM_CONTEXT_DEFAULT_PARAMS] = (1U << PM_TOKEN_COMMA) | (1U << PM_TOKEN_PARENTHESIS_RIGHT),
    [PM_CONTEXT_ELSE] = (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_EMBEXPR] = (1U << PM_TOKEN_EMBEXPR_END),
    [PM_CONTEXT_FOR] = (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_FOR_INDEX] = (1U << PM_TOKEN_KEYWORD_IN),
    [PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_LAMBDA_BRACES] = (1U << PM_TOKEN_BRACE_RIGHT),
    [PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
    [PM_CONTEXT_LAMBDA_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_LAMBDA_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_LOOP_PREDICATE] = (1U << PM_TOKEN_KEYWORD_DO) | (1U << PM_TOKEN_KEYWORD_THEN),
    [PM_CONTEXT_MAIN] = (1U << PM_TOKEN_EOF),
    [PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
    [PM_CONTEXT_MODULE_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_MODULE_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_MULTI_TARGET] = (1U << PM_TOKEN_EOF),
    [PM_CONTEXT_PARENS] = (1U << PM_TOKEN_PARENTHESIS_RIGHT),
    [PM_CONTEXT_POSTEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
    [PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
    [PM_CONTEXT_PREEXE] = (1U << PM_TOKEN_BRACE_RIGHT),
    [PM_CONTEXT_RESCUE_MODIFIER] = (1U << PM_TOKEN_EOF),
    [PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
    [PM_CONTEXT_SCLASS_ENSURE] = (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_SCLASS_ELSE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_TERNARY] = (1U << PM_TOKEN_EOF),
    [PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_UNTIL] = (1U << PM_TOKEN_KEYWORD_END),
    [PM_CONTEXT_WHILE] = (1U << PM_TOKEN_KEYWORD_END),
};
7910
7911static PRISM_INLINE bool
7912context_terminator(pm_context_t context, pm_token_t *token) {
7913 return token->type < 32 && (context_terminators[context] & (1U << token->type));
7914}
7915
7920static pm_context_t
7921context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
7922 pm_context_node_t *context_node = parser->current_context;
7923
7924 while (context_node != NULL) {
7925 if (context_terminator(context_node->context, token)) return context_node->context;
7926 context_node = context_node->prev;
7927 }
7928
7929 return PM_CONTEXT_NONE;
7930}
7931
7932static bool
7933context_push(pm_parser_t *parser, pm_context_t context) {
7934 pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
7935 if (context_node == NULL) return false;
7936
7937 *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
7938
7939 if (parser->current_context == NULL) {
7940 parser->current_context = context_node;
7941 } else {
7942 context_node->prev = parser->current_context;
7943 parser->current_context = context_node;
7944 }
7945
7946 return true;
7947}
7948
7949static void
7950context_pop(pm_parser_t *parser) {
7951 pm_context_node_t *prev = parser->current_context->prev;
7952 xfree_sized(parser->current_context, sizeof(pm_context_node_t));
7953 parser->current_context = prev;
7954}
7955
7956static bool
7957context_p(const pm_parser_t *parser, pm_context_t context) {
7958 pm_context_node_t *context_node = parser->current_context;
7959
7960 while (context_node != NULL) {
7961 if (context_node->context == context) return true;
7962 context_node = context_node->prev;
7963 }
7964
7965 return false;
7966}
7967
7968static bool
7969context_def_p(const pm_parser_t *parser) {
7970 pm_context_node_t *context_node = parser->current_context;
7971
7972 while (context_node != NULL) {
7973 switch (context_node->context) {
7974 case PM_CONTEXT_DEF:
7975 case PM_CONTEXT_DEF_PARAMS:
7976 case PM_CONTEXT_DEF_ENSURE:
7977 case PM_CONTEXT_DEF_RESCUE:
7978 case PM_CONTEXT_DEF_ELSE:
7979 return true;
7980 case PM_CONTEXT_CLASS:
7981 case PM_CONTEXT_CLASS_ENSURE:
7982 case PM_CONTEXT_CLASS_RESCUE:
7983 case PM_CONTEXT_CLASS_ELSE:
7984 case PM_CONTEXT_MODULE:
7985 case PM_CONTEXT_MODULE_ENSURE:
7986 case PM_CONTEXT_MODULE_RESCUE:
7987 case PM_CONTEXT_MODULE_ELSE:
7988 case PM_CONTEXT_SCLASS:
7989 case PM_CONTEXT_SCLASS_ENSURE:
7990 case PM_CONTEXT_SCLASS_RESCUE:
7991 case PM_CONTEXT_SCLASS_ELSE:
7992 return false;
7993 default:
7994 context_node = context_node->prev;
7995 }
7996 }
7997
7998 return false;
7999}
8000
8005static const char *
8006context_human(pm_context_t context) {
8007 switch (context) {
8008 case PM_CONTEXT_NONE:
8009 assert(false && "unreachable");
8010 return "";
8011 case PM_CONTEXT_BEGIN: return "begin statement";
8012 case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8013 case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8014 case PM_CONTEXT_BLOCK_PARAMETERS: return "'|'..'|' block parameter";
8015 case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8016 case PM_CONTEXT_CASE_IN: return "'in' clause";
8017 case PM_CONTEXT_CLASS: return "class definition";
8018 case PM_CONTEXT_DEF: return "method definition";
8019 case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8020 case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8021 case PM_CONTEXT_DEFINED: return "'defined?' expression";
8022 case PM_CONTEXT_ELSE:
8023 case PM_CONTEXT_BEGIN_ELSE:
8024 case PM_CONTEXT_BLOCK_ELSE:
8025 case PM_CONTEXT_CLASS_ELSE:
8026 case PM_CONTEXT_DEF_ELSE:
8027 case PM_CONTEXT_LAMBDA_ELSE:
8028 case PM_CONTEXT_MODULE_ELSE:
8029 case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8030 case PM_CONTEXT_ELSIF: return "'elsif' clause";
8031 case PM_CONTEXT_EMBEXPR: return "embedded expression";
8032 case PM_CONTEXT_BEGIN_ENSURE:
8033 case PM_CONTEXT_BLOCK_ENSURE:
8034 case PM_CONTEXT_CLASS_ENSURE:
8035 case PM_CONTEXT_DEF_ENSURE:
8036 case PM_CONTEXT_LAMBDA_ENSURE:
8037 case PM_CONTEXT_MODULE_ENSURE:
8038 case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8039 case PM_CONTEXT_FOR: return "for loop";
8040 case PM_CONTEXT_FOR_INDEX: return "for loop index";
8041 case PM_CONTEXT_IF: return "if statement";
8042 case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8043 case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8044 case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8045 case PM_CONTEXT_MAIN: return "top level context";
8046 case PM_CONTEXT_MODULE: return "module definition";
8047 case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8048 case PM_CONTEXT_PARENS: return "parentheses";
8049 case PM_CONTEXT_POSTEXE: return "'END' block";
8050 case PM_CONTEXT_PREDICATE: return "predicate";
8051 case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8052 case PM_CONTEXT_BEGIN_RESCUE:
8053 case PM_CONTEXT_BLOCK_RESCUE:
8054 case PM_CONTEXT_CLASS_RESCUE:
8055 case PM_CONTEXT_DEF_RESCUE:
8056 case PM_CONTEXT_LAMBDA_RESCUE:
8057 case PM_CONTEXT_MODULE_RESCUE:
8058 case PM_CONTEXT_RESCUE_MODIFIER:
8059 case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8060 case PM_CONTEXT_SCLASS: return "singleton class definition";
8061 case PM_CONTEXT_TERNARY: return "ternary expression";
8062 case PM_CONTEXT_UNLESS: return "unless statement";
8063 case PM_CONTEXT_UNTIL: return "until statement";
8064 case PM_CONTEXT_WHILE: return "while statement";
8065 }
8066
8067 assert(false && "unreachable");
8068 return "";
8069}
8070
8071/******************************************************************************/
8072/* Specific token lexers */
8073/******************************************************************************/
8074
8075static PRISM_INLINE void
8076pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8077 if (invalid != NULL) {
8078 pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8079 pm_parser_err(parser, U32(invalid - parser->start), 1, diag_id);
8080 }
8081}
8082
8083static size_t
8084pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8085 const uint8_t *invalid = NULL;
8086 size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8087 pm_strspn_number_validate(parser, string, length, invalid);
8088 return length;
8089}
8090
8091static size_t
8092pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8093 const uint8_t *invalid = NULL;
8094 size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8095 pm_strspn_number_validate(parser, string, length, invalid);
8096 return length;
8097}
8098
8099static size_t
8100pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8101 const uint8_t *invalid = NULL;
8102 size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8103 pm_strspn_number_validate(parser, string, length, invalid);
8104 return length;
8105}
8106
8107static size_t
8108pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8109 const uint8_t *invalid = NULL;
8110 size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8111 pm_strspn_number_validate(parser, string, length, invalid);
8112 return length;
8113}
8114
8115static pm_token_type_t
8116lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8117 pm_token_type_t type = PM_TOKEN_INTEGER;
8118
8119 // Here we're going to attempt to parse the optional decimal portion of a
8120 // float. If it's not there, then it's okay and we'll just continue on.
8121 if (peek(parser) == '.') {
8122 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8123 parser->current.end += 2;
8124 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8125 type = PM_TOKEN_FLOAT;
8126 } else {
8127 // If we had a . and then something else, then it's not a float
8128 // suffix on a number it's a method call or something else.
8129 return type;
8130 }
8131 }
8132
8133 // Here we're going to attempt to parse the optional exponent portion of a
8134 // float. If it's not there, it's okay and we'll just continue on.
8135 if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8136 if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
8137 parser->current.end += 2;
8138
8139 if (pm_char_is_decimal_digit(peek(parser))) {
8140 parser->current.end++;
8141 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8142 } else {
8143 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8144 }
8145 } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8146 parser->current.end++;
8147 parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8148 } else {
8149 return type;
8150 }
8151
8152 *seen_e = true;
8153 type = PM_TOKEN_FLOAT;
8154 }
8155
8156 return type;
8157}
8158
// Lexes the body of a numeric literal whose first digit has already been
// consumed (the byte just before parser->current.end). Handles the 0d/0b/0o/0x
// prefixes, legacy leading-zero octal, plain decimal integers, and the
// optional float suffix, advancing parser->current.end as it goes.
//
// Returns PM_TOKEN_INTEGER, or whatever lex_optional_float_suffix() returns
// when a float suffix is lexed. *seen_e is reset to false on entry and is only
// set again through lex_optional_float_suffix().
//
// The leading-zero switch reads *parser->current.end without a bounds check in
// this function, so the caller must guarantee that at least one more byte is
// available.
static pm_token_type_t
lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
    pm_token_type_t type = PM_TOKEN_INTEGER;
    *seen_e = false;

    if (peek_offset(parser, -1) == '0') {
        switch (*parser->current.end) {
            // 0d1111 is a decimal number. Note that this case leaves
            // parser->integer.base untouched.
            case 'd':
            case 'D':
                parser->current.end++;
                if (pm_char_is_decimal_digit(peek(parser))) {
                    parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
                } else {
                    // NOTE(review): presumably consumes a stray underscore so
                    // that it is part of the reported token — confirm against
                    // match().
                    match(parser, '_');
                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
                }

                break;

            // 0b1111 is a binary number
            case 'b':
            case 'B':
                parser->current.end++;
                if (pm_char_is_binary_digit(peek(parser))) {
                    parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
                } else {
                    match(parser, '_');
                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
                }

                // The base is recorded even when the digits were invalid.
                parser->integer.base = PM_INTEGER_BASE_FLAGS_BINARY;
                break;

            // 0o1111 is an octal number
            case 'o':
            case 'O':
                parser->current.end++;
                if (pm_char_is_octal_digit(peek(parser))) {
                    parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
                } else {
                    match(parser, '_');
                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
                }

                parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
                break;

            // 01111 is an octal number. An underscore directly after the
            // leading zero takes this path as well.
            case '_':
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
                parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
                parser->integer.base = PM_INTEGER_BASE_FLAGS_OCTAL;
                break;

            // 0x1111 is a hexadecimal number
            case 'x':
            case 'X':
                parser->current.end++;
                if (pm_char_is_hexadecimal_digit(peek(parser))) {
                    parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
                } else {
                    match(parser, '_');
                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
                }

                parser->integer.base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
                break;

            // 0.xxx is a float
            case '.': {
                type = lex_optional_float_suffix(parser, seen_e);
                break;
            }

            // 0exxx is a float
            case 'e':
            case 'E': {
                type = lex_optional_float_suffix(parser, seen_e);
                break;
            }

            // Any other byte leaves the literal as a bare 0: there is no
            // default case, so nothing more is consumed here.
        }
    } else {
        // If it didn't start with a 0, then we'll lex as far as we can into a
        // decimal number. We compute the integer value inline to avoid
        // re-scanning the digits later in pm_integer_parse.
        {
            const uint8_t *cursor = parser->current.end;
            const uint8_t *end = parser->end;

            // Seed the running value with the digit that was consumed before
            // this function was called.
            uint64_t value = (uint64_t) (cursor[-1] - '0');

            bool has_underscore = false;
            bool prev_underscore = false;
            const uint8_t *invalid = NULL;

            while (cursor < end) {
                uint8_t c = *cursor;
                if (c >= '0' && c <= '9') {
                    // Once the value no longer fits in 32 bits we stop
                    // accumulating, which also keeps the 64-bit accumulator
                    // from overflowing.
                    if (value <= UINT32_MAX) value = value * 10 + (uint64_t) (c - '0');
                    prev_underscore = false;
                    cursor++;
                } else if (c == '_') {
                    has_underscore = true;

                    // Remember only the first doubled underscore.
                    if (prev_underscore && invalid == NULL) invalid = cursor;
                    prev_underscore = true;
                    cursor++;
                } else {
                    break;
                }
            }

            if (has_underscore) {
                // A trailing underscore is invalid too, unless an earlier
                // doubled underscore was already recorded.
                if (prev_underscore && invalid == NULL) invalid = cursor - 1;
                pm_strspn_number_validate(parser, parser->current.end, (size_t) (cursor - parser->current.end), invalid);
            }

            // Only cache the value when it fits in 32 bits.
            if (value <= UINT32_MAX) {
                parser->integer.value = (uint32_t) value;
                parser->integer.lexed = true;
            }

            parser->current.end = cursor;
        }

        // Afterward, we'll lex as far as we can into an optional float suffix.
        // Guard the function call: the vast majority of decimal numbers are
        // plain integers, so avoid the call when the next byte cannot start a
        // float suffix.
        {
            uint8_t next = peek(parser);
            if (next == '.' || next == 'e' || next == 'E') {
                type = lex_optional_float_suffix(parser, seen_e);

                // If it turned out to be a float, the cached integer value is
                // invalid.
                if (type != PM_TOKEN_INTEGER) {
                    parser->integer.lexed = false;
                }
            }
        }
    }

    // At this point we have a completed number, but we want to provide the user
    // with a good experience if they put an additional .xxx fractional
    // component on the end, so we'll check for that here. The extra fraction is
    // reported as an error but is not consumed: parser->current.end is left
    // where it was.
    if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
        const uint8_t *fraction_start = parser->current.end;
        const uint8_t *fraction_end = parser->current.end + 2;
        fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
        pm_parser_err(parser, U32(fraction_start - parser->start), U32(fraction_end - fraction_start), PM_ERR_INVALID_NUMBER_FRACTION);
    }

    return type;
}
8320
8321static pm_token_type_t
8322lex_numeric(pm_parser_t *parser) {
8323 pm_token_type_t type = PM_TOKEN_INTEGER;
8324 parser->integer.base = PM_INTEGER_BASE_FLAGS_DECIMAL;
8325 parser->integer.lexed = false;
8326
8327 if (parser->current.end < parser->end) {
8328 bool seen_e = false;
8329 type = lex_numeric_prefix(parser, &seen_e);
8330
8331 const uint8_t *end = parser->current.end;
8332 pm_token_type_t suffix_type = type;
8333
8334 if (type == PM_TOKEN_INTEGER) {
8335 if (match(parser, 'r')) {
8336 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
8337
8338 if (match(parser, 'i')) {
8339 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
8340 }
8341 } else if (match(parser, 'i')) {
8342 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
8343 }
8344 } else {
8345 if (!seen_e && match(parser, 'r')) {
8346 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
8347
8348 if (match(parser, 'i')) {
8349 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
8350 }
8351 } else if (match(parser, 'i')) {
8352 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
8353 }
8354 }
8355
8356 const uint8_t b = peek(parser);
8357 if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
8358 parser->current.end = end;
8359 } else {
8360 type = suffix_type;
8361 }
8362 }
8363
8364 return type;
8365}
8366
8367static pm_token_type_t
8368lex_global_variable(pm_parser_t *parser) {
8369 if (parser->current.end >= parser->end) {
8370 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8371 return PM_TOKEN_GLOBAL_VARIABLE;
8372 }
8373
8374 // True if multiple characters are allowed after the declaration of the
8375 // global variable. Not true when it starts with "$-".
8376 bool allow_multiple = true;
8377
8378 switch (*parser->current.end) {
8379 case '~': // $~: match-data
8380 case '*': // $*: argv
8381 case '$': // $$: pid
8382 case '?': // $?: last status
8383 case '!': // $!: error string
8384 case '@': // $@: error position
8385 case '/': // $/: input record separator
8386 case '\\': // $\: output record separator
8387 case ';': // $;: field separator
8388 case ',': // $,: output field separator
8389 case '.': // $.: last read line number
8390 case '=': // $=: ignorecase
8391 case ':': // $:: load path
8392 case '<': // $<: reading filename
8393 case '>': // $>: default output handle
8394 case '\"': // $": already loaded files
8395 parser->current.end++;
8396 return PM_TOKEN_GLOBAL_VARIABLE;
8397
8398 case '&': // $&: last match
8399 case '`': // $`: string before last match
8400 case '\'': // $': string after last match
8401 case '+': // $+: string matches last paren.
8402 parser->current.end++;
8403 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
8404
8405 case '0': {
8406 parser->current.end++;
8407 size_t width;
8408
8409 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8410 do {
8411 parser->current.end += width;
8412 } while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8413
8414 // $0 isn't allowed to be followed by anything.
8415 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8416 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, diag_id);
8417 }
8418
8419 return PM_TOKEN_GLOBAL_VARIABLE;
8420 }
8421
8422 case '1':
8423 case '2':
8424 case '3':
8425 case '4':
8426 case '5':
8427 case '6':
8428 case '7':
8429 case '8':
8430 case '9':
8431 parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
8432 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
8433
8434 case '-':
8435 parser->current.end++;
8436 allow_multiple = false;
8438 default: {
8439 size_t width;
8440
8441 if ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0) {
8442 do {
8443 parser->current.end += width;
8444 } while (allow_multiple && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) > 0);
8445 } else if (pm_char_is_whitespace(peek(parser))) {
8446 // If we get here, then we have a $ followed by whitespace,
8447 // which is not allowed.
8448 pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
8449 } else {
8450 // If we get here, then we have a $ followed by something that
8451 // isn't recognized as a global variable.
8452 pm_diagnostic_id_t diag_id = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
8453 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8454 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), diag_id, (int) (PM_TOKEN_LENGTH(&parser->current) + U32(width)), (const char *) parser->current.start);
8455 }
8456
8457 return PM_TOKEN_GLOBAL_VARIABLE;
8458 }
8459 }
8460}
8461
8474static PRISM_INLINE pm_token_type_t
8475lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
8476 if (memcmp(current_start, value, vlen) == 0) {
8477 pm_lex_state_t last_state = parser->lex_state;
8478
8479 if (parser->lex_state & PM_LEX_STATE_FNAME) {
8480 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8481 } else {
8482 lex_state_set(parser, state);
8483 if (state == PM_LEX_STATE_BEG) {
8484 parser->command_start = true;
8485 }
8486
8487 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
8488 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8489 return modifier_type;
8490 }
8491 }
8492
8493 return type;
8494 }
8495
8496 return PM_TOKEN_EOF;
8497}
8498
8499static pm_token_type_t
8500lex_identifier(pm_parser_t *parser, bool previous_command_start) {
8501 // Lex as far as we can into the current identifier.
8502 size_t width;
8503 const uint8_t *end = parser->end;
8504 const uint8_t *current_start = parser->current.start;
8505 const uint8_t *current_end = parser->current.end;
8506 bool encoding_changed = parser->encoding_changed;
8507
8508 if (encoding_changed) {
8509 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
8510 current_end += width;
8511 }
8512 } else {
8513 // Fast path: scan ASCII identifier bytes using wide operations.
8514 current_end += scan_identifier_ascii(current_end, end);
8515
8516 // Byte-at-a-time fallback for the tail and any UTF-8 sequences.
8517 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
8518 current_end += width;
8519 }
8520 }
8521 parser->current.end = current_end;
8522
8523 // Now cache the length of the identifier so that we can quickly compare it
8524 // against known keywords.
8525 width = (size_t) (current_end - current_start);
8526
8527 if (current_end < end) {
8528 if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
8529 // First we'll attempt to extend the identifier by a ! or ?. Then we'll
8530 // check if we're returning the defined? keyword or just an identifier.
8531 width++;
8532
8533 if (
8534 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8535 (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
8536 ) {
8537 // If we're in a position where we can accept a : at the end of an
8538 // identifier, then we'll optionally accept it.
8539 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8540 (void) match(parser, ':');
8541 return PM_TOKEN_LABEL;
8542 }
8543
8544 if (parser->lex_state != PM_LEX_STATE_DOT) {
8545 if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
8546 return PM_TOKEN_KEYWORD_DEFINED;
8547 }
8548 }
8549
8550 return PM_TOKEN_METHOD_NAME;
8551 }
8552
8553 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
8554 // If we're in a position where we can accept a = at the end of an
8555 // identifier, then we'll optionally accept it.
8556 return PM_TOKEN_IDENTIFIER;
8557 }
8558
8559 if (
8560 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8561 peek(parser) == ':' && peek_offset(parser, 1) != ':'
8562 ) {
8563 // If we're in a position where we can accept a : at the end of an
8564 // identifier, then we'll optionally accept it.
8565 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8566 (void) match(parser, ':');
8567 return PM_TOKEN_LABEL;
8568 }
8569 }
8570
8571 if (parser->lex_state != PM_LEX_STATE_DOT) {
8572 pm_token_type_t type;
8573 switch (width) {
8574 case 2:
8575 if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
8576 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
8577 return PM_TOKEN_KEYWORD_DO;
8578 }
8579 if (pm_do_loop_stack_p(parser)) {
8580 return PM_TOKEN_KEYWORD_DO_LOOP;
8581 }
8582 if (!pm_accepts_block_stack_p(parser)) {
8583 return PM_TOKEN_KEYWORD_DO_BLOCK;
8584 }
8585 return PM_TOKEN_KEYWORD_DO;
8586 }
8587
8588 if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
8589 if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8590 if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8591 break;
8592 case 3:
8593 if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8594 if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8595 if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8596 if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8597 if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8598 if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8599 if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8600 break;
8601 case 4:
8602 if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8603 if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8604 if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8605 if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8606 if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8607 if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8608 if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8609 if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8610 break;
8611 case 5:
8612 if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8613 if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8614 if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8615 if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8616 if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8617 if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8618 if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8619 if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8620 if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8621 if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8622 if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
8623 if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
8624 if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8625 break;
8626 case 6:
8627 if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8628 if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8629 if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
8630 if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8631 if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
8632 break;
8633 case 8:
8634 if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8635 if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8636 break;
8637 case 12:
8638 if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
8639 break;
8640 }
8641 }
8642
8643 if (encoding_changed) {
8644 return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8645 }
8646 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8647}
8648
8653static bool
8654current_token_starts_line(pm_parser_t *parser) {
8655 return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
8656}
8657
8672static pm_token_type_t
8673lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
8674 // If there is no content following this #, then we're at the end of
8675 // the string and we can safely return string content.
8676 if (pound + 1 >= parser->end) {
8677 parser->current.end = pound + 1;
8678 return PM_TOKEN_STRING_CONTENT;
8679 }
8680
8681 // Now we'll check against the character that follows the #. If it
8682 // constitutes valid interplation, we'll handle that, otherwise we'll return
8683 // 0.
8684 switch (pound[1]) {
8685 case '@': {
8686 // In this case we may have hit an embedded instance or class variable.
8687 if (pound + 2 >= parser->end) {
8688 parser->current.end = pound + 1;
8689 return PM_TOKEN_STRING_CONTENT;
8690 }
8691
8692 // If we're looking at a @ and there's another @, then we'll skip past the
8693 // second @.
8694 const uint8_t *variable = pound + 2;
8695 if (*variable == '@' && pound + 3 < parser->end) variable++;
8696
8697 if (char_is_identifier_start(parser, variable, parser->end - variable)) {
8698 // At this point we're sure that we've either hit an embedded instance
8699 // or class variable. In this case we'll first need to check if we've
8700 // already consumed content.
8701 if (pound > parser->current.start) {
8702 parser->current.end = pound;
8703 return PM_TOKEN_STRING_CONTENT;
8704 }
8705
8706 // Otherwise we need to return the embedded variable token
8707 // and then switch to the embedded variable lex mode.
8708 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8709 parser->current.end = pound + 1;
8710 return PM_TOKEN_EMBVAR;
8711 }
8712
8713 // If we didn't get a valid interpolation, then this is just regular
8714 // string content. This is like if we get "#@-". In this case the caller
8715 // should keep lexing.
8716 parser->current.end = pound + 1;
8717 return 0;
8718 }
8719 case '$':
8720 // In this case we may have hit an embedded global variable. If there's
8721 // not enough room, then we'll just return string content.
8722 if (pound + 2 >= parser->end) {
8723 parser->current.end = pound + 1;
8724 return PM_TOKEN_STRING_CONTENT;
8725 }
8726
8727 // This is the character that we're going to check to see if it is the
8728 // start of an identifier that would indicate that this is a global
8729 // variable.
8730 const uint8_t *check = pound + 2;
8731
8732 if (pound[2] == '-') {
8733 if (pound + 3 >= parser->end) {
8734 parser->current.end = pound + 2;
8735 return PM_TOKEN_STRING_CONTENT;
8736 }
8737
8738 check++;
8739 }
8740
8741 // If the character that we're going to check is the start of an
8742 // identifier, or we don't have a - and the character is a decimal number
8743 // or a global name punctuation character, then we've hit an embedded
8744 // global variable.
8745 if (
8746 char_is_identifier_start(parser, check, parser->end - check) ||
8747 (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
8748 ) {
8749 // In this case we've hit an embedded global variable. First check to
8750 // see if we've already consumed content. If we have, then we need to
8751 // return that content as string content first.
8752 if (pound > parser->current.start) {
8753 parser->current.end = pound;
8754 return PM_TOKEN_STRING_CONTENT;
8755 }
8756
8757 // Otherwise, we need to return the embedded variable token and switch
8758 // to the embedded variable lex mode.
8759 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8760 parser->current.end = pound + 1;
8761 return PM_TOKEN_EMBVAR;
8762 }
8763
8764 // In this case we've hit a #$ that does not indicate a global variable.
8765 // In this case we'll continue lexing past it.
8766 parser->current.end = pound + 1;
8767 return 0;
8768 case '{':
8769 // In this case it's the start of an embedded expression. If we have
8770 // already consumed content, then we need to return that content as string
8771 // content first.
8772 if (pound > parser->current.start) {
8773 parser->current.end = pound;
8774 return PM_TOKEN_STRING_CONTENT;
8775 }
8776
8777 parser->enclosure_nesting++;
8778
8779 // Otherwise we'll skip past the #{ and begin lexing the embedded
8780 // expression.
8781 lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
8782 parser->current.end = pound + 2;
8783 parser->command_start = true;
8784 pm_do_loop_stack_push(parser, false);
8785 return PM_TOKEN_EMBEXPR_BEGIN;
8786 default:
8787 // In this case we've hit a # that doesn't constitute interpolation. We'll
8788 // mark that by returning the not provided token type. This tells the
8789 // consumer to keep lexing forward.
8790 parser->current.end = pound + 1;
8791 return 0;
8792 }
8793}
8794
/* Bit flags that are combined and passed around while reading an escape. */

/* No flag is set. */
static const uint8_t PM_ESCAPE_FLAG_NONE = 0;

/* Tested by escape_byte to apply the control mask (& 0x9f). */
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 1u << 0;

/* Tested by escape_byte to set the high bit (| 0x80). */
static const uint8_t PM_ESCAPE_FLAG_META = 1u << 1;

/* Tested when handling \u escapes that belong to a character literal. */
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 1u << 2;

/* Tested to mirror escapes into the regular expression buffer and to defer
 * some errors to regular expression validation. */
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 1u << 3;
8800
/**
 * Lookup table indexed by a 7-bit ASCII value (0x00-0x7F). An entry is nonzero
 * for the bytes that char_is_ascii_printable accepts: the whitespace controls
 * 0x09-0x0D and 0x20-0x7E, with the exception of the backslash (0x5C).
 */
static const bool ascii_printable_chars[] = {
    0, 0, 0, 0, 0, 0, 0, 0, /* 0x00-0x07 */
    0, 1, 1, 1, 1, 1, 0, 0, /* 0x08-0x0F: 0x09-0x0D are accepted */
    0, 0, 0, 0, 0, 0, 0, 0, /* 0x10-0x17 */
    0, 0, 0, 0, 0, 0, 0, 0, /* 0x18-0x1F */
    1, 1, 1, 1, 1, 1, 1, 1, /* 0x20-0x27 */
    1, 1, 1, 1, 1, 1, 1, 1, /* 0x28-0x2F */
    1, 1, 1, 1, 1, 1, 1, 1, /* 0x30-0x37 */
    1, 1, 1, 1, 1, 1, 1, 1, /* 0x38-0x3F */
    1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x47 */
    1, 1, 1, 1, 1, 1, 1, 1, /* 0x48-0x4F */
    1, 1, 1, 1, 1, 1, 1, 1, /* 0x50-0x57 */
    1, 1, 1, 1, 0, 1, 1, 1, /* 0x58-0x5F: 0x5C (the backslash) is rejected */
    1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x67 */
    1, 1, 1, 1, 1, 1, 1, 1, /* 0x68-0x6F */
    1, 1, 1, 1, 1, 1, 1, 1, /* 0x70-0x77 */
    1, 1, 1, 1, 1, 1, 1, 0  /* 0x78-0x7F: 0x7F (DEL) is rejected */
};
8814
8815static PRISM_INLINE bool
8816char_is_ascii_printable(const uint8_t b) {
8817 return (b < 0x80) && ascii_printable_chars[b];
8818}
8819
8824static PRISM_INLINE uint8_t
8825escape_hexadecimal_digit(const uint8_t value) {
8826 return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
8827}
8828
8834static PRISM_INLINE uint32_t
8835escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length, const pm_location_t *error_location, const uint8_t flags) {
8836 uint32_t value = 0;
8837 for (size_t index = 0; index < length; index++) {
8838 if (index != 0) value <<= 4;
8839 value |= escape_hexadecimal_digit(string[index]);
8840 }
8841
8842 // Here we're going to verify that the value is actually a valid Unicode
8843 // codepoint and not a surrogate pair.
8844 if (value >= 0xD800 && value <= 0xDFFF) {
8845 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8846 // In regexp context, defer the error to regexp encoding
8847 // validation where we can produce a regexp-specific message.
8848 } else if (error_location != NULL) {
8849 pm_parser_err(parser, error_location->start, error_location->length, PM_ERR_ESCAPE_INVALID_UNICODE);
8850 } else {
8851 pm_parser_err(parser, U32(string - parser->start), U32(length), PM_ERR_ESCAPE_INVALID_UNICODE);
8852 }
8853 return 0xFFFD;
8854 }
8855
8856 return value;
8857}
8858
8862static PRISM_INLINE uint8_t
8863escape_byte(uint8_t value, const uint8_t flags) {
8864 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
8865 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
8866 return value;
8867}
8868
8872static PRISM_INLINE void
8873escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
8874 // \u escape sequences in string-like structures implicitly change the
8875 // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
8876 // literal.
8877 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
8878 if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
8879 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8880 // In regexp context, suppress this error — the regexp encoding
8881 // validation will produce a more specific error message.
8882 } else {
8883 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(end - start), PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
8884 }
8885 }
8886
8887 parser->explicit_encoding = PM_ENCODING_UTF_8_ENTRY;
8888 }
8889
8890 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
8891 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8892 // In regexp context, defer the error to the regexp encoding
8893 // validation which produces a regexp-specific message.
8894 } else {
8895 pm_parser_err(parser, U32(start - parser->start), U32(end - start), PM_ERR_ESCAPE_INVALID_UNICODE);
8896 }
8897
8898 pm_buffer_append_byte(buffer, 0xEF);
8899 pm_buffer_append_byte(buffer, 0xBF);
8900 pm_buffer_append_byte(buffer, 0xBD);
8901 }
8902}
8903
8908static PRISM_INLINE void
8909escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, uint8_t byte) {
8910 if (byte >= 0x80) {
8911 if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
8912 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8913 // In regexp context, suppress this error — the regexp encoding
8914 // validation will produce a more specific error message.
8915 } else {
8916 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
8917 }
8918 }
8919
8920 parser->explicit_encoding = parser->encoding;
8921 }
8922
8923 pm_buffer_append_byte(buffer, byte);
8924}
8925
8941static PRISM_INLINE void
8942escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
8943 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8944 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
8945 }
8946
8947 escape_write_byte_encoded(parser, buffer, flags, byte);
8948}
8949
8953static PRISM_INLINE void
8954escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
8955 size_t width;
8956 if (parser->encoding_changed) {
8957 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8958 } else {
8959 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
8960 }
8961
8962 if (width == 1) {
8963 if (*parser->current.end == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
8964 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->current.end++, flags));
8965 } else if (width > 1) {
8966 // Valid multibyte character. Just ignore escape.
8967 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
8968 pm_buffer_append_bytes(b, parser->current.end, width);
8969 parser->current.end += width;
8970 } else {
8971 // Assume the next character wasn't meant to be part of this escape
8972 // sequence since it is invalid. Add an error and move on.
8973 parser->current.end++;
8974 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8975 }
8976}
8977
8983static void
8984escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
8985#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
8986
8987 PM_PARSER_WARN_TOKEN_FORMAT(
8988 parser,
8989 &parser->current,
8990 PM_WARN_INVALID_CHARACTER,
8991 FLAG(flags),
8992 FLAG(flag),
8993 type
8994 );
8995
8996#undef FLAG
8997}
8998
9002static void
9003escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9004 uint8_t peeked = peek(parser);
9005 switch (peeked) {
9006 case '\\': {
9007 parser->current.end++;
9008 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9009 return;
9010 }
9011 case '\'': {
9012 parser->current.end++;
9013 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9014 return;
9015 }
9016 case 'a': {
9017 parser->current.end++;
9018 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9019 return;
9020 }
9021 case 'b': {
9022 parser->current.end++;
9023 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9024 return;
9025 }
9026 case 'e': {
9027 parser->current.end++;
9028 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9029 return;
9030 }
9031 case 'f': {
9032 parser->current.end++;
9033 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9034 return;
9035 }
9036 case 'n': {
9037 parser->current.end++;
9038 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9039 return;
9040 }
9041 case 'r': {
9042 parser->current.end++;
9043 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9044 return;
9045 }
9046 case 's': {
9047 parser->current.end++;
9048 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9049 return;
9050 }
9051 case 't': {
9052 parser->current.end++;
9053 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9054 return;
9055 }
9056 case 'v': {
9057 parser->current.end++;
9058 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9059 return;
9060 }
9061 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9062 uint8_t value = (uint8_t) (*parser->current.end - '0');
9063 parser->current.end++;
9064
9065 if (pm_char_is_octal_digit(peek(parser))) {
9066 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9067 parser->current.end++;
9068
9069 if (pm_char_is_octal_digit(peek(parser))) {
9070 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9071 parser->current.end++;
9072 }
9073 }
9074
9075 value = escape_byte(value, flags);
9076 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9077 return;
9078 }
9079 case 'x': {
9080 const uint8_t *start = parser->current.end - 1;
9081
9082 parser->current.end++;
9083 uint8_t byte = peek(parser);
9084
9085 if (pm_char_is_hexadecimal_digit(byte)) {
9086 uint8_t value = escape_hexadecimal_digit(byte);
9087 parser->current.end++;
9088
9089 byte = peek(parser);
9090 if (pm_char_is_hexadecimal_digit(byte)) {
9091 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9092 parser->current.end++;
9093 }
9094
9095 value = escape_byte(value, flags);
9096 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9097 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9098 pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9099 } else {
9100 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9101 }
9102 }
9103
9104 escape_write_byte_encoded(parser, buffer, flags, value);
9105 } else {
9106 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9107 }
9108
9109 return;
9110 }
9111 case 'u': {
9112 const uint8_t *start = parser->current.end - 1;
9113 parser->current.end++;
9114
9115 if (parser->current.end == parser->end) {
9116 const uint8_t *start = parser->current.end - 2;
9117 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9118 } else if (peek(parser) == '{') {
9119 const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9120 parser->current.end++;
9121
9122 size_t whitespace;
9123 while (true) {
9124 if ((whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9125 parser->current.end += whitespace;
9126 } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9127 // This is super hacky, but it gets us nicer error
9128 // messages because we can still pass it off to the
9129 // regular expression engine even if we hit an
9130 // unterminated regular expression.
9131 parser->current.end += 2;
9132 } else {
9133 break;
9134 }
9135 }
9136
9137 const uint8_t *extra_codepoints_start = NULL;
9138 int codepoints_count = 0;
9139
9140 while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9141 const uint8_t *unicode_start = parser->current.end;
9142 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9143
9144 if (hexadecimal_length > 6) {
9145 // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9146 pm_parser_err(parser, U32(unicode_start - parser->start), U32(hexadecimal_length), PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9147 } else if (hexadecimal_length == 0) {
9148 // there are not hexadecimal characters
9149
9150 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9151 // If this is a regular expression, we are going to
9152 // let the regular expression engine handle this
9153 // error instead of us because we don't know at this
9154 // point if we're inside a comment in /x mode.
9155 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9156 } else {
9157 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE);
9158 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->current), 0, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9159 }
9160
9161 return;
9162 }
9163
9164 parser->current.end += hexadecimal_length;
9165 codepoints_count++;
9166 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9167 extra_codepoints_start = unicode_start;
9168 }
9169
9170 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL, flags);
9171 escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9172
9173 parser->current.end += pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
9174 }
9175
9176 // ?\u{nnnn} character literal should contain only one codepoint
9177 // and cannot be like ?\u{nnnn mmmm}.
9178 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9179 pm_parser_err(parser, U32(extra_codepoints_start - parser->start), U32(parser->current.end - 1 - extra_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9180 }
9181
9182 if (parser->current.end == parser->end) {
9183 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9184 } else if (peek(parser) == '}') {
9185 parser->current.end++;
9186 } else {
9187 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9188 // If this is a regular expression, we are going to let
9189 // the regular expression engine handle this error
9190 // instead of us because we don't know at this point if
9191 // we're inside a comment in /x mode.
9192 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9193 } else {
9194 pm_parser_err(parser, U32(unicode_codepoints_start - parser->start), U32(parser->current.end - unicode_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9195 }
9196 }
9197
9198 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9199 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9200 }
9201 } else {
9202 size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9203
9204 if (length == 0) {
9205 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9206 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9207 } else {
9208 const uint8_t *start = parser->current.end - 2;
9209 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->start), U32(parser->current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9210 }
9211 } else if (length == 4) {
9212 uint32_t value = escape_unicode(parser, parser->current.end, 4, NULL, flags);
9213
9214 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9215 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9216 }
9217
9218 escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9219 parser->current.end += 4;
9220 } else {
9221 parser->current.end += length;
9222
9223 if (flags & PM_ESCAPE_FLAG_REGEXP) {
9224 // If this is a regular expression, we are going to let
9225 // the regular expression engine handle this error
9226 // instead of us.
9227 pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9228 } else {
9229 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9230 }
9231 }
9232 }
9233
9234 return;
9235 }
9236 case 'c': {
9237 parser->current.end++;
9238 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9239 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9240 }
9241
9242 if (parser->current.end == parser->end) {
9243 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9244 return;
9245 }
9246
9247 uint8_t peeked = peek(parser);
9248 switch (peeked) {
9249 case '?': {
9250 parser->current.end++;
9251 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9252 return;
9253 }
9254 case '\\':
9255 parser->current.end++;
9256
9257 if (match(parser, 'u') || match(parser, 'U')) {
9258 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9259 return;
9260 }
9261
9262 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9263 return;
9264 case ' ':
9265 parser->current.end++;
9266 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9267 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9268 return;
9269 case '\t':
9270 parser->current.end++;
9271 escape_read_warn(parser, flags, 0, "\\t");
9272 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9273 return;
9274 default: {
9275 if (!char_is_ascii_printable(peeked)) {
9276 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9277 return;
9278 }
9279
9280 if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
9281 parser->current.end++;
9282 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9283 return;
9284 }
9285 }
9286 }
9287 case 'C': {
9288 parser->current.end++;
9289 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9290 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9291 }
9292
9293 if (peek(parser) != '-') {
9294 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9295 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
9296 return;
9297 }
9298
9299 parser->current.end++;
9300 if (parser->current.end == parser->end) {
9301 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9302 return;
9303 }
9304
9305 uint8_t peeked = peek(parser);
9306 switch (peeked) {
9307 case '?': {
9308 parser->current.end++;
9309 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9310 return;
9311 }
9312 case '\\':
9313 parser->current.end++;
9314
9315 if (match(parser, 'u') || match(parser, 'U')) {
9316 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9317 return;
9318 }
9319
9320 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9321 return;
9322 case ' ':
9323 parser->current.end++;
9324 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9325 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9326 return;
9327 case '\t':
9328 parser->current.end++;
9329 escape_read_warn(parser, flags, 0, "\\t");
9330 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9331 return;
9332 default: {
9333 if (!char_is_ascii_printable(peeked)) {
9334 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9335 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
9336 return;
9337 }
9338
9339 if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
9340 parser->current.end++;
9341 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9342 return;
9343 }
9344 }
9345 }
9346 case 'M': {
9347 parser->current.end++;
9348 if (flags & PM_ESCAPE_FLAG_META) {
9349 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9350 }
9351
9352 if (peek(parser) != '-') {
9353 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9354 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9355 return;
9356 }
9357
9358 parser->current.end++;
9359 if (parser->current.end == parser->end) {
9360 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9361 return;
9362 }
9363
9364 uint8_t peeked = peek(parser);
9365 switch (peeked) {
9366 case '\\':
9367 parser->current.end++;
9368
9369 if (match(parser, 'u') || match(parser, 'U')) {
9370 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9371 return;
9372 }
9373
9374 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9375 return;
9376 case ' ':
9377 parser->current.end++;
9378 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
9379 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9380 return;
9381 case '\t':
9382 parser->current.end++;
9383 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
9384 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9385 return;
9386 default:
9387 if (!char_is_ascii_printable(peeked)) {
9388 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9389 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9390 return;
9391 }
9392
9393 if (peeked == '\n') pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
9394 parser->current.end++;
9395 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9396 return;
9397 }
9398 }
9399 case '\r': {
9400 if (peek_offset(parser, 1) == '\n') {
9401 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 2);
9402 parser->current.end += 2;
9403 escape_write_byte_encoded(parser, buffer, flags, escape_byte('\n', flags));
9404 return;
9405 }
9407 }
9408 default: {
9409 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
9410 size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9411 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9412 return;
9413 }
9414 if (parser->current.end < parser->end) {
9415 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
9416 } else {
9417 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
9418 }
9419 return;
9420 }
9421 }
9422}
9423
9449static pm_token_type_t
9450lex_question_mark(pm_parser_t *parser) {
9451 if (lex_state_end_p(parser)) {
9452 lex_state_set(parser, PM_LEX_STATE_BEG);
9453 return PM_TOKEN_QUESTION_MARK;
9454 }
9455
9456 if (parser->current.end >= parser->end) {
9457 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
9458 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9459 return PM_TOKEN_CHARACTER_LITERAL;
9460 }
9461
9462 if (pm_char_is_whitespace(*parser->current.end)) {
9463 lex_state_set(parser, PM_LEX_STATE_BEG);
9464 return PM_TOKEN_QUESTION_MARK;
9465 }
9466
9467 lex_state_set(parser, PM_LEX_STATE_BEG);
9468
9469 if (match(parser, '\\')) {
9470 lex_state_set(parser, PM_LEX_STATE_END);
9471
9472 pm_buffer_t buffer;
9473 pm_buffer_init(&buffer, 3);
9474
9475 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
9476
9477 // Copy buffer data into the arena and free the heap buffer.
9478 void *arena_data = pm_arena_memdup(parser->arena, buffer.value, buffer.length, PRISM_ALIGNOF(uint8_t));
9479 pm_string_constant_init(&parser->current_string, (const char *) arena_data, buffer.length);
9480 pm_buffer_cleanup(&buffer);
9481
9482 return PM_TOKEN_CHARACTER_LITERAL;
9483 } else {
9484 size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9485
9486 // Ternary operators can have a ? immediately followed by an identifier
9487 // which starts with an underscore. We check for this case here.
9488 if (
9489 !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
9490 (
9491 (parser->current.end + encoding_width >= parser->end) ||
9492 !char_is_identifier(parser, parser->current.end + encoding_width, parser->end - (parser->current.end + encoding_width))
9493 )
9494 ) {
9495 lex_state_set(parser, PM_LEX_STATE_END);
9496 parser->current.end += encoding_width;
9497 pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
9498 return PM_TOKEN_CHARACTER_LITERAL;
9499 }
9500 }
9501
9502 return PM_TOKEN_QUESTION_MARK;
9503}
9504
9509static pm_token_type_t
9510lex_at_variable(pm_parser_t *parser) {
9511 pm_token_type_t type = match(parser, '@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
9512 const uint8_t *end = parser->end;
9513
9514 size_t width;
9515 if ((width = char_is_identifier_start(parser, parser->current.end, end - parser->current.end)) > 0) {
9516 parser->current.end += width;
9517
9518 while ((width = char_is_identifier(parser, parser->current.end, end - parser->current.end)) > 0) {
9519 parser->current.end += width;
9520 }
9521 } else if (parser->current.end < end && pm_char_is_decimal_digit(*parser->current.end)) {
9522 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9523 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) {
9524 diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9525 }
9526
9527 size_t width = parser->encoding->char_width(parser->current.end, end - parser->current.end);
9528 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
9529 } else {
9530 pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
9531 pm_parser_err_token(parser, &parser->current, diag_id);
9532 }
9533
9534 // If we're lexing an embedded variable, then we need to pop back into the
9535 // parent lex context.
9536 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
9537 lex_mode_pop(parser);
9538 }
9539
9540 return type;
9541}
9542
9546static PRISM_INLINE void
9547parser_lex_callback(pm_parser_t *parser) {
9548 if (parser->lex_callback.callback) {
9549 parser->lex_callback.callback(parser, &parser->current, parser->lex_callback.data);
9550 }
9551}
9552
9557parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
9558 pm_comment_t *comment = (pm_comment_t *) pm_arena_alloc(&parser->metadata_arena, sizeof(pm_comment_t), PRISM_ALIGNOF(pm_comment_t));
9559
9560 *comment = (pm_comment_t) {
9561 .type = type,
9562 .location = TOK2LOC(parser, &parser->current)
9563 };
9564
9565 return comment;
9566}
9567
9573static pm_token_type_t
9574lex_embdoc(pm_parser_t *parser) {
9575 // First, lex out the EMBDOC_BEGIN token.
9576 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9577
9578 if (newline == NULL) {
9579 parser->current.end = parser->end;
9580 } else {
9581 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
9582 parser->current.end = newline + 1;
9583 }
9584
9585 parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
9586 parser_lex_callback(parser);
9587
9588 // Now, create a comment that is going to be attached to the parser.
9589 const uint8_t *comment_start = parser->current.start;
9590 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
9591
9592 // Now, loop until we find the end of the embedded documentation or the end
9593 // of the file.
9594 while (parser->current.end + 4 <= parser->end) {
9595 parser->current.start = parser->current.end;
9596
9597 // If we've hit the end of the embedded documentation then we'll return
9598 // that token here.
9599 if (
9600 (memcmp(parser->current.end, "=end", 4) == 0) &&
9601 (
9602 (parser->current.end + 4 == parser->end) || // end of file
9603 pm_char_is_whitespace(parser->current.end[4]) || // whitespace
9604 (parser->current.end[4] == '\0') || // NUL or end of script
9605 (parser->current.end[4] == '\004') || // ^D
9606 (parser->current.end[4] == '\032') // ^Z
9607 )
9608 ) {
9609 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9610
9611 if (newline == NULL) {
9612 parser->current.end = parser->end;
9613 } else {
9614 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
9615 parser->current.end = newline + 1;
9616 }
9617
9618 parser->current.type = PM_TOKEN_EMBDOC_END;
9619 parser_lex_callback(parser);
9620
9621 comment->location.length = (uint32_t) (parser->current.end - comment_start);
9622 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9623
9624 return PM_TOKEN_EMBDOC_END;
9625 }
9626
9627 // Otherwise, we'll parse until the end of the line and return a line of
9628 // embedded documentation.
9629 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
9630
9631 if (newline == NULL) {
9632 parser->current.end = parser->end;
9633 } else {
9634 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
9635 parser->current.end = newline + 1;
9636 }
9637
9638 parser->current.type = PM_TOKEN_EMBDOC_LINE;
9639 parser_lex_callback(parser);
9640 }
9641
9642 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
9643
9644 comment->location.length = (uint32_t) (parser->current.end - comment_start);
9645 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
9646
9647 return PM_TOKEN_EOF;
9648}
9649
9655static PRISM_INLINE void
9656parser_lex_ignored_newline(pm_parser_t *parser) {
9657 parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
9658 parser_lex_callback(parser);
9659}
9660
9670static PRISM_INLINE void
9671parser_flush_heredoc_end(pm_parser_t *parser) {
9672 assert(parser->heredoc_end <= parser->end);
9673 parser->next_start = parser->heredoc_end;
9674 parser->heredoc_end = NULL;
9675}
9676
9680static bool
9681parser_end_of_line_p(const pm_parser_t *parser) {
9682 const uint8_t *cursor = parser->current.end;
9683
9684 while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
9685 if (!pm_char_is_inline_whitespace(*cursor++)) return false;
9686 }
9687
9688 return true;
9689}
9690
9709typedef struct {
9715
9720 const uint8_t *cursor;
9722
9742
9746static PRISM_INLINE void
9747pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
9748 pm_buffer_append_byte(&token_buffer->buffer, byte);
9749}
9750
9751static PRISM_INLINE void
9752pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
9753 pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
9754}
9755
9759static PRISM_INLINE size_t
9760parser_char_width(const pm_parser_t *parser) {
9761 size_t width;
9762 if (parser->encoding_changed) {
9763 width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9764 } else {
9765 width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9766 }
9767
9768 // TODO: If the character is invalid in the given encoding, then we'll just
9769 // push one byte into the buffer. This should actually be an error.
9770 return (width == 0 ? 1 : width);
9771}
9772
9776static void
9777pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
9778 size_t width = parser_char_width(parser);
9779 pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
9780 parser->current.end += width;
9781}
9782
9783static void
9784pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
9785 size_t width = parser_char_width(parser);
9786 const uint8_t *start = parser->current.end;
9787 pm_buffer_append_bytes(&token_buffer->base.buffer, start, width);
9788 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, width);
9789 parser->current.end += width;
9790}
9791
9798static PRISM_INLINE void
9799pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9800 // Copy buffer data into the arena and free the heap buffer.
9801 size_t len = pm_buffer_length(&token_buffer->buffer);
9802 void *arena_data = pm_arena_memdup(parser->arena, pm_buffer_value(&token_buffer->buffer), len, PRISM_ALIGNOF(uint8_t));
9803 pm_string_constant_init(&parser->current_string, (const char *) arena_data, len);
9804 pm_buffer_cleanup(&token_buffer->buffer);
9805}
9806
9807static PRISM_INLINE void
9808pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9809 pm_token_buffer_copy(parser, &token_buffer->base);
9810 pm_buffer_cleanup(&token_buffer->regexp_buffer);
9811}
9812
9822static void
9823pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9824 if (token_buffer->cursor == NULL) {
9825 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9826 } else {
9827 pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
9828 pm_token_buffer_copy(parser, token_buffer);
9829 }
9830}
9831
9832static void
9833pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9834 if (token_buffer->base.cursor == NULL) {
9835 pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
9836 } else {
9837 const uint8_t *cursor = token_buffer->base.cursor;
9838 size_t length = (size_t) (parser->current.end - cursor);
9839 pm_buffer_append_bytes(&token_buffer->base.buffer, cursor, length);
9840 pm_buffer_append_bytes(&token_buffer->regexp_buffer, cursor, length);
9841 pm_regexp_token_buffer_copy(parser, token_buffer);
9842 }
9843}
9844
9845#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
9846
9855static void
9856pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
9857 const uint8_t *start;
9858 if (token_buffer->cursor == NULL) {
9859 pm_buffer_init(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9860 start = parser->current.start;
9861 } else {
9862 start = token_buffer->cursor;
9863 }
9864
9865 const uint8_t *end = parser->current.end - 1;
9866 assert(end >= start);
9867 pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
9868
9869 token_buffer->cursor = end;
9870}
9871
9872static void
9873pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
9874 const uint8_t *start;
9875 if (token_buffer->base.cursor == NULL) {
9876 pm_buffer_init(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9877 pm_buffer_init(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9878 start = parser->current.start;
9879 } else {
9880 start = token_buffer->base.cursor;
9881 }
9882
9883 const uint8_t *end = parser->current.end - 1;
9884 pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
9885 pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
9886
9887 token_buffer->base.cursor = end;
9888}
9889
9890#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
9891
9896static PRISM_INLINE size_t
9897pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
9898 size_t whitespace = 0;
9899
9900 switch (indent) {
9901 case PM_HEREDOC_INDENT_NONE:
9902 // Do nothing, we can't match a terminator with
9903 // indentation and there's no need to calculate common
9904 // whitespace.
9905 break;
9906 case PM_HEREDOC_INDENT_DASH:
9907 // Skip past inline whitespace.
9908 *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
9909 break;
9910 case PM_HEREDOC_INDENT_TILDE:
9911 // Skip past inline whitespace and calculate common
9912 // whitespace.
9913 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
9914 if (**cursor == '\t') {
9915 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
9916 } else {
9917 whitespace++;
9918 }
9919 (*cursor)++;
9920 }
9921
9922 break;
9923 }
9924
9925 return whitespace;
9926}
9927
9932static uint8_t
9933pm_lex_percent_delimiter(pm_parser_t *parser) {
9934 size_t eol_length = match_eol(parser);
9935
9936 if (eol_length) {
9937 if (parser->heredoc_end) {
9938 // If we have already lexed a heredoc, then the newline has already
9939 // been added to the list. In this case we want to just flush the
9940 // heredoc end.
9941 parser_flush_heredoc_end(parser);
9942 } else {
9943 // Otherwise, we'll add the newline to the list of newlines.
9944 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + U32(eol_length));
9945 }
9946
9947 uint8_t delimiter = *parser->current.end;
9948
9949 // If our delimiter is \r\n, we want to treat it as if it's \n.
9950 // For example, %\r\nfoo\r\n should be "foo"
9951 if (eol_length == 2) {
9952 delimiter = *(parser->current.end + 1);
9953 }
9954
9955 parser->current.end += eol_length;
9956 return delimiter;
9957 }
9958
9959 return *parser->current.end++;
9960}
9961
/**
 * Finish lexing the current token: set its type to the given token type,
 * invoke the lex callback, and return from the enclosing function.
 *
 * Note that this expands to several statements ending in a bare `return`, so
 * it must be used as a statement of its own inside a braced block (or switch
 * case) of a function returning void, followed by a semicolon.
 */
#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
9967
9974static void
9975parser_lex(pm_parser_t *parser) {
9976 assert(parser->current.end <= parser->end);
9977 parser->previous = parser->current;
9978
9979 // This value mirrors cmd_state from CRuby.
9980 bool previous_command_start = parser->command_start;
9981 parser->command_start = false;
9982
9983 // This is used to communicate to the newline lexing function that we've
9984 // already seen a comment.
9985 bool lexed_comment = false;
9986
9987 // Here we cache the current value of the semantic token seen flag. This is
9988 // used to reset it in case we find a token that shouldn't flip this flag.
9989 unsigned int semantic_token_seen = parser->semantic_token_seen;
9990 parser->semantic_token_seen = true;
9991
9992 // We'll jump to this label when we are about to encounter an EOF.
9993 // If we still have lex_modes on the stack, we pop them so that cleanup
9994 // can happen. For example, we should still continue parsing after a heredoc
9995 // identifier, even if the heredoc body was syntax invalid.
9996 switch_lex_modes:
9997
9998 switch (parser->lex_modes.current->mode) {
9999 case PM_LEX_DEFAULT:
10000 case PM_LEX_EMBEXPR:
10001 case PM_LEX_EMBVAR:
10002
10003 // We have a specific named label here because we are going to jump back to
10004 // this location in the event that we have lexed a token that should not be
10005 // returned to the parser. This includes comments, ignored newlines, and
10006 // invalid tokens of some form.
10007 lex_next_token: {
10008 // If we have the special next_start pointer set, then we're going to jump
10009 // to that location and start lexing from there.
10010 if (parser->next_start != NULL) {
10011 parser->current.end = parser->next_start;
10012 parser->next_start = NULL;
10013 }
10014
10015 // This value mirrors space_seen from CRuby. It tracks whether or not
10016 // space has been eaten before the start of the next token.
10017 bool space_seen = false;
10018
10019 // First, we're going to skip past any whitespace at the front of the next
10020 // token. Skip runs of inline whitespace in bulk to avoid per-character
10021 // stores back to parser->current.end.
10022 bool chomping = true;
10023 while (parser->current.end < parser->end && chomping) {
10024 {
10025 static const uint8_t inline_whitespace[256] = {
10026 [' '] = 1, ['\t'] = 1, ['\f'] = 1, ['\v'] = 1
10027 };
10028 const uint8_t *scan = parser->current.end;
10029 while (scan < parser->end && inline_whitespace[*scan]) scan++;
10030 if (scan > parser->current.end) {
10031 parser->current.end = scan;
10032 space_seen = true;
10033 continue;
10034 }
10035 }
10036
10037 switch (*parser->current.end) {
10038 case '\r':
10039 if (match_eol_offset(parser, 1)) {
10040 chomping = false;
10041 } else {
10042 pm_parser_warn(parser, PM_TOKEN_END(parser, &parser->current), 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10043 parser->current.end++;
10044 space_seen = true;
10045 }
10046 break;
10047 case '\\': {
10048 size_t eol_length = match_eol_offset(parser, 1);
10049 if (eol_length) {
10050 if (parser->heredoc_end) {
10051 parser->current.end = parser->heredoc_end;
10052 parser->heredoc_end = NULL;
10053 } else {
10054 parser->current.end += eol_length + 1;
10055 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
10056 space_seen = true;
10057 }
10058 } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10059 parser->current.end += 2;
10060 } else {
10061 chomping = false;
10062 }
10063
10064 break;
10065 }
10066 default:
10067 chomping = false;
10068 break;
10069 }
10070 }
10071
10072 // Next, we'll set to start of this token to be the current end.
10073 parser->current.start = parser->current.end;
10074
10075 // We'll check if we're at the end of the file. If we are, then we
10076 // need to return the EOF token.
10077 if (parser->current.end >= parser->end) {
10078 // We may be missing closing tokens. We should pop modes one by one
10079 // to do the appropriate cleanup like moving next_start for heredocs.
10080 // Only when no mode is remaining will we actually emit the EOF token.
10081 if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
10082 lex_mode_pop(parser);
10083 goto switch_lex_modes;
10084 }
10085
10086 // If we hit EOF, but the EOF came immediately after a newline,
10087 // set the start of the token to the newline. This way any EOF
10088 // errors will be reported as happening on that line rather than
10089 // a line after. For example "foo(\n" should report an error
10090 // on line 1 even though EOF technically occurs on line 2.
10091 if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10092 parser->current.start -= 1;
10093 }
10094 LEX(PM_TOKEN_EOF);
10095 }
10096
10097 // Finally, we'll check the current character to determine the next
10098 // token.
10099 switch (*parser->current.end++) {
10100 case '\0': // NUL or end of script
10101 case '\004': // ^D
10102 case '\032': // ^Z
10103 parser->current.end--;
10104 LEX(PM_TOKEN_EOF);
10105
10106 case '#': { // comments
10107 const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10108 parser->current.end = ending == NULL ? parser->end : ending;
10109
10110 // If we found a comment while lexing, then we're going to
10111 // add it to the list of comments in the file and keep
10112 // lexing.
10113 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10114 pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10115
10116 if (ending) parser->current.end++;
10117 parser->current.type = PM_TOKEN_COMMENT;
10118 parser_lex_callback(parser);
10119
10120 // Here, parse the comment to see if it's a magic comment
10121 // and potentially change state on the parser.
10122 if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10123 ptrdiff_t length = parser->current.end - parser->current.start;
10124
10125 // If we didn't find a magic comment within the first
10126 // pass and we're at the start of the file, then we need
10127 // to do another pass to potentially find other patterns
10128 // for encoding comments.
10129 if (length >= 10 && !parser->encoding_locked) {
10130 parser_lex_magic_comment_encoding(parser);
10131 }
10132 }
10133
10134 lexed_comment = true;
10135 }
10137 case '\r':
10138 case '\n': {
10139 parser->semantic_token_seen = semantic_token_seen & 0x1;
10140 size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10141
10142 if (eol_length) {
10143 // The only way you can have carriage returns in this
10144 // particular loop is if you have a carriage return
10145 // followed by a newline. In that case we'll just skip
10146 // over the carriage return and continue lexing, in
10147 // order to make it so that the newline token
10148 // encapsulates both the carriage return and the
10149 // newline. Note that we need to check that we haven't
10150 // already lexed a comment here because that falls
10151 // through into here as well.
10152 if (!lexed_comment) {
10153 parser->current.end += eol_length - 1; // skip CR
10154 }
10155
10156 if (parser->heredoc_end == NULL) {
10157 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
10158 }
10159 }
10160
10161 if (parser->heredoc_end) {
10162 parser_flush_heredoc_end(parser);
10163 }
10164
10165 // If this is an ignored newline, then we can continue lexing after
10166 // calling the callback with the ignored newline token.
10167 switch (lex_state_ignored_p(parser)) {
10168 case PM_IGNORED_NEWLINE_NONE:
10169 break;
10170 case PM_IGNORED_NEWLINE_PATTERN:
10171 if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10172 if (!lexed_comment) parser_lex_ignored_newline(parser);
10173 lex_state_set(parser, PM_LEX_STATE_BEG);
10174 parser->command_start = true;
10175 parser->current.type = PM_TOKEN_NEWLINE;
10176 return;
10177 }
10179 case PM_IGNORED_NEWLINE_ALL:
10180 if (!lexed_comment) parser_lex_ignored_newline(parser);
10181 lexed_comment = false;
10182 goto lex_next_token;
10183 }
10184
10185 // Here we need to look ahead and see if there is a call operator
10186 // (either . or &.) that starts the next line. If there is, then this
10187 // is going to become an ignored newline and we're going to instead
10188 // return the call operator.
10189 const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10190 next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10191
10192 if (next_content < parser->end) {
10193 // If we hit a comment after a newline, then we're going to check
10194 // if it's ignored or if it's followed by a method call ('.').
10195 // If it is, then we're going to call the
10196 // callback with an ignored newline and then continue lexing.
10197 // Otherwise we'll return a regular newline.
10198 if (next_content[0] == '#') {
10199 // Here we look for a "." or "&." following a "\n".
10200 const uint8_t *following = next_newline(next_content, parser->end - next_content);
10201
10202 while (following && (following + 1 < parser->end)) {
10203 following++;
10204 following += pm_strspn_inline_whitespace(following, parser->end - following);
10205
10206 // If this is not followed by a comment, then we can break out
10207 // of this loop.
10208 if (peek_at(parser, following) != '#') break;
10209
10210 // If there is a comment, then we need to find the end of the
10211 // comment and continue searching from there.
10212 following = next_newline(following, parser->end - following);
10213 }
10214
10215 // If the lex state was ignored, we will lex the
10216 // ignored newline.
10217 if (lex_state_ignored_p(parser)) {
10218 if (!lexed_comment) parser_lex_ignored_newline(parser);
10219 lexed_comment = false;
10220 goto lex_next_token;
10221 }
10222
10223 // If we hit a '.' or a '&.' we will lex the ignored
10224 // newline.
10225 if (following && (
10226 (peek_at(parser, following) == '.') ||
10227 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10228 )) {
10229 if (!lexed_comment) parser_lex_ignored_newline(parser);
10230 lexed_comment = false;
10231 goto lex_next_token;
10232 }
10233
10234
10235 // If we are parsing as CRuby 4.0 or later and we
10236 // hit a '&&' or a '||' then we will lex the ignored
10237 // newline.
10238 if (
10239 (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) &&
10240 following && (
10241 (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '&') ||
10242 (peek_at(parser, following) == '|' && peek_at(parser, following + 1) == '|') ||
10243 (
10244 peek_at(parser, following) == 'a' &&
10245 peek_at(parser, following + 1) == 'n' &&
10246 peek_at(parser, following + 2) == 'd' &&
10247 peek_at(parser, next_content + 3) != '!' &&
10248 peek_at(parser, next_content + 3) != '?' &&
10249 !char_is_identifier(parser, following + 3, parser->end - (following + 3))
10250 ) ||
10251 (
10252 peek_at(parser, following) == 'o' &&
10253 peek_at(parser, following + 1) == 'r' &&
10254 peek_at(parser, next_content + 2) != '!' &&
10255 peek_at(parser, next_content + 2) != '?' &&
10256 !char_is_identifier(parser, following + 2, parser->end - (following + 2))
10257 )
10258 )
10259 ) {
10260 if (!lexed_comment) parser_lex_ignored_newline(parser);
10261 lexed_comment = false;
10262 goto lex_next_token;
10263 }
10264 }
10265
10266 // If we hit a . after a newline, then we're in a call chain and
10267 // we need to return the call operator.
10268 if (next_content[0] == '.') {
10269 // To match ripper, we need to emit an ignored newline even though
10270 // it's a real newline in the case that we have a beginless range
10271 // on a subsequent line.
10272 if (peek_at(parser, next_content + 1) == '.') {
10273 if (!lexed_comment) parser_lex_ignored_newline(parser);
10274 lex_state_set(parser, PM_LEX_STATE_BEG);
10275 parser->command_start = true;
10276 parser->current.type = PM_TOKEN_NEWLINE;
10277 return;
10278 }
10279
10280 if (!lexed_comment) parser_lex_ignored_newline(parser);
10281 lex_state_set(parser, PM_LEX_STATE_DOT);
10282 parser->current.start = next_content;
10283 parser->current.end = next_content + 1;
10284 parser->next_start = NULL;
10285 LEX(PM_TOKEN_DOT);
10286 }
10287
10288 // If we hit a &. after a newline, then we're in a call chain and
10289 // we need to return the call operator.
10290 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10291 if (!lexed_comment) parser_lex_ignored_newline(parser);
10292 lex_state_set(parser, PM_LEX_STATE_DOT);
10293 parser->current.start = next_content;
10294 parser->current.end = next_content + 2;
10295 parser->next_start = NULL;
10296 LEX(PM_TOKEN_AMPERSAND_DOT);
10297 }
10298
10299 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
10300 // If we hit an && then we are in a logical chain
10301 // and we need to return the logical operator.
10302 if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '&') {
10303 if (!lexed_comment) parser_lex_ignored_newline(parser);
10304 lex_state_set(parser, PM_LEX_STATE_BEG);
10305 parser->current.start = next_content;
10306 parser->current.end = next_content + 2;
10307 parser->next_start = NULL;
10308 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10309 }
10310
10311 // If we hit a || then we are in a logical chain and
10312 // we need to return the logical operator.
10313 if (peek_at(parser, next_content) == '|' && peek_at(parser, next_content + 1) == '|') {
10314 if (!lexed_comment) parser_lex_ignored_newline(parser);
10315 lex_state_set(parser, PM_LEX_STATE_BEG);
10316 parser->current.start = next_content;
10317 parser->current.end = next_content + 2;
10318 parser->next_start = NULL;
10319 LEX(PM_TOKEN_PIPE_PIPE);
10320 }
10321
10322 // If we hit an 'and' then we are in a logical chain
10323 // and we need to return the logical operator.
10324 if (
10325 peek_at(parser, next_content) == 'a' &&
10326 peek_at(parser, next_content + 1) == 'n' &&
10327 peek_at(parser, next_content + 2) == 'd' &&
10328 peek_at(parser, next_content + 3) != '!' &&
10329 peek_at(parser, next_content + 3) != '?' &&
10330 !char_is_identifier(parser, next_content + 3, parser->end - (next_content + 3))
10331 ) {
10332 if (!lexed_comment) parser_lex_ignored_newline(parser);
10333 lex_state_set(parser, PM_LEX_STATE_BEG);
10334 parser->current.start = next_content;
10335 parser->current.end = next_content + 3;
10336 parser->next_start = NULL;
10337 parser->command_start = true;
10338 LEX(PM_TOKEN_KEYWORD_AND);
10339 }
10340
10341 // If we hit a 'or' then we are in a logical chain
10342 // and we need to return the logical operator.
10343 if (
10344 peek_at(parser, next_content) == 'o' &&
10345 peek_at(parser, next_content + 1) == 'r' &&
10346 peek_at(parser, next_content + 2) != '!' &&
10347 peek_at(parser, next_content + 2) != '?' &&
10348 !char_is_identifier(parser, next_content + 2, parser->end - (next_content + 2))
10349 ) {
10350 if (!lexed_comment) parser_lex_ignored_newline(parser);
10351 lex_state_set(parser, PM_LEX_STATE_BEG);
10352 parser->current.start = next_content;
10353 parser->current.end = next_content + 2;
10354 parser->next_start = NULL;
10355 parser->command_start = true;
10356 LEX(PM_TOKEN_KEYWORD_OR);
10357 }
10358 }
10359 }
10360
10361 // At this point we know this is a regular newline, and we can set the
10362 // necessary state and return the token.
10363 lex_state_set(parser, PM_LEX_STATE_BEG);
10364 parser->command_start = true;
10365 parser->current.type = PM_TOKEN_NEWLINE;
10366 if (!lexed_comment) parser_lex_callback(parser);
10367 return;
10368 }
10369
10370 // ,
10371 case ',':
10372 if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10373 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type));
10374 }
10375
10376 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10377 LEX(PM_TOKEN_COMMA);
10378
10379 // (
10380 case '(': {
10381 pm_token_type_t type = PM_TOKEN_PARENTHESIS_LEFT;
10382
10383 if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10384 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10385 }
10386
10387 parser->enclosure_nesting++;
10388 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10389 pm_do_loop_stack_push(parser, false);
10390 LEX(type);
10391 }
10392
10393 // )
10394 case ')':
10395 parser->enclosure_nesting--;
10396 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10397 pm_do_loop_stack_pop(parser);
10398 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
10399
10400 // ;
10401 case ';':
10402 lex_state_set(parser, PM_LEX_STATE_BEG);
10403 parser->command_start = true;
10404 LEX(PM_TOKEN_SEMICOLON);
10405
10406 // [ [] []=
10407 case '[':
10408 parser->enclosure_nesting++;
10409 pm_token_type_t type = PM_TOKEN_BRACKET_LEFT;
10410
10411 if (lex_state_operator_p(parser)) {
10412 if (match(parser, ']')) {
10413 parser->enclosure_nesting--;
10414 lex_state_set(parser, PM_LEX_STATE_ARG);
10415 LEX(match(parser, '=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
10416 }
10417
10418 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10419 LEX(type);
10420 }
10421
10422 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10423 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
10424 }
10425
10426 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10427 pm_do_loop_stack_push(parser, false);
10428 LEX(type);
10429
10430 // ]
10431 case ']':
10432 parser->enclosure_nesting--;
10433 lex_state_set(parser, PM_LEX_STATE_END);
10434 pm_do_loop_stack_pop(parser);
10435 LEX(PM_TOKEN_BRACKET_RIGHT);
10436
10437 // {
10438 case '{': {
10439 pm_token_type_t type = PM_TOKEN_BRACE_LEFT;
10440
10441 if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10442 // This { begins a lambda
10443 parser->command_start = true;
10444 lex_state_set(parser, PM_LEX_STATE_BEG);
10445 type = PM_TOKEN_LAMBDA_BEGIN;
10446 } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10447 // This { begins a hash literal
10448 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10449 } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10450 // This { begins a block
10451 parser->command_start = true;
10452 lex_state_set(parser, PM_LEX_STATE_BEG);
10453 } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10454 // This { begins a block on a command
10455 parser->command_start = true;
10456 lex_state_set(parser, PM_LEX_STATE_BEG);
10457 } else {
10458 // This { begins a hash literal
10459 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10460 }
10461
10462 parser->enclosure_nesting++;
10463 parser->brace_nesting++;
10464 pm_do_loop_stack_push(parser, false);
10465
10466 LEX(type);
10467 }
10468
10469 // }
10470 case '}':
10471 parser->enclosure_nesting--;
10472 pm_do_loop_stack_pop(parser);
10473
10474 if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10475 lex_mode_pop(parser);
10476 LEX(PM_TOKEN_EMBEXPR_END);
10477 }
10478
10479 parser->brace_nesting--;
10480 lex_state_set(parser, PM_LEX_STATE_END);
10481 LEX(PM_TOKEN_BRACE_RIGHT);
10482
10483 // * ** **= *=
10484 case '*': {
10485 if (match(parser, '*')) {
10486 if (match(parser, '=')) {
10487 lex_state_set(parser, PM_LEX_STATE_BEG);
10488 LEX(PM_TOKEN_STAR_STAR_EQUAL);
10489 }
10490
10491 pm_token_type_t type = PM_TOKEN_STAR_STAR;
10492
10493 if (lex_state_spcarg_p(parser, space_seen)) {
10494 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
10495 type = PM_TOKEN_USTAR_STAR;
10496 } else if (lex_state_beg_p(parser)) {
10497 type = PM_TOKEN_USTAR_STAR;
10498 } else if (ambiguous_operator_p(parser, space_seen)) {
10499 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10500 }
10501
10502 if (lex_state_operator_p(parser)) {
10503 lex_state_set(parser, PM_LEX_STATE_ARG);
10504 } else {
10505 lex_state_set(parser, PM_LEX_STATE_BEG);
10506 }
10507
10508 LEX(type);
10509 }
10510
10511 if (match(parser, '=')) {
10512 lex_state_set(parser, PM_LEX_STATE_BEG);
10513 LEX(PM_TOKEN_STAR_EQUAL);
10514 }
10515
10516 pm_token_type_t type = PM_TOKEN_STAR;
10517
10518 if (lex_state_spcarg_p(parser, space_seen)) {
10519 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
10520 type = PM_TOKEN_USTAR;
10521 } else if (lex_state_beg_p(parser)) {
10522 type = PM_TOKEN_USTAR;
10523 } else if (ambiguous_operator_p(parser, space_seen)) {
10524 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
10525 }
10526
10527 if (lex_state_operator_p(parser)) {
10528 lex_state_set(parser, PM_LEX_STATE_ARG);
10529 } else {
10530 lex_state_set(parser, PM_LEX_STATE_BEG);
10531 }
10532
10533 LEX(type);
10534 }
10535
10536 // ! != !~ !@
10537 case '!':
10538 if (lex_state_operator_p(parser)) {
10539 lex_state_set(parser, PM_LEX_STATE_ARG);
10540 if (match(parser, '@')) {
10541 LEX(PM_TOKEN_BANG);
10542 }
10543 } else {
10544 lex_state_set(parser, PM_LEX_STATE_BEG);
10545 }
10546
10547 if (match(parser, '=')) {
10548 LEX(PM_TOKEN_BANG_EQUAL);
10549 }
10550
10551 if (match(parser, '~')) {
10552 LEX(PM_TOKEN_BANG_TILDE);
10553 }
10554
10555 LEX(PM_TOKEN_BANG);
10556
10557 // = => =~ == === =begin
10558 case '=':
10559 if (
10560 current_token_starts_line(parser) &&
10561 (parser->current.end + 5 <= parser->end) &&
10562 memcmp(parser->current.end, "begin", 5) == 0 &&
10563 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
10564 ) {
10565 pm_token_type_t type = lex_embdoc(parser);
10566 if (type == PM_TOKEN_EOF) {
10567 LEX(type);
10568 }
10569
10570 goto lex_next_token;
10571 }
10572
10573 if (lex_state_operator_p(parser)) {
10574 lex_state_set(parser, PM_LEX_STATE_ARG);
10575 } else {
10576 lex_state_set(parser, PM_LEX_STATE_BEG);
10577 }
10578
10579 if (match(parser, '>')) {
10580 LEX(PM_TOKEN_EQUAL_GREATER);
10581 }
10582
10583 if (match(parser, '~')) {
10584 LEX(PM_TOKEN_EQUAL_TILDE);
10585 }
10586
10587 if (match(parser, '=')) {
10588 LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
10589 }
10590
10591 LEX(PM_TOKEN_EQUAL);
10592
10593 // < << <<= <= <=>
10594 case '<':
10595 if (match(parser, '<')) {
10596 if (
10597 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
10598 !lex_state_end_p(parser) &&
10599 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
10600 ) {
10601 const uint8_t *end = parser->current.end;
10602
10603 pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
10604 pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
10605
10606 if (match(parser, '-')) {
10607 indent = PM_HEREDOC_INDENT_DASH;
10608 }
10609 else if (match(parser, '~')) {
10610 indent = PM_HEREDOC_INDENT_TILDE;
10611 }
10612
10613 if (match(parser, '`')) {
10614 quote = PM_HEREDOC_QUOTE_BACKTICK;
10615 }
10616 else if (match(parser, '"')) {
10617 quote = PM_HEREDOC_QUOTE_DOUBLE;
10618 }
10619 else if (match(parser, '\'')) {
10620 quote = PM_HEREDOC_QUOTE_SINGLE;
10621 }
10622
10623 const uint8_t *ident_start = parser->current.end;
10624 size_t width = 0;
10625
10626 if (parser->current.end >= parser->end) {
10627 parser->current.end = end;
10628 } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end)) == 0) {
10629 parser->current.end = end;
10630 } else {
10631 if (quote == PM_HEREDOC_QUOTE_NONE) {
10632 parser->current.end += width;
10633
10634 while ((width = char_is_identifier(parser, parser->current.end, parser->end - parser->current.end))) {
10635 parser->current.end += width;
10636 }
10637 } else {
10638 // If we have quotes, then we're going to go until we find the
10639 // end quote.
10640 while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
10641 if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
10642 parser->current.end++;
10643 }
10644 }
10645
10646 size_t ident_length = (size_t) (parser->current.end - ident_start);
10647 bool ident_error = false;
10648
10649 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10650 pm_parser_err(parser, U32(ident_start - parser->start), U32(ident_length), PM_ERR_HEREDOC_IDENTIFIER);
10651 ident_error = true;
10652 }
10653
10654 parser->explicit_encoding = NULL;
10655 lex_mode_push(parser, (pm_lex_mode_t) {
10656 .mode = PM_LEX_HEREDOC,
10657 .as.heredoc = {
10658 .base = {
10659 .ident_start = ident_start,
10660 .ident_length = ident_length,
10661 .quote = quote,
10662 .indent = indent
10663 },
10664 .next_start = parser->current.end,
10665 .common_whitespace = NULL,
10666 .line_continuation = false
10667 }
10668 });
10669
10670 if (parser->heredoc_end == NULL) {
10671 const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
10672
10673 if (body_start == NULL) {
10674 // If there is no newline after the heredoc identifier, then
10675 // this is not a valid heredoc declaration. In this case we
10676 // will add an error, but we will still return a heredoc
10677 // start.
10678 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
10679 body_start = parser->end;
10680 } else {
10681 // Otherwise, we want to indicate that the body of the
10682 // heredoc starts on the character after the next newline.
10683 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(body_start - parser->start + 1));
10684 body_start++;
10685 }
10686
10687 parser->next_start = body_start;
10688 } else {
10689 parser->next_start = parser->heredoc_end;
10690 }
10691
10692 LEX(PM_TOKEN_HEREDOC_START);
10693 }
10694 }
10695
10696 if (match(parser, '=')) {
10697 lex_state_set(parser, PM_LEX_STATE_BEG);
10698 LEX(PM_TOKEN_LESS_LESS_EQUAL);
10699 }
10700
10701 if (ambiguous_operator_p(parser, space_seen)) {
10702 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
10703 }
10704
10705 if (lex_state_operator_p(parser)) {
10706 lex_state_set(parser, PM_LEX_STATE_ARG);
10707 } else {
10708 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10709 lex_state_set(parser, PM_LEX_STATE_BEG);
10710 }
10711
10712 LEX(PM_TOKEN_LESS_LESS);
10713 }
10714
10715 if (lex_state_operator_p(parser)) {
10716 lex_state_set(parser, PM_LEX_STATE_ARG);
10717 } else {
10718 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
10719 lex_state_set(parser, PM_LEX_STATE_BEG);
10720 }
10721
10722 if (match(parser, '=')) {
10723 if (match(parser, '>')) {
10724 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
10725 }
10726
10727 LEX(PM_TOKEN_LESS_EQUAL);
10728 }
10729
10730 LEX(PM_TOKEN_LESS);
10731
10732 // > >> >>= >=
10733 case '>':
10734 if (match(parser, '>')) {
10735 if (lex_state_operator_p(parser)) {
10736 lex_state_set(parser, PM_LEX_STATE_ARG);
10737 } else {
10738 lex_state_set(parser, PM_LEX_STATE_BEG);
10739 }
10740 LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
10741 }
10742
10743 if (lex_state_operator_p(parser)) {
10744 lex_state_set(parser, PM_LEX_STATE_ARG);
10745 } else {
10746 lex_state_set(parser, PM_LEX_STATE_BEG);
10747 }
10748
10749 LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
10750
10751 // double-quoted string literal
10752 case '"': {
10753 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10754 lex_mode_push_string(parser, true, label_allowed, '\0', '"');
10755 LEX(PM_TOKEN_STRING_BEGIN);
10756 }
10757
10758 // xstring literal
10759 case '`': {
10760 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
10761 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10762 LEX(PM_TOKEN_BACKTICK);
10763 }
10764
10765 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
10766 if (previous_command_start) {
10767 lex_state_set(parser, PM_LEX_STATE_CMDARG);
10768 } else {
10769 lex_state_set(parser, PM_LEX_STATE_ARG);
10770 }
10771
10772 LEX(PM_TOKEN_BACKTICK);
10773 }
10774
10775 lex_mode_push_string(parser, true, false, '\0', '`');
10776 LEX(PM_TOKEN_BACKTICK);
10777 }
10778
10779 // single-quoted string literal
10780 case '\'': {
10781 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10782 lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
10783 LEX(PM_TOKEN_STRING_BEGIN);
10784 }
10785
10786 // ? character literal
10787 case '?':
10788 LEX(lex_question_mark(parser));
10789
10790 // & && &&= &=
10791 case '&': {
10792 if (match(parser, '&')) {
10793 lex_state_set(parser, PM_LEX_STATE_BEG);
10794
10795 if (match(parser, '=')) {
10796 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
10797 }
10798
10799 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10800 }
10801
10802 if (match(parser, '=')) {
10803 lex_state_set(parser, PM_LEX_STATE_BEG);
10804 LEX(PM_TOKEN_AMPERSAND_EQUAL);
10805 }
10806
10807 if (match(parser, '.')) {
10808 lex_state_set(parser, PM_LEX_STATE_DOT);
10809 LEX(PM_TOKEN_AMPERSAND_DOT);
10810 }
10811
10812 pm_token_type_t type = PM_TOKEN_AMPERSAND;
10813 if (lex_state_spcarg_p(parser, space_seen)) {
10814 if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
10815 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10816 } else {
10817 const uint8_t delim = peek_offset(parser, 1);
10818
10819 if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1, parser->end - (parser->current.end + 1))) {
10820 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10821 }
10822 }
10823
10824 type = PM_TOKEN_UAMPERSAND;
10825 } else if (lex_state_beg_p(parser)) {
10826 type = PM_TOKEN_UAMPERSAND;
10827 } else if (ambiguous_operator_p(parser, space_seen)) {
10828 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
10829 }
10830
10831 if (lex_state_operator_p(parser)) {
10832 lex_state_set(parser, PM_LEX_STATE_ARG);
10833 } else {
10834 lex_state_set(parser, PM_LEX_STATE_BEG);
10835 }
10836
10837 LEX(type);
10838 }
10839
10840 // | || ||= |=
10841 case '|':
10842 if (match(parser, '|')) {
10843 if (match(parser, '=')) {
10844 lex_state_set(parser, PM_LEX_STATE_BEG);
10845 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
10846 }
10847
10848 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
10849 parser->current.end--;
10850 LEX(PM_TOKEN_PIPE);
10851 }
10852
10853 lex_state_set(parser, PM_LEX_STATE_BEG);
10854 LEX(PM_TOKEN_PIPE_PIPE);
10855 }
10856
10857 if (match(parser, '=')) {
10858 lex_state_set(parser, PM_LEX_STATE_BEG);
10859 LEX(PM_TOKEN_PIPE_EQUAL);
10860 }
10861
10862 if (lex_state_operator_p(parser)) {
10863 lex_state_set(parser, PM_LEX_STATE_ARG);
10864 } else {
10865 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10866 }
10867
10868 LEX(PM_TOKEN_PIPE);
10869
10870 // + += +@
10871 case '+': {
10872 if (lex_state_operator_p(parser)) {
10873 lex_state_set(parser, PM_LEX_STATE_ARG);
10874
10875 if (match(parser, '@')) {
10876 LEX(PM_TOKEN_UPLUS);
10877 }
10878
10879 LEX(PM_TOKEN_PLUS);
10880 }
10881
10882 if (match(parser, '=')) {
10883 lex_state_set(parser, PM_LEX_STATE_BEG);
10884 LEX(PM_TOKEN_PLUS_EQUAL);
10885 }
10886
10887 if (
10888 lex_state_beg_p(parser) ||
10889 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
10890 ) {
10891 lex_state_set(parser, PM_LEX_STATE_BEG);
10892
10893 if (pm_char_is_decimal_digit(peek(parser))) {
10894 parser->current.end++;
10895 pm_token_type_t type = lex_numeric(parser);
10896 lex_state_set(parser, PM_LEX_STATE_END);
10897 LEX(type);
10898 }
10899
10900 LEX(PM_TOKEN_UPLUS);
10901 }
10902
10903 if (ambiguous_operator_p(parser, space_seen)) {
10904 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
10905 }
10906
10907 lex_state_set(parser, PM_LEX_STATE_BEG);
10908 LEX(PM_TOKEN_PLUS);
10909 }
10910
10911 // - -= -@
10912 case '-': {
10913 if (lex_state_operator_p(parser)) {
10914 lex_state_set(parser, PM_LEX_STATE_ARG);
10915
10916 if (match(parser, '@')) {
10917 LEX(PM_TOKEN_UMINUS);
10918 }
10919
10920 LEX(PM_TOKEN_MINUS);
10921 }
10922
10923 if (match(parser, '=')) {
10924 lex_state_set(parser, PM_LEX_STATE_BEG);
10925 LEX(PM_TOKEN_MINUS_EQUAL);
10926 }
10927
10928 if (match(parser, '>')) {
10929 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10930 LEX(PM_TOKEN_MINUS_GREATER);
10931 }
10932
10933 bool spcarg = lex_state_spcarg_p(parser, space_seen);
10934 bool is_beg = lex_state_beg_p(parser);
10935 if (!is_beg && spcarg) {
10936 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
10937 }
10938
10939 if (is_beg || spcarg) {
10940 lex_state_set(parser, PM_LEX_STATE_BEG);
10941 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
10942 }
10943
10944 if (ambiguous_operator_p(parser, space_seen)) {
10945 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
10946 }
10947
10948 lex_state_set(parser, PM_LEX_STATE_BEG);
10949 LEX(PM_TOKEN_MINUS);
10950 }
10951
10952 // . .. ...
10953 case '.': {
10954 bool beg_p = lex_state_beg_p(parser);
10955
10956 if (match(parser, '.')) {
10957 if (match(parser, '.')) {
10958 // If we're _not_ inside a range within default parameters
10959 if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
10960 if (lex_state_p(parser, PM_LEX_STATE_END)) {
10961 lex_state_set(parser, PM_LEX_STATE_BEG);
10962 } else {
10963 lex_state_set(parser, PM_LEX_STATE_ENDARG);
10964 }
10965 LEX(PM_TOKEN_UDOT_DOT_DOT);
10966 }
10967
10968 if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
10969 pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
10970 }
10971
10972 lex_state_set(parser, PM_LEX_STATE_BEG);
10973 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
10974 }
10975
10976 lex_state_set(parser, PM_LEX_STATE_BEG);
10977 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
10978 }
10979
10980 lex_state_set(parser, PM_LEX_STATE_DOT);
10981 LEX(PM_TOKEN_DOT);
10982 }
10983
10984 // integer
10985 case '0':
10986 case '1':
10987 case '2':
10988 case '3':
10989 case '4':
10990 case '5':
10991 case '6':
10992 case '7':
10993 case '8':
10994 case '9': {
10995 pm_token_type_t type = lex_numeric(parser);
10996 lex_state_set(parser, PM_LEX_STATE_END);
10997 LEX(type);
10998 }
10999
11000 // :: symbol
11001 case ':':
11002 if (match(parser, ':')) {
11003 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11004 lex_state_set(parser, PM_LEX_STATE_BEG);
11005 LEX(PM_TOKEN_UCOLON_COLON);
11006 }
11007
11008 lex_state_set(parser, PM_LEX_STATE_DOT);
11009 LEX(PM_TOKEN_COLON_COLON);
11010 }
11011
11012 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11013 lex_state_set(parser, PM_LEX_STATE_BEG);
11014 LEX(PM_TOKEN_COLON);
11015 }
11016
11017 if (peek(parser) == '"' || peek(parser) == '\'') {
11018 lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11019 parser->current.end++;
11020 }
11021
11022 lex_state_set(parser, PM_LEX_STATE_FNAME);
11023 LEX(PM_TOKEN_SYMBOL_BEGIN);
11024
11025 // / /=
11026 case '/':
11027 if (lex_state_beg_p(parser)) {
11028 lex_mode_push_regexp(parser, '\0', '/');
11029 LEX(PM_TOKEN_REGEXP_BEGIN);
11030 }
11031
11032 if (match(parser, '=')) {
11033 lex_state_set(parser, PM_LEX_STATE_BEG);
11034 LEX(PM_TOKEN_SLASH_EQUAL);
11035 }
11036
11037 if (lex_state_spcarg_p(parser, space_seen)) {
11038 pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11039 lex_mode_push_regexp(parser, '\0', '/');
11040 LEX(PM_TOKEN_REGEXP_BEGIN);
11041 }
11042
11043 if (ambiguous_operator_p(parser, space_seen)) {
11044 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11045 }
11046
11047 if (lex_state_operator_p(parser)) {
11048 lex_state_set(parser, PM_LEX_STATE_ARG);
11049 } else {
11050 lex_state_set(parser, PM_LEX_STATE_BEG);
11051 }
11052
11053 LEX(PM_TOKEN_SLASH);
11054
11055 // ^ ^=
11056 case '^':
11057 if (lex_state_operator_p(parser)) {
11058 lex_state_set(parser, PM_LEX_STATE_ARG);
11059 } else {
11060 lex_state_set(parser, PM_LEX_STATE_BEG);
11061 }
11062 LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11063
11064 // ~ ~@
11065 case '~':
11066 if (lex_state_operator_p(parser)) {
11067 (void) match(parser, '@');
11068 lex_state_set(parser, PM_LEX_STATE_ARG);
11069 } else {
11070 lex_state_set(parser, PM_LEX_STATE_BEG);
11071 }
11072
11073 LEX(PM_TOKEN_TILDE);
11074
11075 // % %= %i %I %q %Q %w %W
11076 case '%': {
11077 // If there is no subsequent character then we have an
11078 // invalid token. We're going to say it's the percent
11079 // operator because we don't want to move into the string
11080 // lex mode unnecessarily.
11081 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11082 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11083 LEX(PM_TOKEN_PERCENT);
11084 }
11085
11086 if (!lex_state_beg_p(parser) && match(parser, '=')) {
11087 lex_state_set(parser, PM_LEX_STATE_BEG);
11088 LEX(PM_TOKEN_PERCENT_EQUAL);
11089 } else if (
11090 lex_state_beg_p(parser) ||
11091 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11092 lex_state_spcarg_p(parser, space_seen)
11093 ) {
11094 if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11095 if (*parser->current.end >= 0x80) {
11096 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11097 }
11098
11099 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11100 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11101 LEX(PM_TOKEN_STRING_BEGIN);
11102 }
11103
11104 // Delimiters for %-literals cannot be alphanumeric. We
11105 // validate that here.
11106 uint8_t delimiter = peek_offset(parser, 1);
11107 if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11108 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11109 goto lex_next_token;
11110 }
11111
11112 switch (peek(parser)) {
11113 case 'i': {
11114 parser->current.end++;
11115
11116 if (parser->current.end < parser->end) {
11117 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11118 } else {
11119 lex_mode_push_list_eof(parser);
11120 }
11121
11122 LEX(PM_TOKEN_PERCENT_LOWER_I);
11123 }
11124 case 'I': {
11125 parser->current.end++;
11126
11127 if (parser->current.end < parser->end) {
11128 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11129 } else {
11130 lex_mode_push_list_eof(parser);
11131 }
11132
11133 LEX(PM_TOKEN_PERCENT_UPPER_I);
11134 }
11135 case 'r': {
11136 parser->current.end++;
11137
11138 if (parser->current.end < parser->end) {
11139 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11140 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11141 } else {
11142 lex_mode_push_regexp(parser, '\0', '\0');
11143 }
11144
11145 LEX(PM_TOKEN_REGEXP_BEGIN);
11146 }
11147 case 'q': {
11148 parser->current.end++;
11149
11150 if (parser->current.end < parser->end) {
11151 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11152 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11153 } else {
11154 lex_mode_push_string_eof(parser);
11155 }
11156
11157 LEX(PM_TOKEN_STRING_BEGIN);
11158 }
11159 case 'Q': {
11160 parser->current.end++;
11161
11162 if (parser->current.end < parser->end) {
11163 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11164 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11165 } else {
11166 lex_mode_push_string_eof(parser);
11167 }
11168
11169 LEX(PM_TOKEN_STRING_BEGIN);
11170 }
11171 case 's': {
11172 parser->current.end++;
11173
11174 if (parser->current.end < parser->end) {
11175 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11176 lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11177 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11178 } else {
11179 lex_mode_push_string_eof(parser);
11180 }
11181
11182 LEX(PM_TOKEN_SYMBOL_BEGIN);
11183 }
11184 case 'w': {
11185 parser->current.end++;
11186
11187 if (parser->current.end < parser->end) {
11188 lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11189 } else {
11190 lex_mode_push_list_eof(parser);
11191 }
11192
11193 LEX(PM_TOKEN_PERCENT_LOWER_W);
11194 }
11195 case 'W': {
11196 parser->current.end++;
11197
11198 if (parser->current.end < parser->end) {
11199 lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11200 } else {
11201 lex_mode_push_list_eof(parser);
11202 }
11203
11204 LEX(PM_TOKEN_PERCENT_UPPER_W);
11205 }
11206 case 'x': {
11207 parser->current.end++;
11208
11209 if (parser->current.end < parser->end) {
11210 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11211 lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11212 } else {
11213 lex_mode_push_string_eof(parser);
11214 }
11215
11216 LEX(PM_TOKEN_PERCENT_LOWER_X);
11217 }
11218 default:
11219 // If we get to this point, then we have a % that is completely
11220 // unparsable. In this case we'll just drop it from the parser
11221 // and skip past it and hope that the next token is something
11222 // that we can parse.
11223 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11224 goto lex_next_token;
11225 }
11226 }
11227
11228 if (ambiguous_operator_p(parser, space_seen)) {
11229 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11230 }
11231
11232 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11233 LEX(PM_TOKEN_PERCENT);
11234 }
11235
11236 // global variable
11237 case '$': {
11238 pm_token_type_t type = lex_global_variable(parser);
11239
11240 // If we're lexing an embedded variable, then we need to pop back into
11241 // the parent lex context.
11242 if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11243 lex_mode_pop(parser);
11244 }
11245
11246 lex_state_set(parser, PM_LEX_STATE_END);
11247 LEX(type);
11248 }
11249
11250 // instance variable, class variable
11251 case '@':
11252 lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11253 LEX(lex_at_variable(parser));
11254
11255 default: {
11256 if (*parser->current.start != '_') {
11257 size_t width = char_is_identifier_start(parser, parser->current.start, parser->end - parser->current.start);
11258
11259 // If this isn't the beginning of an identifier, then
11260 // it's an invalid token as we've exhausted all of the
11261 // other options. We'll skip past it and return the next
11262 // token after adding an appropriate error message.
11263 if (!width) {
11264 if (*parser->current.start >= 0x80) {
11265 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11266 } else if (*parser->current.start == '\\') {
11267 switch (peek_at(parser, parser->current.start + 1)) {
11268 case ' ':
11269 parser->current.end++;
11270 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11271 break;
11272 case '\f':
11273 parser->current.end++;
11274 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11275 break;
11276 case '\t':
11277 parser->current.end++;
11278 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11279 break;
11280 case '\v':
11281 parser->current.end++;
11282 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11283 break;
11284 case '\r':
11285 if (peek_at(parser, parser->current.start + 2) != '\n') {
11286 parser->current.end++;
11287 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11288 break;
11289 }
11291 default:
11292 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11293 break;
11294 }
11295 } else if (char_is_ascii_printable(*parser->current.start)) {
11296 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11297 } else {
11298 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11299 }
11300
11301 goto lex_next_token;
11302 }
11303
11304 parser->current.end = parser->current.start + width;
11305 }
11306
11307 pm_token_type_t type = lex_identifier(parser, previous_command_start);
11308
11309 // If we've hit a __END__ and it was at the start of the
11310 // line or the start of the file and it is followed by
11311 // either a \n or a \r\n, then this is the last token of the
11312 // file.
11313 if (
11314 ((parser->current.end - parser->current.start) == 7) &&
11315 current_token_starts_line(parser) &&
11316 (memcmp(parser->current.start, "__END__", 7) == 0) &&
11317 (parser->current.end == parser->end || match_eol(parser))
11318 ) {
11319 // Since we know we're about to add an __END__ comment,
11320 // we know we need to add all of the newlines to get the
11321 // correct column information for it.
11322 const uint8_t *cursor = parser->current.end;
11323 while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11324 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(++cursor - parser->start));
11325 }
11326
11327 parser->current.end = parser->end;
11328 parser->current.type = PM_TOKEN___END__;
11329 parser_lex_callback(parser);
11330
11331 parser->data_loc.start = PM_TOKEN_START(parser, &parser->current);
11332 parser->data_loc.length = PM_TOKEN_LENGTH(&parser->current);
11333
11334 LEX(PM_TOKEN_EOF);
11335 }
11336
11337 pm_lex_state_t last_state = parser->lex_state;
11338
11339 if (type == PM_TOKEN_IDENTIFIER || type == PM_TOKEN_CONSTANT || type == PM_TOKEN_METHOD_NAME) {
11340 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11341 if (previous_command_start) {
11342 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11343 } else {
11344 lex_state_set(parser, PM_LEX_STATE_ARG);
11345 }
11346 } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11347 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11348 } else {
11349 lex_state_set(parser, PM_LEX_STATE_END);
11350 }
11351 }
11352
11353 if (
11354 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11355 (type == PM_TOKEN_IDENTIFIER) &&
11356 ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11357 pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)))
11358 ) {
11359 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11360 }
11361
11362 LEX(type);
11363 }
11364 }
11365 }
11366 case PM_LEX_LIST: {
11367 if (parser->next_start != NULL) {
11368 parser->current.end = parser->next_start;
11369 parser->next_start = NULL;
11370 }
11371
11372 // First we'll set the beginning of the token.
11373 parser->current.start = parser->current.end;
11374
11375 // If there's any whitespace at the start of the list, then we're
11376 // going to trim it off the beginning and create a new token.
11377 size_t whitespace;
11378
11379 if (parser->heredoc_end) {
11380 whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11381 if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11382 whitespace += 1;
11383 }
11384 } else {
11385 whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11386 }
11387
11388 if (whitespace > 0) {
11389 parser->current.end += whitespace;
11390 if (peek_offset(parser, -1) == '\n') {
11391 // mutates next_start
11392 parser_flush_heredoc_end(parser);
11393 }
11394 LEX(PM_TOKEN_WORDS_SEP);
11395 }
11396
11397 // We'll check if we're at the end of the file. If we are, then we
11398 // need to return the EOF token.
11399 if (parser->current.end >= parser->end) {
11400 LEX(PM_TOKEN_EOF);
11401 }
11402
11403 // Here we'll get a list of the places where strpbrk should break,
11404 // and then find the first one.
11405 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11406 const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11407 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11408
11409 // If we haven't found an escape yet, then this buffer will be
11410 // unallocated since we can refer directly to the source string.
11411 pm_token_buffer_t token_buffer = { 0 };
11412
11413 while (breakpoint != NULL) {
11414 // If we hit whitespace, then we must have received content by
11415 // now, so we can return an element of the list.
11416 if (pm_char_is_whitespace(*breakpoint)) {
11417 parser->current.end = breakpoint;
11418 pm_token_buffer_flush(parser, &token_buffer);
11419 LEX(PM_TOKEN_STRING_CONTENT);
11420 }
11421
11422 // If we hit the terminator, we need to check which token to
11423 // return.
11424 if (*breakpoint == lex_mode->as.list.terminator) {
11425 // If this terminator doesn't actually close the list, then
11426 // we need to continue on past it.
11427 if (lex_mode->as.list.nesting > 0) {
11428 parser->current.end = breakpoint + 1;
11429 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11430 lex_mode->as.list.nesting--;
11431 continue;
11432 }
11433
11434 // If we've hit the terminator and we've already skipped
11435 // past content, then we can return a list node.
11436 if (breakpoint > parser->current.start) {
11437 parser->current.end = breakpoint;
11438 pm_token_buffer_flush(parser, &token_buffer);
11439 LEX(PM_TOKEN_STRING_CONTENT);
11440 }
11441
11442 // Otherwise, switch back to the default state and return
11443 // the end of the list.
11444 parser->current.end = breakpoint + 1;
11445 lex_mode_pop(parser);
11446 lex_state_set(parser, PM_LEX_STATE_END);
11447 LEX(PM_TOKEN_STRING_END);
11448 }
11449
11450 // If we hit a null byte, skip directly past it.
11451 if (*breakpoint == '\0') {
11452 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11453 continue;
11454 }
11455
11456 // If we hit escapes, then we need to treat the next token
11457 // literally. In this case we'll skip past the next character
11458 // and find the next breakpoint.
11459 if (*breakpoint == '\\') {
11460 parser->current.end = breakpoint + 1;
11461
11462 // If we've hit the end of the file, then break out of the
11463 // loop by setting the breakpoint to NULL.
11464 if (parser->current.end == parser->end) {
11465 breakpoint = NULL;
11466 continue;
11467 }
11468
11469 pm_token_buffer_escape(parser, &token_buffer);
11470 uint8_t peeked = peek(parser);
11471
11472 switch (peeked) {
11473 case ' ':
11474 case '\f':
11475 case '\t':
11476 case '\v':
11477 case '\\':
11478 pm_token_buffer_push_byte(&token_buffer, peeked);
11479 parser->current.end++;
11480 break;
11481 case '\r':
11482 parser->current.end++;
11483 if (peek(parser) != '\n') {
11484 pm_token_buffer_push_byte(&token_buffer, '\r');
11485 break;
11486 }
11488 case '\n':
11489 pm_token_buffer_push_byte(&token_buffer, '\n');
11490
11491 if (parser->heredoc_end) {
11492 // ... if we are on the same line as a heredoc,
11493 // flush the heredoc and continue parsing after
11494 // heredoc_end.
11495 parser_flush_heredoc_end(parser);
11496 pm_token_buffer_copy(parser, &token_buffer);
11497 LEX(PM_TOKEN_STRING_CONTENT);
11498 } else {
11499 // ... else track the newline.
11500 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11501 }
11502
11503 parser->current.end++;
11504 break;
11505 default:
11506 if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
11507 pm_token_buffer_push_byte(&token_buffer, peeked);
11508 parser->current.end++;
11509 } else if (lex_mode->as.list.interpolation) {
11510 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
11511 } else {
11512 pm_token_buffer_push_byte(&token_buffer, '\\');
11513 pm_token_buffer_push_escaped(&token_buffer, parser);
11514 }
11515
11516 break;
11517 }
11518
11519 token_buffer.cursor = parser->current.end;
11520 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11521 continue;
11522 }
11523
11524 // If we hit a #, then we will attempt to lex interpolation.
11525 if (*breakpoint == '#') {
11526 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11527
11528 if (!type) {
11529 // If we haven't returned at this point then we had something
11530 // that looked like an interpolated class or instance variable
11531 // like "#@" but wasn't actually. In this case we'll just skip
11532 // to the next breakpoint.
11533 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11534 continue;
11535 }
11536
11537 if (type == PM_TOKEN_STRING_CONTENT) {
11538 pm_token_buffer_flush(parser, &token_buffer);
11539 }
11540
11541 LEX(type);
11542 }
11543
11544 // If we've hit the incrementor, then we need to skip past it
11545 // and find the next breakpoint.
11546 assert(*breakpoint == lex_mode->as.list.incrementor);
11547 parser->current.end = breakpoint + 1;
11548 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11549 lex_mode->as.list.nesting++;
11550 continue;
11551 }
11552
11553 if (parser->current.end > parser->current.start) {
11554 pm_token_buffer_flush(parser, &token_buffer);
11555 LEX(PM_TOKEN_STRING_CONTENT);
11556 }
11557
11558 // If we were unable to find a breakpoint, then this token hits the
11559 // end of the file.
11560 parser->current.end = parser->end;
11561 pm_token_buffer_flush(parser, &token_buffer);
11562 LEX(PM_TOKEN_STRING_CONTENT);
11563 }
11564 case PM_LEX_REGEXP: {
11565 // First, we'll set the start of this token to be the current end.
11566 if (parser->next_start == NULL) {
11567 parser->current.start = parser->current.end;
11568 } else {
11569 parser->current.start = parser->next_start;
11570 parser->current.end = parser->next_start;
11571 parser->next_start = NULL;
11572 }
11573
11574 // We'll check if we're at the end of the file. If we are, then we
11575 // need to return the EOF token.
11576 if (parser->current.end >= parser->end) {
11577 LEX(PM_TOKEN_EOF);
11578 }
11579
11580 // Get a reference to the current mode.
11581 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11582
11583 // These are the places where we need to split up the content of the
11584 // regular expression. We'll use strpbrk to find the first of these
11585 // characters.
11586 const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
11587 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11588 pm_regexp_token_buffer_t token_buffer = { 0 };
11589
11590 while (breakpoint != NULL) {
11591 uint8_t term = lex_mode->as.regexp.terminator;
11592 bool is_terminator = (*breakpoint == term);
11593
11594 // If the terminator is newline, we need to consider \r\n _also_ a newline
11595 // For example: `%r\nfoo\r\n`
11596 // The regexp should be /foo/, not /foo\r/
11597 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11598 if (term == '\n') {
11599 is_terminator = true;
11600 }
11601
11602 // If the terminator is a CR, but we see a CRLF, we need to
11603 // treat the CRLF as a newline, meaning this is _not_ the
11604 // terminator
11605 if (term == '\r') {
11606 is_terminator = false;
11607 }
11608 }
11609
11610 // If we hit the terminator, we need to determine what kind of
11611 // token to return.
11612 if (is_terminator) {
11613 if (lex_mode->as.regexp.nesting > 0) {
11614 parser->current.end = breakpoint + 1;
11615 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11616 lex_mode->as.regexp.nesting--;
11617 continue;
11618 }
11619
11620 // Here we've hit the terminator. If we have already consumed
11621 // content then we need to return that content as string content
11622 // first.
11623 if (breakpoint > parser->current.start) {
11624 parser->current.end = breakpoint;
11625 pm_regexp_token_buffer_flush(parser, &token_buffer);
11626 LEX(PM_TOKEN_STRING_CONTENT);
11627 }
11628
11629 // Check here if we need to track the newline.
11630 size_t eol_length = match_eol_at(parser, breakpoint);
11631 if (eol_length) {
11632 parser->current.end = breakpoint + eol_length;
11633
11634 // Track the newline if we're not in a heredoc that
11635 // would have already added the newline to the
11636 // list.
11637 if (parser->heredoc_end == NULL) {
11638 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11639 }
11640 } else {
11641 parser->current.end = breakpoint + 1;
11642 }
11643
11644 // Since we've hit the terminator of the regular expression,
11645 // we now need to parse the options.
11646 parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
11647
11648 lex_mode_pop(parser);
11649 lex_state_set(parser, PM_LEX_STATE_END);
11650 LEX(PM_TOKEN_REGEXP_END);
11651 }
11652
11653 // If we've hit the incrementor, then we need to skip past it
11654 // and find the next breakpoint.
11655 if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
11656 parser->current.end = breakpoint + 1;
11657 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11658 lex_mode->as.regexp.nesting++;
11659 continue;
11660 }
11661
11662 switch (*breakpoint) {
11663 case '\0':
11664 // If we hit a null byte, skip directly past it.
11665 parser->current.end = breakpoint + 1;
11666 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11667 break;
11668 case '\r':
11669 if (peek_at(parser, breakpoint + 1) != '\n') {
11670 parser->current.end = breakpoint + 1;
11671 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11672 break;
11673 }
11674
11675 breakpoint++;
11676 parser->current.end = breakpoint;
11677 pm_regexp_token_buffer_escape(parser, &token_buffer);
11678 token_buffer.base.cursor = breakpoint;
11679
11681 case '\n':
11682 // If we've hit a newline, then we need to track that in
11683 // the list of newlines.
11684 if (parser->heredoc_end == NULL) {
11685 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
11686 parser->current.end = breakpoint + 1;
11687 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11688 break;
11689 }
11690
11691 parser->current.end = breakpoint + 1;
11692 parser_flush_heredoc_end(parser);
11693 pm_regexp_token_buffer_flush(parser, &token_buffer);
11694 LEX(PM_TOKEN_STRING_CONTENT);
11695 case '\\': {
11696 // If we hit escapes, then we need to treat the next
11697 // token literally. In this case we'll skip past the
11698 // next character and find the next breakpoint.
11699 parser->current.end = breakpoint + 1;
11700
11701 // If we've hit the end of the file, then break out of
11702 // the loop by setting the breakpoint to NULL.
11703 if (parser->current.end == parser->end) {
11704 breakpoint = NULL;
11705 break;
11706 }
11707
11708 pm_regexp_token_buffer_escape(parser, &token_buffer);
11709 uint8_t peeked = peek(parser);
11710
11711 switch (peeked) {
11712 case '\r':
11713 parser->current.end++;
11714 if (peek(parser) != '\n') {
11715 if (lex_mode->as.regexp.terminator != '\r') {
11716 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11717 }
11718 pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
11719 pm_token_buffer_push_byte(&token_buffer.base, '\r');
11720 break;
11721 }
11723 case '\n':
11724 if (parser->heredoc_end) {
11725 // ... if we are on the same line as a heredoc,
11726 // flush the heredoc and continue parsing after
11727 // heredoc_end.
11728 parser_flush_heredoc_end(parser);
11729 pm_regexp_token_buffer_copy(parser, &token_buffer);
11730 LEX(PM_TOKEN_STRING_CONTENT);
11731 } else {
11732 // ... else track the newline.
11733 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
11734 }
11735
11736 parser->current.end++;
11737 break;
11738 case 'c':
11739 case 'C':
11740 case 'M':
11741 case 'u':
11742 case 'x':
11743 escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
11744 break;
11745 default:
11746 if (lex_mode->as.regexp.terminator == peeked) {
11747 // Some characters when they are used as the
11748 // terminator also receive an escape. They are
11749 // enumerated here.
11750 switch (peeked) {
11751 case '$': case ')': case '*': case '+':
11752 case '.': case '>': case '?': case ']':
11753 case '^': case '|': case '}':
11754 pm_token_buffer_push_byte(&token_buffer.base, '\\');
11755 break;
11756 default:
11757 break;
11758 }
11759
11760 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
11761 pm_token_buffer_push_byte(&token_buffer.base, peeked);
11762 parser->current.end++;
11763 break;
11764 }
11765
11766 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
11767 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
11768 break;
11769 }
11770
11771 token_buffer.base.cursor = parser->current.end;
11772 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11773 break;
11774 }
11775 case '#': {
11776 // If we hit a #, then we will attempt to lex
11777 // interpolation.
11778 pm_token_type_t type = lex_interpolation(parser, breakpoint);
11779
11780 if (!type) {
11781 // If we haven't returned at this point then we had
11782 // something that looked like an interpolated class or
11783 // instance variable like "#@" but wasn't actually. In
11784 // this case we'll just skip to the next breakpoint.
11785 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
11786 break;
11787 }
11788
11789 if (type == PM_TOKEN_STRING_CONTENT) {
11790 pm_regexp_token_buffer_flush(parser, &token_buffer);
11791 }
11792
11793 LEX(type);
11794 }
11795 default:
11796 assert(false && "unreachable");
11797 break;
11798 }
11799 }
11800
11801 if (parser->current.end > parser->current.start) {
11802 pm_regexp_token_buffer_flush(parser, &token_buffer);
11803 LEX(PM_TOKEN_STRING_CONTENT);
11804 }
11805
11806 // If we were unable to find a breakpoint, then this token hits the
11807 // end of the file.
11808 parser->current.end = parser->end;
11809 pm_regexp_token_buffer_flush(parser, &token_buffer);
11810 LEX(PM_TOKEN_STRING_CONTENT);
11811 }
11812 case PM_LEX_STRING: {
11813 // First, we'll set the start of this token to be the current end.
11814 if (parser->next_start == NULL) {
11815 parser->current.start = parser->current.end;
11816 } else {
11817 parser->current.start = parser->next_start;
11818 parser->current.end = parser->next_start;
11819 parser->next_start = NULL;
11820 }
11821
11822 // We'll check if we're at the end of the file. If we are, then we need to
11823 // return the EOF token.
11824 if (parser->current.end >= parser->end) {
11825 LEX(PM_TOKEN_EOF);
11826 }
11827
11828 // These are the places where we need to split up the content of the
11829 // string. We'll use strpbrk to find the first of these characters.
11830 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11831 const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
11832 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11833
11834 // If we haven't found an escape yet, then this buffer will be
11835 // unallocated since we can refer directly to the source string.
11836 pm_token_buffer_t token_buffer = { 0 };
11837
11838 while (breakpoint != NULL) {
11839 // If we hit the incrementor, then we'll increment the nesting and
11840 // continue lexing.
11841 if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
11842 lex_mode->as.string.nesting++;
11843 parser->current.end = breakpoint + 1;
11844 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11845 continue;
11846 }
11847
11848 uint8_t term = lex_mode->as.string.terminator;
11849 bool is_terminator = (*breakpoint == term);
11850
11851 // If the terminator is newline, we need to consider \r\n _also_ a newline
11852 // For example: `%\nfoo\r\n`
11853 // The string should be "foo", not "foo\r"
11854 if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
11855 if (term == '\n') {
11856 is_terminator = true;
11857 }
11858
11859 // If the terminator is a CR, but we see a CRLF, we need to
11860 // treat the CRLF as a newline, meaning this is _not_ the
11861 // terminator
11862 if (term == '\r') {
11863 is_terminator = false;
11864 }
11865 }
11866
11867 // Note that we have to check the terminator here first because we could
11868 // potentially be parsing a % string that has a # character as the
11869 // terminator.
11870 if (is_terminator) {
11871 // If this terminator doesn't actually close the string, then we need
11872 // to continue on past it.
11873 if (lex_mode->as.string.nesting > 0) {
11874 parser->current.end = breakpoint + 1;
11875 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11876 lex_mode->as.string.nesting--;
11877 continue;
11878 }
11879
11880 // Here we've hit the terminator. If we have already consumed content
11881 // then we need to return that content as string content first.
11882 if (breakpoint > parser->current.start) {
11883 parser->current.end = breakpoint;
11884 pm_token_buffer_flush(parser, &token_buffer);
11885 LEX(PM_TOKEN_STRING_CONTENT);
11886 }
11887
11888 // Otherwise we need to switch back to the parent lex mode and
11889 // return the end of the string.
11890 size_t eol_length = match_eol_at(parser, breakpoint);
11891 if (eol_length) {
11892 parser->current.end = breakpoint + eol_length;
11893
11894 // Track the newline if we're not in a heredoc that
11895 // would have already added the newline to the
11896 // list.
11897 if (parser->heredoc_end == NULL) {
11898 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current));
11899 }
11900 } else {
11901 parser->current.end = breakpoint + 1;
11902 }
11903
11904 if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
11905 parser->current.end++;
11906 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
11907 lex_mode_pop(parser);
11908 LEX(PM_TOKEN_LABEL_END);
11909 }
11910
11911 // When the delimiter itself is a newline, we won't
11912 // get a chance to flush heredocs in the usual places since
11913 // the newline is already consumed.
11914 if (term == '\n' && parser->heredoc_end) {
11915 parser_flush_heredoc_end(parser);
11916 }
11917
11918 lex_state_set(parser, PM_LEX_STATE_END);
11919 lex_mode_pop(parser);
11920 LEX(PM_TOKEN_STRING_END);
11921 }
11922
11923 switch (*breakpoint) {
11924 case '\0':
11925 // Skip directly past the null character.
11926 parser->current.end = breakpoint + 1;
11927 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11928 break;
11929 case '\r':
11930 if (peek_at(parser, breakpoint + 1) != '\n') {
11931 parser->current.end = breakpoint + 1;
11932 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11933 break;
11934 }
11935
11936 // If we hit a \r\n sequence, then we need to treat it
11937 // as a newline.
11938 breakpoint++;
11939 parser->current.end = breakpoint;
11940 pm_token_buffer_escape(parser, &token_buffer);
11941 token_buffer.cursor = breakpoint;
11942
11944 case '\n':
11945 // When we hit a newline, we need to flush any potential
11946 // heredocs. Note that this has to happen after we check
11947 // for the terminator in case the terminator is a
11948 // newline character.
11949 if (parser->heredoc_end == NULL) {
11950 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
11951 parser->current.end = breakpoint + 1;
11952 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11953 break;
11954 }
11955
11956 parser->current.end = breakpoint + 1;
11957 parser_flush_heredoc_end(parser);
11958 pm_token_buffer_flush(parser, &token_buffer);
11959 LEX(PM_TOKEN_STRING_CONTENT);
11960 case '\\': {
11961 // Here we hit escapes.
11962 parser->current.end = breakpoint + 1;
11963
11964 // If we've hit the end of the file, then break out of
11965 // the loop by setting the breakpoint to NULL.
11966 if (parser->current.end == parser->end) {
11967 breakpoint = NULL;
11968 continue;
11969 }
11970
11971 pm_token_buffer_escape(parser, &token_buffer);
11972 uint8_t peeked = peek(parser);
11973
11974 switch (peeked) {
11975 case '\\':
11976 pm_token_buffer_push_byte(&token_buffer, '\\');
11977 parser->current.end++;
11978 break;
11979 case '\r':
11980 parser->current.end++;
11981 if (peek(parser) != '\n') {
11982 if (!lex_mode->as.string.interpolation) {
11983 pm_token_buffer_push_byte(&token_buffer, '\\');
11984 }
11985 pm_token_buffer_push_byte(&token_buffer, '\r');
11986 break;
11987 }
11989 case '\n':
11990 if (!lex_mode->as.string.interpolation) {
11991 pm_token_buffer_push_byte(&token_buffer, '\\');
11992 pm_token_buffer_push_byte(&token_buffer, '\n');
11993 }
11994
11995 if (parser->heredoc_end) {
11996 // ... if we are on the same line as a heredoc,
11997 // flush the heredoc and continue parsing after
11998 // heredoc_end.
11999 parser_flush_heredoc_end(parser);
12000 pm_token_buffer_copy(parser, &token_buffer);
12001 LEX(PM_TOKEN_STRING_CONTENT);
12002 } else {
12003 // ... else track the newline.
12004 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, PM_TOKEN_END(parser, &parser->current) + 1);
12005 }
12006
12007 parser->current.end++;
12008 break;
12009 default:
12010 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12011 pm_token_buffer_push_byte(&token_buffer, peeked);
12012 parser->current.end++;
12013 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12014 pm_token_buffer_push_byte(&token_buffer, peeked);
12015 parser->current.end++;
12016 } else if (lex_mode->as.string.interpolation) {
12017 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12018 } else {
12019 pm_token_buffer_push_byte(&token_buffer, '\\');
12020 pm_token_buffer_push_escaped(&token_buffer, parser);
12021 }
12022
12023 break;
12024 }
12025
12026 token_buffer.cursor = parser->current.end;
12027 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12028 break;
12029 }
12030 case '#': {
12031 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12032
12033 if (!type) {
12034 // If we haven't returned at this point then we had something that
12035 // looked like an interpolated class or instance variable like "#@"
12036 // but wasn't actually. In this case we'll just skip to the next
12037 // breakpoint.
12038 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12039 break;
12040 }
12041
12042 if (type == PM_TOKEN_STRING_CONTENT) {
12043 pm_token_buffer_flush(parser, &token_buffer);
12044 }
12045
12046 LEX(type);
12047 }
12048 default:
12049 assert(false && "unreachable");
12050 }
12051 }
12052
12053 if (parser->current.end > parser->current.start) {
12054 pm_token_buffer_flush(parser, &token_buffer);
12055 LEX(PM_TOKEN_STRING_CONTENT);
12056 }
12057
12058 // If we've hit the end of the string, then this is an unterminated
12059 // string. In that case we'll return a string content token.
12060 parser->current.end = parser->end;
12061 pm_token_buffer_flush(parser, &token_buffer);
12062 LEX(PM_TOKEN_STRING_CONTENT);
12063 }
12064 case PM_LEX_HEREDOC: {
12065 // First, we'll set the start of this token.
12066 if (parser->next_start == NULL) {
12067 parser->current.start = parser->current.end;
12068 } else {
12069 parser->current.start = parser->next_start;
12070 parser->current.end = parser->next_start;
12071 parser->heredoc_end = NULL;
12072 parser->next_start = NULL;
12073 }
12074
12075 // Now let's grab the information about the identifier off of the
12076 // current lex mode.
12077 pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12078 pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12079
12080 bool line_continuation = lex_mode->as.heredoc.line_continuation;
12081 lex_mode->as.heredoc.line_continuation = false;
12082
12083 // We'll check if we're at the end of the file. If we are, then we
12084 // will add an error (because we weren't able to find the
12085 // terminator) but still continue parsing so that content after the
12086 // declaration of the heredoc can be parsed.
12087 if (parser->current.end >= parser->end) {
12088 pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12089 parser->next_start = lex_mode->as.heredoc.next_start;
12090 parser->heredoc_end = parser->current.end;
12091 lex_state_set(parser, PM_LEX_STATE_END);
12092 lex_mode_pop(parser);
12093 LEX(PM_TOKEN_HEREDOC_END);
12094 }
12095
12096 const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12097 size_t ident_length = heredoc_lex_mode->ident_length;
12098
12099 // If we are immediately following a newline and we have hit the
12100 // terminator, then we need to return the ending of the heredoc.
12101 if (current_token_starts_line(parser)) {
12102 const uint8_t *start = parser->current.start;
12103
12104 if (!line_continuation && (start + ident_length <= parser->end)) {
12105 const uint8_t *newline = next_newline(start, parser->end - start);
12106 const uint8_t *ident_end = newline;
12107 const uint8_t *terminator_end = newline;
12108
12109 if (newline == NULL) {
12110 terminator_end = parser->end;
12111 ident_end = parser->end;
12112 } else {
12113 terminator_end++;
12114 if (newline[-1] == '\r') {
12115 ident_end--; // Remove \r
12116 }
12117 }
12118
12119 const uint8_t *terminator_start = ident_end - ident_length;
12120 const uint8_t *cursor = start;
12121
12122 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12123 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12124 cursor++;
12125 }
12126 }
12127
12128 if (
12129 (cursor == terminator_start) &&
12130 (memcmp(terminator_start, ident_start, ident_length) == 0)
12131 ) {
12132 if (newline != NULL) {
12133 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
12134 }
12135
12136 parser->current.end = terminator_end;
12137 if (*lex_mode->as.heredoc.next_start == '\\') {
12138 parser->next_start = NULL;
12139 } else {
12140 parser->next_start = lex_mode->as.heredoc.next_start;
12141 parser->heredoc_end = parser->current.end;
12142 }
12143
12144 lex_state_set(parser, PM_LEX_STATE_END);
12145 lex_mode_pop(parser);
12146 LEX(PM_TOKEN_HEREDOC_END);
12147 }
12148 }
12149
12150 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12151 if (
12152 heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12153 lex_mode->as.heredoc.common_whitespace != NULL &&
12154 (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12155 peek_at(parser, start) != '\n'
12156 ) {
12157 *lex_mode->as.heredoc.common_whitespace = whitespace;
12158 }
12159 }
12160
12161 // Otherwise we'll be parsing string content. These are the places
12162 // where we need to split up the content of the heredoc. We'll use
12163 // strpbrk to find the first of these characters.
12164 uint8_t breakpoints[PM_STRPBRK_CACHE_SIZE] = "\r\n\\#";
12165
12166 pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12167 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12168 breakpoints[3] = '\0';
12169 }
12170
12171 const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12172 pm_token_buffer_t token_buffer = { 0 };
12173 bool was_line_continuation = false;
12174
12175 while (breakpoint != NULL) {
12176 switch (*breakpoint) {
12177 case '\0':
12178 // Skip directly past the null character.
12179 parser->current.end = breakpoint + 1;
12180 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12181 break;
12182 case '\r':
12183 parser->current.end = breakpoint + 1;
12184
12185 if (peek_at(parser, breakpoint + 1) != '\n') {
12186 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12187 break;
12188 }
12189
12190 // If we hit a \r\n sequence, then we want to replace it
12191 // with a single \n character in the final string.
12192 breakpoint++;
12193 pm_token_buffer_escape(parser, &token_buffer);
12194 token_buffer.cursor = breakpoint;
12195
12197 case '\n': {
12198 if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12199 parser_flush_heredoc_end(parser);
12200 parser->current.end = breakpoint + 1;
12201 pm_token_buffer_flush(parser, &token_buffer);
12202 LEX(PM_TOKEN_STRING_CONTENT);
12203 }
12204
12205 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(breakpoint - parser->start + 1));
12206
12207 // If we have a - or ~ heredoc, then we can match after
12208 // some leading whitespace.
12209 const uint8_t *start = breakpoint + 1;
12210
12211 if (!was_line_continuation && (start + ident_length <= parser->end)) {
12212 // We want to match the terminator starting from the end of the line in case
12213 // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12214 const uint8_t *newline = next_newline(start, parser->end - start);
12215
12216 if (newline == NULL) {
12217 newline = parser->end;
12218 } else if (newline[-1] == '\r') {
12219 newline--; // Remove \r
12220 }
12221
12222 // Start of a possible terminator.
12223 const uint8_t *terminator_start = newline - ident_length;
12224
12225 // Cursor to check for the leading whitespace. We skip the
12226 // leading whitespace if we have a - or ~ heredoc.
12227 const uint8_t *cursor = start;
12228
12229 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12230 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12231 cursor++;
12232 }
12233 }
12234
12235 if (
12236 cursor == terminator_start &&
12237 (memcmp(terminator_start, ident_start, ident_length) == 0)
12238 ) {
12239 parser->current.end = breakpoint + 1;
12240 pm_token_buffer_flush(parser, &token_buffer);
12241 LEX(PM_TOKEN_STRING_CONTENT);
12242 }
12243 }
12244
12245 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12246
12247 // If we have hit a newline that is followed by a valid
12248 // terminator, then we need to return the content of the
12249 // heredoc here as string content. Then, the next time a
12250 // token is lexed, it will match again and return the
12251 // end of the heredoc.
12252 if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12253 if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12254 *lex_mode->as.heredoc.common_whitespace = whitespace;
12255 }
12256
12257 parser->current.end = breakpoint + 1;
12258 pm_token_buffer_flush(parser, &token_buffer);
12259 LEX(PM_TOKEN_STRING_CONTENT);
12260 }
12261
12262 // Otherwise we hit a newline and it wasn't followed by
12263 // a terminator, so we can continue parsing.
12264 parser->current.end = breakpoint + 1;
12265 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12266 break;
12267 }
12268 case '\\': {
12269 // If we hit an escape, then we need to skip past
12270 // however many characters the escape takes up. However
12271 // it's important that if \n or \r\n are escaped, we
12272 // stop looping before the newline and not after the
12273 // newline so that we can still potentially find the
12274 // terminator of the heredoc.
12275 parser->current.end = breakpoint + 1;
12276
12277 // If we've hit the end of the file, then break out of
12278 // the loop by setting the breakpoint to NULL.
12279 if (parser->current.end == parser->end) {
12280 breakpoint = NULL;
12281 continue;
12282 }
12283
12284 pm_token_buffer_escape(parser, &token_buffer);
12285 uint8_t peeked = peek(parser);
12286
12287 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12288 switch (peeked) {
12289 case '\r':
12290 parser->current.end++;
12291 if (peek(parser) != '\n') {
12292 pm_token_buffer_push_byte(&token_buffer, '\\');
12293 pm_token_buffer_push_byte(&token_buffer, '\r');
12294 break;
12295 }
12297 case '\n':
12298 pm_token_buffer_push_byte(&token_buffer, '\\');
12299 pm_token_buffer_push_byte(&token_buffer, '\n');
12300 token_buffer.cursor = parser->current.end + 1;
12301 breakpoint = parser->current.end;
12302 continue;
12303 default:
12304 pm_token_buffer_push_byte(&token_buffer, '\\');
12305 pm_token_buffer_push_escaped(&token_buffer, parser);
12306 break;
12307 }
12308 } else {
12309 switch (peeked) {
12310 case '\r':
12311 parser->current.end++;
12312 if (peek(parser) != '\n') {
12313 pm_token_buffer_push_byte(&token_buffer, '\r');
12314 break;
12315 }
12317 case '\n':
12318 // If we are in a tilde here, we should
12319 // break out of the loop and return the
12320 // string content.
12321 if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12322 const uint8_t *end = parser->current.end;
12323
12324 if (parser->heredoc_end == NULL) {
12325 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(end - parser->start + 1));
12326 }
12327
12328 // Here we want the buffer to only
12329 // include up to the backslash.
12330 parser->current.end = breakpoint;
12331 pm_token_buffer_flush(parser, &token_buffer);
12332
12333 // Now we can advance the end of the
12334 // token past the newline.
12335 parser->current.end = end + 1;
12336 lex_mode->as.heredoc.line_continuation = true;
12337 LEX(PM_TOKEN_STRING_CONTENT);
12338 }
12339
12340 was_line_continuation = true;
12341 token_buffer.cursor = parser->current.end + 1;
12342 breakpoint = parser->current.end;
12343 continue;
12344 default:
12345 escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12346 break;
12347 }
12348 }
12349
12350 token_buffer.cursor = parser->current.end;
12351 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12352 break;
12353 }
12354 case '#': {
12355 pm_token_type_t type = lex_interpolation(parser, breakpoint);
12356
12357 if (!type) {
12358 // If we haven't returned at this point then we had
12359 // something that looked like an interpolated class
12360 // or instance variable like "#@" but wasn't
12361 // actually. In this case we'll just skip to the
12362 // next breakpoint.
12363 breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12364 break;
12365 }
12366
12367 if (type == PM_TOKEN_STRING_CONTENT) {
12368 pm_token_buffer_flush(parser, &token_buffer);
12369 }
12370
12371 LEX(type);
12372 }
12373 default:
12374 assert(false && "unreachable");
12375 }
12376
12377 was_line_continuation = false;
12378 }
12379
12380 if (parser->current.end > parser->current.start) {
12381 parser->current.end = parser->end;
12382 pm_token_buffer_flush(parser, &token_buffer);
12383 LEX(PM_TOKEN_STRING_CONTENT);
12384 }
12385
12386 // If we've hit the end of the string, then this is an unterminated
12387 // heredoc. In that case we'll return a string content token.
12388 parser->current.end = parser->end;
12389 pm_token_buffer_flush(parser, &token_buffer);
12390 LEX(PM_TOKEN_STRING_CONTENT);
12391 }
12392 }
12393
12394 assert(false && "unreachable");
12395}
12396
12397#undef LEX
12398
12399/******************************************************************************/
12400/* Parse functions */
12401/******************************************************************************/
12402
/**
 * The binding power levels used when parsing operators. Values grow from
 * PM_BINDING_POWER_STATEMENT up to PM_BINDING_POWER_MAX, and every named level
 * is an even number so that "level + 1" (as produced by the LEFT_ASSOCIATIVE
 * and NON_ASSOCIATIVE initializer macros) never collides with another level.
 */
typedef enum {
    /* Marks a token that cannot be used as an infix operator. */
    PM_BINDING_POWER_UNSET            =  0,

    PM_BINDING_POWER_STATEMENT        =  2,
    PM_BINDING_POWER_MODIFIER_RESCUE  =  4, /* rescue */
    PM_BINDING_POWER_MODIFIER         =  6, /* if unless until while */
    PM_BINDING_POWER_COMPOSITION      =  8, /* and or */
    PM_BINDING_POWER_NOT              = 10, /* not */
    PM_BINDING_POWER_MATCH            = 12, /* => in */
    PM_BINDING_POWER_DEFINED          = 14, /* defined? */
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, /* = */
    PM_BINDING_POWER_ASSIGNMENT       = 18, /* = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **= */
    PM_BINDING_POWER_TERNARY          = 20, /* ?: */
    PM_BINDING_POWER_RANGE            = 22, /* .. ... */
    PM_BINDING_POWER_LOGICAL_OR       = 24, /* || */
    PM_BINDING_POWER_LOGICAL_AND      = 26, /* && */
    PM_BINDING_POWER_EQUALITY         = 28, /* <=> == === != =~ !~ */
    PM_BINDING_POWER_COMPARISON       = 30, /* > >= < <= */
    PM_BINDING_POWER_BITWISE_OR       = 32, /* | ^ */
    PM_BINDING_POWER_BITWISE_AND      = 34, /* & */
    PM_BINDING_POWER_SHIFT            = 36, /* << >> */
    PM_BINDING_POWER_TERM             = 38, /* + - */
    PM_BINDING_POWER_FACTOR           = 40, /* * / % */
    PM_BINDING_POWER_UMINUS           = 42, /* -@ */
    PM_BINDING_POWER_EXPONENT         = 44, /* ** */
    PM_BINDING_POWER_UNARY            = 46, /* ! ~ +@ */
    PM_BINDING_POWER_INDEX            = 48, /* [] []= */
    PM_BINDING_POWER_CALL             = 50, /* :: . */

    /* One level above every real operator. */
    PM_BINDING_POWER_MAX              = 52
} pm_binding_power_t;
12440
12445typedef struct {
12447 pm_binding_power_t left;
12448
12450 pm_binding_power_t right;
12451
12454
12461
/*
 * Shorthand initializers for the entries of the pm_binding_powers table. Each
 * one expands to a four-member brace initializer: the left power, the right
 * power, a flag that is false only for the unary form, and a flag that is true
 * only for the non-associative form.
 *
 * The macro argument is parenthesized at every use so that an argument which
 * is itself an expression (e.g. a conditional) still yields "argument + 1"
 * rather than being re-associated by operator precedence.
 */
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12467
12468pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12469 // rescue
12470 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION, true, false },
12471
12472 // if unless until while
12473 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12474 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12475 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12476 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12477
12478 // and or
12479 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12480 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12481
12482 // => in
12483 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12484 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12485
12486 // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12487 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12488 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12489 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12490 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12491 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12492 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12493 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12494 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12495 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12496 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12497 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12498 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12499 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12500 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12501
12502 // ?:
12503 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
12504
12505 // .. ...
12506 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12507 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12508 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12509 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12510
12511 // ||
12512 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
12513
12514 // &&
12515 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
12516
12517 // != !~ == === =~ <=>
12518 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12519 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12520 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12521 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12522 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12523 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12524
12525 // > >= < <=
12526 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12527 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12528 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12529 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12530
12531 // ^ |
12532 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12533 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12534
12535 // &
12536 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
12537
12538 // >> <<
12539 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12540 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12541
12542 // - +
12543 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12544 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12545
12546 // % / *
12547 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12548 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12549 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12550 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
12551
12552 // -@
12553 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
12554 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
12555
12556 // **
12557 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
12558 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12559
12560 // ! ~ +@
12561 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12562 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12563 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12564
12565 // [
12566 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
12567
12568 // :: . &.
12569 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12570 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12571 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
12572};
12573
/*
 * The initializer shorthands are only meant for the pm_binding_powers table
 * above, so remove all of them (including NON_ASSOCIATIVE) to keep them from
 * leaking into the rest of the translation unit.
 */
#undef BINDING_POWER_ASSIGNMENT
#undef LEFT_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
#undef RIGHT_ASSOCIATIVE_UNARY
12578
12582static PRISM_INLINE bool
12583match1(const pm_parser_t *parser, pm_token_type_t type) {
12584 return parser->current.type == type;
12585}
12586
12590static PRISM_INLINE bool
12591match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12592 return match1(parser, type1) || match1(parser, type2);
12593}
12594
12598static PRISM_INLINE bool
12599match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
12600 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
12601}
12602
12606static PRISM_INLINE bool
12607match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
12608 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
12609}
12610
12614static PRISM_INLINE bool
12615match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
12616 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
12617}
12618
12622static PRISM_INLINE bool
12623match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
12624 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
12625}
12626
12630static PRISM_INLINE bool
12631match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
12632 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
12633}
12634
12641static bool
12642accept1(pm_parser_t *parser, pm_token_type_t type) {
12643 if (match1(parser, type)) {
12644 parser_lex(parser);
12645 return true;
12646 }
12647 return false;
12648}
12649
12654static PRISM_INLINE bool
12655accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12656 if (match2(parser, type1, type2)) {
12657 parser_lex(parser);
12658 return true;
12659 }
12660 return false;
12661}
12662
12674static void
12675expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
12676 if (accept1(parser, type)) return;
12677
12678 const uint8_t *location = parser->previous.end;
12679 pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
12680
12681 parser->previous.start = location;
12682 parser->previous.type = 0;
12683}
12684
12689static void
12690expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
12691 if (accept2(parser, type1, type2)) return;
12692
12693 const uint8_t *location = parser->previous.end;
12694 pm_parser_err(parser, U32(location - parser->start), 0, diag_id);
12695
12696 parser->previous.start = location;
12697 parser->previous.type = 0;
12698}
12699
12704static void
12705expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
12706 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12707 parser_lex(parser);
12708 } else {
12709 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
12710 parser->previous.start = parser->previous.end;
12711 parser->previous.type = 0;
12712 }
12713}
12714
12721static void
12722expect1_opening(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id, const pm_token_t *opening) {
12723 if (accept1(parser, type)) return;
12724
12725 const uint8_t *start = opening->start;
12726 pm_parser_err(parser, U32(start - parser->start), U32(opening->end - start), diag_id);
12727
12728 parser->previous.start = parser->previous.end;
12729 parser->previous.type = 0;
12730}
12731
/*
 * Bit flags carried in the uint8_t `flags` argument of the expression parsing
 * functions. Each flag occupies its own bit so they can be combined with | and
 * tested with &.
 */
#define PM_PARSE_ACCEPTS_COMMAND_CALL ((uint8_t) (1u << 0))
#define PM_PARSE_ACCEPTS_LABEL        ((uint8_t) (1u << 1))
#define PM_PARSE_ACCEPTS_DO_BLOCK     ((uint8_t) (1u << 2))
#define PM_PARSE_IN_ENDLESS_DEF       ((uint8_t) (1u << 3))
12737
/**
 * Forward declaration of parse_expression so that the helpers defined before
 * it (such as parse_value_expression) can call it.
 */
static pm_node_t *
parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
12740
12745static pm_node_t *
12746parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
12747 pm_node_t *node = parse_expression(parser, binding_power, flags, diag_id, depth);
12748 pm_assert_value_expression(parser, node);
12749 return node;
12750}
12751
12770static PRISM_INLINE bool
12771token_begins_expression_p(pm_token_type_t type) {
12772 switch (type) {
12773 case PM_TOKEN_EQUAL_GREATER:
12774 case PM_TOKEN_KEYWORD_IN:
12775 // We need to special case this because it is a binary operator that
12776 // should not be marked as beginning an expression.
12777 return false;
12778 case PM_TOKEN_BRACE_RIGHT:
12779 case PM_TOKEN_BRACKET_RIGHT:
12780 case PM_TOKEN_COLON:
12781 case PM_TOKEN_COMMA:
12782 case PM_TOKEN_EMBEXPR_END:
12783 case PM_TOKEN_EOF:
12784 case PM_TOKEN_LAMBDA_BEGIN:
12785 case PM_TOKEN_KEYWORD_DO:
12786 case PM_TOKEN_KEYWORD_DO_BLOCK:
12787 case PM_TOKEN_KEYWORD_DO_LOOP:
12788 case PM_TOKEN_KEYWORD_END:
12789 case PM_TOKEN_KEYWORD_ELSE:
12790 case PM_TOKEN_KEYWORD_ELSIF:
12791 case PM_TOKEN_KEYWORD_ENSURE:
12792 case PM_TOKEN_KEYWORD_THEN:
12793 case PM_TOKEN_KEYWORD_RESCUE:
12794 case PM_TOKEN_KEYWORD_WHEN:
12795 case PM_TOKEN_NEWLINE:
12796 case PM_TOKEN_PARENTHESIS_RIGHT:
12797 case PM_TOKEN_SEMICOLON:
12798 // The reason we need this short-circuit is because we're using the
12799 // binding powers table to tell us if the subsequent token could
12800 // potentially be the start of an expression. If there _is_ a binding
12801 // power for one of these tokens, then we should remove it from this list
12802 // and let it be handled by the default case below.
12803 assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
12804 return false;
12805 case PM_TOKEN_UAMPERSAND:
12806 // This is a special case because this unary operator cannot appear
12807 // as a general operator, it only appears in certain circumstances.
12808 return false;
12809 case PM_TOKEN_UCOLON_COLON:
12810 case PM_TOKEN_UMINUS:
12811 case PM_TOKEN_UMINUS_NUM:
12812 case PM_TOKEN_UPLUS:
12813 case PM_TOKEN_BANG:
12814 case PM_TOKEN_TILDE:
12815 case PM_TOKEN_UDOT_DOT:
12816 case PM_TOKEN_UDOT_DOT_DOT:
12817 // These unary tokens actually do have binding power associated with them
12818 // so that we can correctly place them into the precedence order. But we
12819 // want them to be marked as beginning an expression, so we need to
12820 // special case them here.
12821 return true;
12822 default:
12823 return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
12824 }
12825}
12826
12831static pm_node_t *
12832parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
12833 if (accept1(parser, PM_TOKEN_USTAR)) {
12834 pm_token_t operator = parser->previous;
12835 pm_node_t *expression = parse_value_expression(parser, binding_power, (uint8_t) (flags & PM_PARSE_ACCEPTS_DO_BLOCK), PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
12836 return UP(pm_splat_node_create(parser, &operator, expression));
12837 }
12838
12839 return parse_value_expression(parser, binding_power, flags, diag_id, depth);
12840}
12841
12842static bool
12843pm_node_unreference_each(const pm_node_t *node, void *data) {
12844 switch (PM_NODE_TYPE(node)) {
12845 /* When we are about to destroy a set of nodes that could potentially
12846 * contain block exits for the current scope, we need to check if they
12847 * are contained in the list of block exits and remove them if they are.
12848 */
12849 case PM_BREAK_NODE:
12850 case PM_NEXT_NODE:
12851 case PM_REDO_NODE: {
12852 pm_parser_t *parser = (pm_parser_t *) data;
12853 size_t index = 0;
12854
12855 while (index < parser->current_block_exits->size) {
12856 pm_node_t *block_exit = parser->current_block_exits->nodes[index];
12857
12858 if (block_exit == node) {
12859 if (index + 1 < parser->current_block_exits->size) {
12860 memmove(
12861 &parser->current_block_exits->nodes[index],
12862 &parser->current_block_exits->nodes[index + 1],
12863 (parser->current_block_exits->size - index - 1) * sizeof(pm_node_t *)
12864 );
12865 }
12866 parser->current_block_exits->size--;
12867
12868 /* Note returning true here because these nodes could have
12869 * arguments that are themselves block exits. */
12870 return true;
12871 }
12872
12873 index++;
12874 }
12875
12876 return true;
12877 }
12878 /* When an implicit local variable is written to or targeted, it becomes
12879 * a regular, named local variable. This branch removes it from the list
12880 * of implicit parameters when that happens. */
12881 case PM_LOCAL_VARIABLE_READ_NODE:
12882 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12883 pm_parser_t *parser = (pm_parser_t *) data;
12884 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
12885
12886 for (size_t index = 0; index < implicit_parameters->size; index++) {
12887 if (implicit_parameters->nodes[index] == node) {
12888 /* If the node is not the last one in the list, we need to
12889 * shift the remaining nodes down to fill the gap. This is
12890 * extremely unlikely to happen. */
12891 if (index != implicit_parameters->size - 1) {
12892 memmove(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
12893 }
12894
12895 implicit_parameters->size--;
12896 break;
12897 }
12898 }
12899
12900 return false;
12901 }
12902 default:
12903 return true;
12904 }
12905}
12906
12912static void
12913pm_node_unreference(pm_parser_t *parser, const pm_node_t *node) {
12914 pm_visit_node(node, pm_node_unreference_each, parser);
12915}
12916
12921static void
12922parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
12923 // The method name needs to change. If we previously had
12924 // foo, we now need foo=. In this case we'll allocate a new
12925 // owned string, copy the previous method name in, and
12926 // append an =.
12927 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
12928 size_t length = constant->length;
12929 uint8_t *name = (uint8_t *) pm_arena_alloc(parser->arena, length + 1, 1);
12930
12931 memcpy(name, constant->start, length);
12932 name[length] = '=';
12933
12934 *name_field = pm_constant_pool_insert_owned(&parser->metadata_arena, &parser->constant_pool, name, length + 1);
12935}
12936
12943static pm_node_t *
12944parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
12945 switch (PM_NODE_TYPE(target)) {
12946 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
12947 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
12948 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
12949 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
12950 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
12951 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
12952 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
12953 default: break;
12954 }
12955
12956 pm_constant_id_t name = pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
12957 pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
12958
12959 return UP(result);
12960}
12961
12970static pm_node_t *
12971parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
12972 switch (PM_NODE_TYPE(target)) {
12973 case PM_ERROR_RECOVERY_NODE:
12974 return target;
12975 case PM_SOURCE_ENCODING_NODE:
12976 case PM_FALSE_NODE:
12977 case PM_SOURCE_FILE_NODE:
12978 case PM_SOURCE_LINE_NODE:
12979 case PM_NIL_NODE:
12980 case PM_SELF_NODE:
12981 case PM_TRUE_NODE: {
12982 // In these special cases, we have specific error messages and we
12983 // will replace them with local variable writes.
12984 return parse_unwriteable_target(parser, target);
12985 }
12986 case PM_CLASS_VARIABLE_READ_NODE:
12988 target->type = PM_CLASS_VARIABLE_TARGET_NODE;
12989 return target;
12990 case PM_CONSTANT_PATH_NODE:
12991 if (context_def_p(parser)) {
12992 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12993 }
12994
12996 target->type = PM_CONSTANT_PATH_TARGET_NODE;
12997
12998 return target;
12999 case PM_CONSTANT_READ_NODE:
13000 if (context_def_p(parser)) {
13001 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13002 }
13003
13004 assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13005 target->type = PM_CONSTANT_TARGET_NODE;
13006
13007 return target;
13008 case PM_BACK_REFERENCE_READ_NODE:
13009 case PM_NUMBERED_REFERENCE_READ_NODE:
13010 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13011 return UP(pm_error_recovery_node_create_unexpected(parser, target));
13012 case PM_GLOBAL_VARIABLE_READ_NODE:
13014 target->type = PM_GLOBAL_VARIABLE_TARGET_NODE;
13015 return target;
13016 case PM_LOCAL_VARIABLE_READ_NODE: {
13017 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
13018 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(target));
13019 pm_node_unreference(parser, target);
13020 }
13021
13022 const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13023 uint32_t name = cast->name;
13024 uint32_t depth = cast->depth;
13025 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13026
13028 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
13029
13030 return target;
13031 }
13032 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13033 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13034 pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->location, name, 0));
13035
13036 pm_node_unreference(parser, target);
13037
13038 return node;
13039 }
13040 case PM_INSTANCE_VARIABLE_READ_NODE:
13042 target->type = PM_INSTANCE_VARIABLE_TARGET_NODE;
13043 return target;
13044 case PM_MULTI_TARGET_NODE:
13045 if (splat_parent) {
13046 // Multi target is not accepted in all positions. If this is one
13047 // of them, then we need to add an error.
13048 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13049 }
13050
13051 return target;
13052 case PM_SPLAT_NODE: {
13053 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13054
13055 if (splat->expression != NULL) {
13056 splat->expression = parse_target(parser, splat->expression, multiple, true);
13057 }
13058
13059 return UP(splat);
13060 }
13061 case PM_CALL_NODE: {
13062 pm_call_node_t *call = (pm_call_node_t *) target;
13063
13064 // If we have no arguments to the call node and we need this to be a
13065 // target then this is either a method call or a local variable
13066 // write.
13067 if (
13068 (call->message_loc.length > 0) &&
13069 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
13070 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
13071 (call->opening_loc.length == 0) &&
13072 (call->arguments == NULL) &&
13073 (call->block == NULL)
13074 ) {
13075 if (call->receiver == NULL) {
13076 // When we get here, we have a local variable write, because it
13077 // was previously marked as a method call but now we have an =.
13078 // This looks like:
13079 //
13080 // foo = 1
13081 //
13082 // When it was parsed in the prefix position, foo was seen as a
13083 // method call with no receiver and no arguments. Now we have an
13084 // =, so we know it's a local variable write.
13085 pm_location_t message_loc = call->message_loc;
13086 pm_constant_id_t name = pm_parser_local_add_location(parser, &message_loc, 0);
13087
13088 return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
13089 }
13090
13091 if (peek_at(parser, parser->start + call->message_loc.start) == '_' || parser->encoding->alnum_char(parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
13092 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13093 pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13094 }
13095
13096 parse_write_name(parser, &call->name);
13097 return UP(pm_call_target_node_create(parser, call));
13098 }
13099 }
13100
13101 // If there is no call operator and the message is "[]" then this is
13102 // an aref expression, and we can transform it into an aset
13103 // expression.
13104 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13105 return UP(pm_index_target_node_create(parser, call));
13106 }
13107 }
13109 default:
13110 // In this case we have a node that we don't know how to convert
13111 // into a target. We need to treat it as an error. For now, we'll
13112 // mark it as an error and just skip right past it.
13113 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13114 return target;
13115 }
13116}
13117
13122static pm_node_t *
13123parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13124 pm_node_t *result = parse_target(parser, target, multiple, false);
13125
13126 // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13127 // parens after the targets.
13128 if (
13129 !match1(parser, PM_TOKEN_EQUAL) &&
13130 !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13131 !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13132 ) {
13133 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13134 }
13135
13136 return result;
13137}
13138
13143static pm_node_t *
13144parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13145 pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13146
13147 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13148 return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
13149 }
13150
13151 return write;
13152}
13153
13157static pm_node_t *
13158parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13159 switch (PM_NODE_TYPE(target)) {
13160 case PM_ERROR_RECOVERY_NODE:
13161 return target;
13162 case PM_CLASS_VARIABLE_READ_NODE: {
13163 pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13164 return UP(node);
13165 }
13166 case PM_CONSTANT_PATH_NODE: {
13167 pm_node_t *node = UP(pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value));
13168
13169 if (context_def_p(parser)) {
13170 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13171 }
13172
13173 return parse_shareable_constant_write(parser, node);
13174 }
13175 case PM_CONSTANT_READ_NODE: {
13176 pm_node_t *node = UP(pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value));
13177
13178 if (context_def_p(parser)) {
13179 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13180 }
13181
13182 return parse_shareable_constant_write(parser, node);
13183 }
13184 case PM_BACK_REFERENCE_READ_NODE:
13185 case PM_NUMBERED_REFERENCE_READ_NODE:
13186 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13188 case PM_GLOBAL_VARIABLE_READ_NODE: {
13189 pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13190 return UP(node);
13191 }
13192 case PM_LOCAL_VARIABLE_READ_NODE: {
13194
13195 pm_location_t location = target->location;
13196 pm_constant_id_t name = local_read->name;
13197 uint32_t depth = local_read->depth;
13198 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13199
13200 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
13201 pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13202 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), diag_id, parser->start + PM_NODE_START(target));
13203 pm_node_unreference(parser, target);
13204 }
13205
13206 pm_locals_unread(&scope->locals, name);
13207
13208 return UP(pm_local_variable_write_node_create(parser, name, depth, value, &location, operator));
13209 }
13210 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
13211 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13212 pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator));
13213
13214 pm_node_unreference(parser, target);
13215
13216 return node;
13217 }
13218 case PM_INSTANCE_VARIABLE_READ_NODE: {
13219 pm_node_t *write_node = UP(pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value));
13220 return write_node;
13221 }
13222 case PM_MULTI_TARGET_NODE:
13223 return UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value));
13224 case PM_SPLAT_NODE: {
13225 pm_splat_node_t *splat = (pm_splat_node_t *) target;
13226
13227 if (splat->expression != NULL) {
13228 splat->expression = parse_write(parser, splat->expression, operator, value);
13229 }
13230
13231 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13232 pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
13233
13234 return UP(pm_multi_write_node_create(parser, multi_target, operator, value));
13235 }
13236 case PM_CALL_NODE: {
13237 pm_call_node_t *call = (pm_call_node_t *) target;
13238
13239 // If we have no arguments to the call node and we need this to be a
13240 // target then this is either a method call or a local variable
13241 // write.
13242 if (
13243 (call->message_loc.length > 0) &&
13244 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '!') &&
13245 (parser->start[call->message_loc.start + call->message_loc.length - 1] != '?') &&
13246 (call->opening_loc.length == 0) &&
13247 (call->arguments == NULL) &&
13248 (call->block == NULL)
13249 ) {
13250 if (call->receiver == NULL) {
13251 // When we get here, we have a local variable write, because it
13252 // was previously marked as a method call but now we have an =.
13253 // This looks like:
13254 //
13255 // foo = 1
13256 //
13257 // When it was parsed in the prefix position, foo was seen as a
13258 // method call with no receiver and no arguments. Now we have an
13259 // =, so we know it's a local variable write.
13260 pm_location_t message_loc = call->message_loc;
13261
13262 pm_refute_numbered_parameter(parser, message_loc.start, message_loc.length);
13263 pm_parser_local_add_location(parser, &message_loc, 0);
13264
13265 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, parser->start + PM_LOCATION_START(&message_loc), parser->start + PM_LOCATION_END(&message_loc));
13266 target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message_loc, operator));
13267
13268 return target;
13269 }
13270
13271 if (char_is_identifier_start(parser, parser->start + call->message_loc.start, (ptrdiff_t) call->message_loc.length)) {
13272 // When we get here, we have a method call, because it was
13273 // previously marked as a method call but now we have an =. This
13274 // looks like:
13275 //
13276 // foo.bar = 1
13277 //
13278 // When it was parsed in the prefix position, foo.bar was seen as a
13279 // method call with no arguments. Now we have an =, so we know it's
13280 // a method call with an argument. In this case we will create the
13281 // arguments node, parse the argument, and add it to the list.
13282 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13283 call->arguments = arguments;
13284
13285 pm_arguments_node_arguments_append(parser->arena, arguments, value);
13286 PM_NODE_LENGTH_SET_NODE(call, arguments);
13287 call->equal_loc = TOK2LOC(parser, operator);
13288
13289 parse_write_name(parser, &call->name);
13290 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13291
13292 return UP(call);
13293 }
13294 }
13295
13296 // If there is no call operator and the message is "[]" then this is
13297 // an aref expression, and we can transform it into an aset
13298 // expression.
13299 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13300 if (call->arguments == NULL) {
13301 call->arguments = pm_arguments_node_create(parser);
13302 }
13303
13304 pm_arguments_node_arguments_append(parser->arena, call->arguments, value);
13305 PM_NODE_LENGTH_SET_NODE(target, value);
13306
13307 // Replace the name with "[]=".
13308 call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13309 call->equal_loc = TOK2LOC(parser, operator);
13310
13311 // Ensure that the arguments for []= don't contain keywords
13312 pm_index_arguments_check(parser, call->arguments, call->block);
13313 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13314
13315 return target;
13316 }
13317
13318 // If there are arguments on the call node, then it can't be a
13319 // method call ending with = or a local variable write, so it must
13320 // be a syntax error. In this case we'll fall through to our default
13321 // handling. We need to free the value that we parsed because there
13322 // is no way for us to attach it to the tree at this point.
13323 //
13324 // Since it is possible for the value to contain an implicit
13325 // parameter somewhere in its subtree, we need to walk it and remove
13326 // any implicit parameters from the list of implicit parameters for
13327 // the current scope.
13328 pm_node_unreference(parser, value);
13329 }
13331 default:
13332 // In this case we have a node that we don't know how to convert into a
13333 // target. We need to treat it as an error. For now, we'll mark it as an
13334 // error and just skip right past it.
13335 pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13336 return target;
13337 }
13338}
13339
13346static pm_node_t *
13347parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13348 switch (PM_NODE_TYPE(target)) {
13349 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13350 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13351 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13352 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13353 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13354 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13355 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13356 default: break;
13357 }
13358
13359 pm_constant_id_t name = pm_parser_local_add_location(parser, &target->location, 1);
13360 pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13361
13362 return UP(result);
13363}
13364
13375static pm_node_t *
13376parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13377 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13378
13379 pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13380 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13381
13382 while (accept1(parser, PM_TOKEN_COMMA)) {
13383 if (accept1(parser, PM_TOKEN_USTAR)) {
13384 // Here we have a splat operator. It can have a name or be
13385 // anonymous. It can be the final target or be in the middle if
13386 // there haven't been any others yet.
13387 if (has_rest) {
13388 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13389 }
13390
13391 pm_token_t star_operator = parser->previous;
13392 pm_node_t *name = NULL;
13393
13394 if (token_begins_expression_p(parser->current.type)) {
13395 name = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13396 name = parse_target(parser, name, true, true);
13397 }
13398
13399 pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
13400 pm_multi_target_node_targets_append(parser, result, splat);
13401 has_rest = true;
13402 } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13403 context_push(parser, PM_CONTEXT_MULTI_TARGET);
13404 pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13405 target = parse_target(parser, target, true, false);
13406
13407 pm_multi_target_node_targets_append(parser, result, target);
13408 context_pop(parser);
13409 } else if (token_begins_expression_p(parser->current.type)) {
13410 pm_node_t *target = parse_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13411 target = parse_target(parser, target, true, false);
13412
13413 pm_multi_target_node_targets_append(parser, result, target);
13414 } else if (!match1(parser, PM_TOKEN_EOF)) {
13415 // If we get here, then we have a trailing , in a multi target node.
13416 // We'll add an implicit rest node to represent this.
13417 pm_node_t *rest = UP(pm_implicit_rest_node_create(parser, &parser->previous));
13418 pm_multi_target_node_targets_append(parser, result, rest);
13419 break;
13420 }
13421 }
13422
13423 return UP(result);
13424}
13425
13430static pm_node_t *
13431parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13432 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13433
13434 // If we're inside parentheses, then we allow a newline before the
13435 // closing parenthesis or equals sign. Outside of parentheses, a newline
13436 // is not allowed (e.g., `a, b\n= 1, 2` is not valid).
13437 if (context_p(parser, PM_CONTEXT_PARENS) || context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
13438 accept1(parser, PM_TOKEN_NEWLINE);
13439 }
13440
13441 // Ensure that we have either an = or a ) after the targets.
13442 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13443 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13444 }
13445
13446 return result;
13447}
13448
13452static pm_statements_node_t *
13453parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13454 // First, skip past any optional terminators that might be at the beginning
13455 // of the statements.
13456 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13457
13458 // If we have a terminator, then we can just return NULL.
13459 if (context_terminator(context, &parser->current)) return NULL;
13460
13461 pm_statements_node_t *statements = pm_statements_node_create(parser);
13462
13463 // At this point we know we have at least one statement, and that it
13464 // immediately follows the current token.
13465 context_push(parser, context);
13466
13467 while (true) {
13468 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13469 pm_statements_node_body_append(parser, statements, node, true);
13470
13471 // If we're recovering from a syntax error, then we need to stop parsing
13472 // the statements now.
13473 if (parser->recovering) {
13474 // If this is the level of context where the recovery has happened,
13475 // then we can mark the parser as done recovering.
13476 if (context_terminator(context, &parser->current)) parser->recovering = false;
13477 break;
13478 }
13479
13480 // If we have a terminator, then we will parse all consecutive
13481 // terminators and then continue parsing the statements list.
13482 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13483 // If we have a terminator, then we will continue parsing the
13484 // statements list.
13485 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13486 if (context_terminator(context, &parser->current)) break;
13487
13488 // Now we can continue parsing the list of statements.
13489 continue;
13490 }
13491
13492 // At this point we have a list of statements that are not terminated by
13493 // a newline or semicolon. At this point we need to check if we're at
13494 // the end of the statements list. If we are, then we should break out
13495 // of the loop.
13496 if (context_terminator(context, &parser->current)) break;
13497
13498 // At this point, we have a syntax error, because the statement was not
13499 // terminated by a newline or semicolon, and we're not at the end of the
13500 // statements list. Ideally we should scan forward to determine if we
13501 // should insert a missing terminator or break out of parsing the
13502 // statements list at this point.
13503 //
13504 // We don't have that yet, so instead we'll do a more naive approach. If
13505 // we were unable to parse an expression, then we will skip past this
13506 // token and continue parsing the statements list. Otherwise we'll add
13507 // an error and continue parsing the statements list.
13508 if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) {
13509 parser_lex(parser);
13510
13511 // If we are at the end of the file, then we need to stop parsing
13512 // the statements entirely at this point. Mark the parser as
13513 // recovering, as we know that EOF closes the top-level context, and
13514 // then break out of the loop.
13515 if (match1(parser, PM_TOKEN_EOF)) {
13516 parser->recovering = true;
13517 break;
13518 }
13519
13520 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13521 if (context_terminator(context, &parser->current)) break;
13522 } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13523 // This is an inlined version of accept1 because the error that we
13524 // want to add has varargs. If this happens again, we should
13525 // probably extract a helper function.
13526 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
13527 parser->previous.start = parser->previous.end;
13528 parser->previous.type = 0;
13529 }
13530 }
13531
13532 context_pop(parser);
13533
13534 bool last_value = true;
13535 switch (context) {
13536 case PM_CONTEXT_BEGIN_ENSURE:
13537 case PM_CONTEXT_DEF_ENSURE:
13538 last_value = false;
13539 break;
13540 default:
13541 break;
13542 }
13543 pm_void_statements_check(parser, statements, last_value);
13544
13545 return statements;
13546}
13547
13552static void
13553pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13554 const pm_node_t *duplicated = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, true);
13555
13556 if (duplicated != NULL) {
13557 pm_buffer_t buffer = { 0 };
13558 pm_static_literal_inspect(&buffer, &parser->line_offsets, parser->start, parser->start_line, parser->encoding->name, duplicated);
13559
13560 pm_diagnostic_list_append_format(
13561 &parser->metadata_arena,
13562 &parser->warning_list,
13563 duplicated->location.start,
13564 duplicated->location.length,
13565 PM_WARN_DUPLICATED_HASH_KEY,
13566 (int) pm_buffer_length(&buffer),
13567 pm_buffer_value(&buffer),
13568 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line
13569 );
13570
13571 pm_buffer_cleanup(&buffer);
13572 }
13573}
13574
13579static void
13580pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13581 pm_node_t *previous;
13582
13583 if ((previous = pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, literals, node, false)) != NULL) {
13584 pm_diagnostic_list_append_format(
13585 &parser->metadata_arena,
13586 &parser->warning_list,
13587 PM_NODE_START(node),
13588 PM_NODE_LENGTH(node),
13589 PM_WARN_DUPLICATED_WHEN_CLAUSE,
13590 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(node), parser->start_line).line,
13591 pm_line_offset_list_line_column(&parser->line_offsets, PM_NODE_START(previous), parser->start_line).line
13592 );
13593 }
13594}
13595
13599static bool
13600parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
13601 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
13602 bool contains_keyword_splat = false;
13603
13604 while (true) {
13605 pm_node_t *element;
13606
13607 switch (parser->current.type) {
13608 case PM_TOKEN_USTAR_STAR: {
13609 parser_lex(parser);
13610 pm_token_t operator = parser->previous;
13611 pm_node_t *value = NULL;
13612
13613 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13614 // If we're about to parse a nested hash that is being
13615 // pushed into this hash directly with **, then we want the
13616 // inner hash to share the static literals with the outer
13617 // hash.
13618 parser->current_hash_keys = literals;
13619 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13620 } else if (token_begins_expression_p(parser->current.type)) {
13621 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13622 } else {
13623 pm_parser_scope_forwarding_keywords_check(parser, &operator);
13624 }
13625
13626 element = UP(pm_assoc_splat_node_create(parser, value, &operator));
13627 contains_keyword_splat = true;
13628 break;
13629 }
13630 case PM_TOKEN_LABEL: {
13631 pm_token_t label = parser->current;
13632 parser_lex(parser);
13633
13634 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
13635 pm_hash_key_static_literals_add(parser, literals, key);
13636
13637 pm_node_t *value = NULL;
13638
13639 if (token_begins_expression_p(parser->current.type)) {
13640 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
13641 } else {
13642 if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
13643 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
13644 value = UP(pm_constant_read_node_create(parser, &constant));
13645 } else {
13646 int depth = -1;
13647 pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
13648
13649 if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
13650 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
13651 } else {
13652 depth = pm_parser_local_depth(parser, &identifier);
13653 }
13654
13655 if (depth == -1) {
13656 value = UP(pm_call_node_variable_call_create(parser, &identifier));
13657 } else {
13658 value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
13659 }
13660 }
13661
13662 value->location.length++;
13663 value = UP(pm_implicit_node_create(parser, value));
13664 }
13665
13666 element = UP(pm_assoc_node_create(parser, key, NULL, value));
13667 break;
13668 }
13669 default: {
13670 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
13671
13672 // Hash keys that are strings are automatically frozen. We will
13673 // mark that here.
13674 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
13675 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
13676 }
13677
13678 pm_hash_key_static_literals_add(parser, literals, key);
13679
13680 pm_token_t operator = { 0 };
13681 if (!pm_symbol_node_label_p(parser, key)) {
13682 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
13683 operator = parser->previous;
13684 }
13685
13686 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13687 element = UP(pm_assoc_node_create(parser, key, NTOK2PTR(operator), value));
13688 break;
13689 }
13690 }
13691
13692 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
13693 pm_hash_node_elements_append(parser->arena, (pm_hash_node_t *) node, element);
13694 } else {
13695 pm_keyword_hash_node_elements_append(parser->arena, (pm_keyword_hash_node_t *) node, element);
13696 }
13697
13698 // If there's no comma after the element, then we're done.
13699 if (!accept1(parser, PM_TOKEN_COMMA)) break;
13700
13701 // If the next element starts with a label or a **, then we know we have
13702 // another element in the hash, so we'll continue parsing.
13703 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
13704
13705 // Otherwise we need to check if the subsequent token begins an expression.
13706 // If it does, then we'll continue parsing.
13707 if (token_begins_expression_p(parser->current.type)) continue;
13708
13709 // Otherwise by default we will exit out of this loop.
13710 break;
13711 }
13712
13713 return contains_keyword_splat;
13714}
13715
13716static PRISM_INLINE bool
13717argument_allowed_for_bare_hash(pm_parser_t *parser, pm_node_t *argument) {
13718 if (pm_symbol_node_label_p(parser, argument)) {
13719 return true;
13720 }
13721
13722 switch (PM_NODE_TYPE(argument)) {
13723 case PM_CALL_NODE: {
13724 pm_call_node_t *cast = (pm_call_node_t *) argument;
13725 if (cast->opening_loc.length == 0 && cast->arguments != NULL) {
13726 if (PM_NODE_FLAG_P(cast->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
13727 return false;
13728 }
13729 if (cast->block != NULL) {
13730 return false;
13731 }
13732 }
13733 break;
13734 }
13735 default: break;
13736 }
13737 return accept1(parser, PM_TOKEN_EQUAL_GREATER);
13738}
13739
13743static PRISM_INLINE void
13744parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
13745 if (arguments->arguments == NULL) {
13746 arguments->arguments = pm_arguments_node_create(parser);
13747 }
13748
13749 pm_arguments_node_arguments_append(parser->arena, arguments->arguments, argument);
13750}
13751
13755static void
13756parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint8_t flags, uint16_t depth) {
13757 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
13758
13759 // First we need to check if the next token is one that could be the start
13760 // of an argument. If it's not, then we can just return.
13761 if (
13762 match2(parser, terminator, PM_TOKEN_EOF) ||
13763 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
13764 context_terminator(parser->current_context->context, &parser->current)
13765 ) {
13766 return;
13767 }
13768
13769 bool parsed_first_argument = false;
13770 bool parsed_bare_hash = false;
13771 bool parsed_block_argument = false;
13772 bool parsed_forwarding_arguments = false;
13773
13774 while (!match1(parser, PM_TOKEN_EOF)) {
13775 if (parsed_forwarding_arguments) {
13776 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
13777 }
13778
13779 pm_node_t *argument = NULL;
13780
13781 switch (parser->current.type) {
13782 case PM_TOKEN_USTAR_STAR:
13783 case PM_TOKEN_LABEL: {
13784 if (parsed_bare_hash) {
13785 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
13786 }
13787
13788 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
13789 argument = UP(hash);
13790
13791 pm_static_literals_t hash_keys = { 0 };
13792 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
13793
13794 parse_arguments_append(parser, arguments, argument);
13795
13796 pm_node_flags_t node_flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13797 if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13798 pm_node_flag_set(UP(arguments->arguments), node_flags);
13799
13800 pm_static_literals_free(&hash_keys);
13801 parsed_bare_hash = true;
13802
13803 break;
13804 }
13805 case PM_TOKEN_UAMPERSAND: {
13806 parser_lex(parser);
13807 pm_token_t operator = parser->previous;
13808 pm_node_t *expression = NULL;
13809
13810 if (token_begins_expression_p(parser->current.type)) {
13811 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13812 } else {
13813 pm_parser_scope_forwarding_block_check(parser, &operator);
13814 }
13815
13816 argument = UP(pm_block_argument_node_create(parser, &operator, expression));
13817 if (parsed_block_argument) {
13818 parse_arguments_append(parser, arguments, argument);
13819 } else {
13820 arguments->block = argument;
13821 }
13822
13823 if (match1(parser, PM_TOKEN_COMMA)) {
13824 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
13825 }
13826
13827 parsed_block_argument = true;
13828 break;
13829 }
13830 case PM_TOKEN_USTAR: {
13831 parser_lex(parser);
13832 pm_token_t operator = parser->previous;
13833
13834 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
13835 pm_parser_scope_forwarding_positionals_check(parser, &operator);
13836 argument = UP(pm_splat_node_create(parser, &operator, NULL));
13837 if (parsed_bare_hash) {
13838 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13839 }
13840 } else {
13841 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
13842
13843 if (parsed_bare_hash) {
13844 pm_parser_err(parser, PM_TOKEN_START(parser, &operator), PM_NODE_END(expression) - PM_TOKEN_START(parser, &operator), PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13845 }
13846
13847 argument = UP(pm_splat_node_create(parser, &operator, expression));
13848 }
13849
13850 parse_arguments_append(parser, arguments, argument);
13851 break;
13852 }
13853 case PM_TOKEN_UDOT_DOT_DOT: {
13854 if (accepts_forwarding) {
13855 parser_lex(parser);
13856
13857 if (token_begins_expression_p(parser->current.type)) {
13858 // If the token begins an expression then this ... was
13859 // not actually argument forwarding but was instead a
13860 // range.
13861 pm_token_t operator = parser->previous;
13862 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
13863
13864 // If we parse a range, we need to validate that we
13865 // didn't accidentally violate the nonassoc rules of the
13866 // ... operator.
13867 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
13868 pm_range_node_t *range = (pm_range_node_t *) right;
13869 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.length, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
13870 }
13871
13872 argument = UP(pm_range_node_create(parser, NULL, &operator, right));
13873 } else {
13874 pm_parser_scope_forwarding_all_check(parser, &parser->previous);
13875 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
13876 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
13877 }
13878
13879 argument = UP(pm_forwarding_arguments_node_create(parser, &parser->previous));
13880 parse_arguments_append(parser, arguments, argument);
13881 pm_node_flag_set(UP(arguments->arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
13882 arguments->has_forwarding = true;
13883 parsed_forwarding_arguments = true;
13884 break;
13885 }
13886 }
13887 }
13889 default: {
13890 if (argument == NULL) {
13891 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (!parsed_first_argument ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0u) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13892 }
13893
13894 bool contains_keywords = false;
13895 bool contains_keyword_splat = false;
13896
13897 if (argument_allowed_for_bare_hash(parser, argument)) {
13898 if (parsed_bare_hash) {
13899 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
13900 }
13901
13902 pm_token_t operator = { 0 };
13903 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
13904 operator = parser->previous;
13905 }
13906
13907 pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
13908 contains_keywords = true;
13909
13910 // Create the set of static literals for this hash.
13911 pm_static_literals_t hash_keys = { 0 };
13912 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13913
13914 // Finish parsing the one we are part way through.
13915 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13916 argument = UP(pm_assoc_node_create(parser, argument, NTOK2PTR(operator), value));
13917
13918 pm_keyword_hash_node_elements_append(parser->arena, bare_hash, argument);
13919 argument = UP(bare_hash);
13920
13921 // Then parse more if we have a comma
13922 if (accept1(parser, PM_TOKEN_COMMA) && (
13923 token_begins_expression_p(parser->current.type) ||
13924 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13925 )) {
13926 contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
13927 }
13928
13929 pm_static_literals_free(&hash_keys);
13930 parsed_bare_hash = true;
13931 }
13932
13933 parse_arguments_append(parser, arguments, argument);
13934
13935 pm_node_flags_t node_flags = 0;
13936 if (contains_keywords) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13937 if (contains_keyword_splat) node_flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13938 pm_node_flag_set(UP(arguments->arguments), node_flags);
13939
13940 break;
13941 }
13942 }
13943
13944 parsed_first_argument = true;
13945
13946 // If parsing the argument failed, we need to stop parsing arguments.
13947 if (PM_NODE_TYPE_P(argument, PM_ERROR_RECOVERY_NODE) || parser->recovering) break;
13948
13949 // If the terminator of these arguments is not EOF, then we have a
13950 // specific token we're looking for. In that case we can accept a
13951 // newline here because it is not functioning as a statement terminator.
13952 bool accepted_newline = false;
13953 if (terminator != PM_TOKEN_EOF) {
13954 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
13955 }
13956
13957 if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
13958 // If we previously were on a comma and we just parsed a bare hash,
13959 // then we want to continue parsing arguments. This is because the
13960 // comma was grabbed up by the hash parser.
13961 } else if (accept1(parser, PM_TOKEN_COMMA)) {
13962 // If there was a comma, then we need to check if we also accepted a
13963 // newline. If we did, then this is a syntax error.
13964 if (accepted_newline) {
13965 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13966 }
13967
13968 // If this is a command call and an argument takes a block,
13969 // there can be no further arguments. For example,
13970 // `foo(bar 1 do end, 2)` should be rejected.
13971 if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
13972 pm_call_node_t *call = (pm_call_node_t *) argument;
13973 if (call->opening_loc.length == 0 && call->arguments != NULL && call->block != NULL) {
13974 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13975 break;
13976 }
13977 }
13978 } else {
13979 // If there is no comma at the end of the argument list then we're
13980 // done parsing arguments and can break out of this loop.
13981 break;
13982 }
13983
13984 // If we hit the terminator, then that means we have a trailing comma so
13985 // we can accept that output as well.
13986 if (match1(parser, terminator)) break;
13987 }
13988}
13989
14001parse_required_destructured_parameter(pm_parser_t *parser) {
14002 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14003
14004 pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14005 pm_multi_target_node_opening_set(parser, node, &parser->previous);
14006
14007 do {
14008 pm_node_t *param;
14009
14010 // If we get here then we have a trailing comma, which isn't allowed in
14011 // the grammar. In other places, multi targets _do_ allow trailing
14012 // commas, so here we'll assume this is a mistake of the user not
14013 // knowing it's not allowed here.
14014 if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14015 param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
14016 pm_multi_target_node_targets_append(parser, node, param);
14017 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14018 break;
14019 }
14020
14021 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14022 param = UP(parse_required_destructured_parameter(parser));
14023 } else if (accept1(parser, PM_TOKEN_USTAR)) {
14024 pm_token_t star = parser->previous;
14025 pm_node_t *value = NULL;
14026
14027 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14028 pm_token_t name = parser->previous;
14029 value = UP(pm_required_parameter_node_create(parser, &name));
14030 if (pm_parser_parameter_name_check(parser, &name)) {
14031 pm_node_flag_set_repeated_parameter(value);
14032 }
14033 pm_parser_local_add_token(parser, &name, 1);
14034 }
14035
14036 param = UP(pm_splat_node_create(parser, &star, value));
14037 } else {
14038 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14039 pm_token_t name = parser->previous;
14040
14041 param = UP(pm_required_parameter_node_create(parser, &name));
14042 if (pm_parser_parameter_name_check(parser, &name)) {
14043 pm_node_flag_set_repeated_parameter(param);
14044 }
14045 pm_parser_local_add_token(parser, &name, 1);
14046 }
14047
14048 pm_multi_target_node_targets_append(parser, node, param);
14049 } while (accept1(parser, PM_TOKEN_COMMA));
14050
14051 accept1(parser, PM_TOKEN_NEWLINE);
14052 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14053 pm_multi_target_node_closing_set(parser, node, &parser->previous);
14054
14055 return node;
14056}
14057
/**
 * The states used to validate the order in which parameters appear in a
 * parameter list. Apart from PM_PARAMETERS_NO_CHANGE, the values are ordered
 * so that a later position in the list has a smaller value: parsing starts at
 * PM_PARAMETERS_ORDER_NONE and the tracked state is only ever lowered.
 */
typedef enum {
    PM_PARAMETERS_NO_CHANGE = 0, // Extra state for tokens that should not change the state
    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1, // Nothing may follow (block and forwarding parameters)
    PM_PARAMETERS_ORDER_KEYWORDS_REST, // After a `**` parameter
    PM_PARAMETERS_ORDER_KEYWORDS, // After a keyword (label) parameter
    PM_PARAMETERS_ORDER_REST,
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL, // After a `*` parameter, or a named parameter that follows an optional one
    PM_PARAMETERS_ORDER_OPTIONAL, // After a parameter with a default value
    PM_PARAMETERS_ORDER_NAMED, // After a plain or destructured required parameter
    PM_PARAMETERS_ORDER_NONE, // Initial state, nothing parsed yet
} pm_parameters_order_t;
14073
14077static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14078 [0] = PM_PARAMETERS_NO_CHANGE,
14079 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14080 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14081 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14082 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14083 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14084 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14085 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14086 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14087 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14088 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14089 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14090};
14091
14099static bool
14100update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14101 pm_parameters_order_t state = parameters_ordering[token->type];
14102 if (state == PM_PARAMETERS_NO_CHANGE) return true;
14103
14104 // If we see another ordered argument after a optional argument
14105 // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14106 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14107 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14108 return true;
14109 } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14110 return true;
14111 }
14112
14113 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14114 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14115 return false;
14116 } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14117 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14118 return false;
14119 } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14120 // We know what transition we failed on, so we can provide a better error here.
14121 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14122 return false;
14123 }
14124
14125 if (state < *current) *current = state;
14126 return true;
14127}
14128
14129static PRISM_INLINE void
14130parse_parameters_handle_trailing_comma(
14131 pm_parser_t *parser,
14132 pm_parameters_node_t *params,
14133 pm_parameters_order_t order,
14134 bool in_block,
14135 bool allows_trailing_comma
14136) {
14137 if (!allows_trailing_comma) {
14138 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14139 return;
14140 }
14141
14142 if (in_block) {
14143 if (order >= PM_PARAMETERS_ORDER_NAMED) {
14144 // foo do |bar,|; end
14145 pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->previous));
14146
14147 if (params->rest == NULL) {
14148 pm_parameters_node_rest_set(params, param);
14149 } else {
14150 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
14151 pm_parameters_node_posts_append(parser->arena, params, UP(param));
14152 }
14153 } else {
14154 // foo do |*bar,|; end
14155 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14156 }
14157 } else {
14158 // https://bugs.ruby-lang.org/issues/19107
14159 // Allow `def foo(bar,); end`, `def foo(*bar,); end`, etc. but not `def foo(...,); end`
14160 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1 || order == PM_PARAMETERS_ORDER_NOTHING_AFTER) {
14161 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14162 }
14163 }
14164}
14165
14169static pm_parameters_node_t *
14170parse_parameters(
14171 pm_parser_t *parser,
14172 pm_binding_power_t binding_power,
14173 bool uses_parentheses,
14174 bool allows_trailing_comma,
14175 bool allows_forwarding_parameters,
14176 bool accepts_blocks_in_defaults,
14177 bool in_block,
14178 pm_diagnostic_id_t diag_id_forwarding,
14179 uint16_t depth
14180) {
14181 pm_do_loop_stack_push(parser, false);
14182
14183 pm_parameters_node_t *params = pm_parameters_node_create(parser);
14184 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14185
14186 while (true) {
14187 bool parsing = true;
14188
14189 switch (parser->current.type) {
14190 case PM_TOKEN_PARENTHESIS_LEFT: {
14191 update_parameter_state(parser, &parser->current, &order);
14192 pm_node_t *param = UP(parse_required_destructured_parameter(parser));
14193
14194 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14195 pm_parameters_node_requireds_append(parser->arena, params, param);
14196 } else {
14197 pm_parameters_node_posts_append(parser->arena, params, param);
14198 }
14199 break;
14200 }
14201 case PM_TOKEN_UAMPERSAND:
14202 case PM_TOKEN_AMPERSAND: {
14203 update_parameter_state(parser, &parser->current, &order);
14204 parser_lex(parser);
14205
14206 pm_token_t operator = parser->previous;
14207 pm_node_t *param;
14208
14209 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1 && accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14210 param = (pm_node_t *) pm_no_block_parameter_node_create(parser, &operator, &parser->previous);
14211 } else {
14212 pm_token_t name = {0};
14213
14214 bool repeated = false;
14215 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14216 name = parser->previous;
14217 repeated = pm_parser_parameter_name_check(parser, &name);
14218 pm_parser_local_add_token(parser, &name, 1);
14219 } else {
14220 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14221 }
14222
14223 param = (pm_node_t *) pm_block_parameter_node_create(parser, NTOK2PTR(name), &operator);
14224 if (repeated) {
14225 pm_node_flag_set_repeated_parameter(param);
14226 }
14227 }
14228
14229 if (params->block == NULL) {
14230 pm_parameters_node_block_set(params, param);
14231 } else {
14232 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_BLOCK_MULTI);
14233 pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param)));
14234 }
14235
14236 break;
14237 }
14238 case PM_TOKEN_UDOT_DOT_DOT: {
14239 if (!allows_forwarding_parameters) {
14240 pm_parser_err_current(parser, diag_id_forwarding);
14241 }
14242
14243 bool succeeded = update_parameter_state(parser, &parser->current, &order);
14244 parser_lex(parser);
14245
14246 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14247 pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14248
14249 if (params->keyword_rest != NULL) {
14250 // If we already have a keyword rest parameter, then we replace it with the
14251 // forwarding parameter and move the keyword rest parameter to the posts list.
14252 pm_node_t *keyword_rest = params->keyword_rest;
14253 pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, keyword_rest)));
14254 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14255 params->keyword_rest = NULL;
14256 }
14257
14258 pm_parameters_node_keyword_rest_set(params, UP(param));
14259 break;
14260 }
14261 case PM_TOKEN_CLASS_VARIABLE:
14262 case PM_TOKEN_IDENTIFIER:
14263 case PM_TOKEN_CONSTANT:
14264 case PM_TOKEN_INSTANCE_VARIABLE:
14265 case PM_TOKEN_GLOBAL_VARIABLE:
14266 case PM_TOKEN_METHOD_NAME: {
14267 parser_lex(parser);
14268 switch (parser->previous.type) {
14269 case PM_TOKEN_CONSTANT:
14270 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14271 break;
14272 case PM_TOKEN_INSTANCE_VARIABLE:
14273 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14274 break;
14275 case PM_TOKEN_GLOBAL_VARIABLE:
14276 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14277 break;
14278 case PM_TOKEN_CLASS_VARIABLE:
14279 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14280 break;
14281 case PM_TOKEN_METHOD_NAME:
14282 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14283 break;
14284 default: break;
14285 }
14286
14287 if (parser->current.type == PM_TOKEN_EQUAL) {
14288 update_parameter_state(parser, &parser->current, &order);
14289 } else {
14290 update_parameter_state(parser, &parser->previous, &order);
14291 }
14292
14293 pm_token_t name = parser->previous;
14294 bool repeated = pm_parser_parameter_name_check(parser, &name);
14295 pm_parser_local_add_token(parser, &name, 1);
14296
14297 if (match1(parser, PM_TOKEN_EQUAL)) {
14298 pm_token_t operator = parser->current;
14299 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14300 parser_lex(parser);
14301
14302 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14303 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14304
14305 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14306 pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14307 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14308
14309 pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14310
14311 if (repeated) {
14312 pm_node_flag_set_repeated_parameter(UP(param));
14313 }
14314 pm_parameters_node_optionals_append(parser->arena, params, param);
14315
14316 // If the value of the parameter increased the number of
14317 // reads of that parameter, then we need to warn that we
14318 // have a circular definition.
14319 if ((parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14320 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &name, PM_ERR_PARAMETER_CIRCULAR);
14321 }
14322
14323 context_pop(parser);
14324
14325 // If parsing the value of the parameter resulted in error recovery,
14326 // then we can put a missing node in its place and stop parsing the
14327 // parameters entirely now.
14328 if (parser->recovering) {
14329 parsing = false;
14330 break;
14331 }
14332 } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14333 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14334 if (repeated) {
14335 pm_node_flag_set_repeated_parameter(UP(param));
14336 }
14337 pm_parameters_node_requireds_append(parser->arena, params, UP(param));
14338 } else {
14339 pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14340 if (repeated) {
14341 pm_node_flag_set_repeated_parameter(UP(param));
14342 }
14343 pm_parameters_node_posts_append(parser->arena, params, UP(param));
14344 }
14345
14346 break;
14347 }
14348 case PM_TOKEN_LABEL: {
14349 if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14350 update_parameter_state(parser, &parser->current, &order);
14351
14352 context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14353 parser_lex(parser);
14354
14355 pm_token_t name = parser->previous;
14356 pm_token_t local = name;
14357 local.end -= 1;
14358
14359 if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14360 pm_parser_err(parser, PM_TOKEN_START(parser, &local), PM_TOKEN_LENGTH(&local), PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14361 } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14362 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14363 }
14364
14365 bool repeated = pm_parser_parameter_name_check(parser, &local);
14366 pm_parser_local_add_token(parser, &local, 1);
14367
14368 switch (parser->current.type) {
14369 case PM_TOKEN_COMMA:
14370 case PM_TOKEN_PARENTHESIS_RIGHT:
14371 case PM_TOKEN_PIPE: {
14372 context_pop(parser);
14373
14374 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14375 if (repeated) {
14376 pm_node_flag_set_repeated_parameter(param);
14377 }
14378
14379 pm_parameters_node_keywords_append(parser->arena, params, param);
14380 break;
14381 }
14382 case PM_TOKEN_SEMICOLON:
14383 case PM_TOKEN_NEWLINE: {
14384 context_pop(parser);
14385
14386 if (uses_parentheses) {
14387 parsing = false;
14388 break;
14389 }
14390
14391 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14392 if (repeated) {
14393 pm_node_flag_set_repeated_parameter(param);
14394 }
14395
14396 pm_parameters_node_keywords_append(parser->arena, params, param);
14397 break;
14398 }
14399 default: {
14400 pm_node_t *param;
14401
14402 if (token_begins_expression_p(parser->current.type)) {
14403 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14404 uint32_t reads = parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14405
14406 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14407 pm_node_t *value = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14408 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14409
14410 if (parser->version <= PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14411 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_PARAMETER_CIRCULAR);
14412 }
14413
14414 param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
14415 }
14416 else {
14417 param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14418 }
14419
14420 if (repeated) {
14421 pm_node_flag_set_repeated_parameter(param);
14422 }
14423
14424 context_pop(parser);
14425 pm_parameters_node_keywords_append(parser->arena, params, param);
14426
14427 // If parsing the value of the parameter resulted in error recovery,
14428 // then we can put a missing node in its place and stop parsing the
14429 // parameters entirely now.
14430 if (parser->recovering) {
14431 parsing = false;
14432 break;
14433 }
14434 }
14435 }
14436
14437 parser->in_keyword_arg = false;
14438 break;
14439 }
14440 case PM_TOKEN_USTAR:
14441 case PM_TOKEN_STAR: {
14442 update_parameter_state(parser, &parser->current, &order);
14443 parser_lex(parser);
14444
14445 pm_token_t operator = parser->previous;
14446 pm_token_t name = { 0 };
14447 bool repeated = false;
14448
14449 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14450 name = parser->previous;
14451 repeated = pm_parser_parameter_name_check(parser, &name);
14452 pm_parser_local_add_token(parser, &name, 1);
14453 } else {
14454 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14455 }
14456
14457 pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
14458 if (repeated) {
14459 pm_node_flag_set_repeated_parameter(param);
14460 }
14461
14462 if (params->rest == NULL) {
14463 pm_parameters_node_rest_set(params, param);
14464 } else {
14465 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14466 pm_parameters_node_posts_append(parser->arena, params, param);
14467 }
14468
14469 break;
14470 }
14471 case PM_TOKEN_STAR_STAR:
14472 case PM_TOKEN_USTAR_STAR: {
14473 pm_parameters_order_t previous_order = order;
14474 update_parameter_state(parser, &parser->current, &order);
14475 parser_lex(parser);
14476
14477 pm_token_t operator = parser->previous;
14478 pm_node_t *param;
14479
14480 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14481 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14482 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14483 }
14484
14485 param = UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
14486 } else {
14487 pm_token_t name = { 0 };
14488
14489 bool repeated = false;
14490 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14491 name = parser->previous;
14492 repeated = pm_parser_parameter_name_check(parser, &name);
14493 pm_parser_local_add_token(parser, &name, 1);
14494 } else {
14495 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14496 }
14497
14498 param = UP(pm_keyword_rest_parameter_node_create(parser, &operator, NTOK2PTR(name)));
14499 if (repeated) {
14500 pm_node_flag_set_repeated_parameter(param);
14501 }
14502 }
14503
14504 if (params->keyword_rest == NULL) {
14505 pm_parameters_node_keyword_rest_set(params, param);
14506 } else {
14507 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14508 pm_parameters_node_posts_append(parser->arena, params, UP(pm_error_recovery_node_create_unexpected(parser, param)));
14509 }
14510
14511 break;
14512 }
14513 default:
14514 if (parser->previous.type == PM_TOKEN_COMMA) {
14515 parse_parameters_handle_trailing_comma(parser, params, order, in_block, allows_trailing_comma);
14516 }
14517
14518 parsing = false;
14519 break;
14520 }
14521
14522 // If we hit some kind of issue while parsing the parameter, this would
14523 // have been set to false. In that case, we need to break out of the
14524 // loop.
14525 if (!parsing) break;
14526
14527 bool accepted_newline = false;
14528 if (uses_parentheses) {
14529 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14530 }
14531
14532 if (accept1(parser, PM_TOKEN_COMMA)) {
14533 // If there was a comma, but we also accepted a newline, then this
14534 // is a syntax error.
14535 if (accepted_newline) {
14536 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14537 }
14538 } else {
14539 // If there was no comma, then we're done parsing parameters.
14540 break;
14541 }
14542 }
14543
14544 pm_do_loop_stack_pop(parser);
14545
14546 // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14547 if (PM_NODE_START(params) == PM_NODE_END(params)) {
14548 return NULL;
14549 }
14550
14551 return params;
14552}
14553
14558static size_t
14559token_newline_index(const pm_parser_t *parser) {
14560 if (parser->heredoc_end == NULL) {
14561 // This is the common case. In this case we can look at the previously
14562 // recorded newline in the newline list and subtract from the current
14563 // offset.
14564 return parser->line_offsets.size - 1;
14565 } else {
14566 // This is unlikely. This is the case that we have already parsed the
14567 // start of a heredoc, so we cannot rely on looking at the previous
14568 // offset of the newline list, and instead must go through the whole
14569 // process of a binary search for the line number.
14570 return (size_t) pm_line_offset_list_line(&parser->line_offsets, PM_TOKEN_START(parser, &parser->current), 0);
14571 }
14572}
14573
14578static int64_t
14579token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14580 const uint8_t *cursor = parser->start + parser->line_offsets.offsets[newline_index];
14581 const uint8_t *end = token->start;
14582
14583 // Skip over the BOM if it is present.
14584 if (
14585 newline_index == 0 &&
14586 parser->start[0] == 0xef &&
14587 parser->start[1] == 0xbb &&
14588 parser->start[2] == 0xbf
14589 ) cursor += 3;
14590
14591 int64_t column = 0;
14592 for (; cursor < end; cursor++) {
14593 switch (*cursor) {
14594 case '\t':
14595 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14596 break;
14597 case ' ':
14598 column++;
14599 break;
14600 default:
14601 column++;
14602 if (break_on_non_space) return -1;
14603 break;
14604 }
14605 }
14606
14607 return column;
14608}
14609
14614static void
14615parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14616 // If these warnings are disabled (unlikely), then we can just return.
14617 if (!parser->warn_mismatched_indentation) return;
14618
14619 // If the tokens are on the same line, we do not warn.
14620 size_t closing_newline_index = token_newline_index(parser);
14621 if (opening_newline_index == closing_newline_index) return;
14622
14623 // If the opening token has anything other than spaces or tabs before it,
14624 // then we do not warn. This is unless we are matching up an `if`/`end` pair
14625 // and the `if` immediately follows an `else` keyword.
14626 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14627 if (!if_after_else && (opening_column == -1)) return;
14628
14629 // Get a reference to the closing token off the current parser. This assumes
14630 // that the caller has placed this in the correct position.
14631 pm_token_t *closing_token = &parser->current;
14632
14633 // If the tokens are at the same indentation, we do not warn.
14634 int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14635 if ((closing_column == -1) || (opening_column == closing_column)) return;
14636
14637 // If the closing column is greater than the opening column and we are
14638 // allowing indentation, then we do not warn.
14639 if (allow_indent && (closing_column > opening_column)) return;
14640
14641 // Otherwise, add a warning.
14642 PM_PARSER_WARN_FORMAT(
14643 parser,
14644 PM_TOKEN_START(parser, closing_token),
14645 PM_TOKEN_LENGTH(closing_token),
14646 PM_WARN_INDENTATION_MISMATCH,
14647 (int) (closing_token->end - closing_token->start),
14648 (const char *) closing_token->start,
14649 (int) (opening_token->end - opening_token->start),
14650 (const char *) opening_token->start,
14651 ((int32_t) opening_newline_index) + parser->start_line
14652 );
14653}
14654
/**
 * The kind of construct whose rescue clauses are being parsed. Values start at
 * 1, so 0 never denotes a valid kind.
 */
typedef enum {
    PM_RESCUES_BEGIN = 1,
    PM_RESCUES_BLOCK,
    PM_RESCUES_CLASS,
    PM_RESCUES_DEF,
    PM_RESCUES_LAMBDA,
    PM_RESCUES_MODULE,
    PM_RESCUES_SCLASS
} pm_rescues_type_t;
14664
14669static PRISM_INLINE void
14670parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
14671 pm_rescue_node_t *current = NULL;
14672
14673 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
14674 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14675 parser_lex(parser);
14676
14677 pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
14678
14679 switch (parser->current.type) {
14680 case PM_TOKEN_EQUAL_GREATER: {
14681 // Here we have an immediate => after the rescue keyword, in which case
14682 // we're going to have an empty list of exceptions to rescue (which
14683 // implies StandardError).
14684 parser_lex(parser);
14685 pm_rescue_node_operator_set(parser, rescue, &parser->previous);
14686
14687 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14688 reference = parse_target(parser, reference, false, false);
14689
14690 pm_rescue_node_reference_set(rescue, reference);
14691 break;
14692 }
14693 case PM_TOKEN_NEWLINE:
14694 case PM_TOKEN_SEMICOLON:
14695 case PM_TOKEN_KEYWORD_THEN:
14696 // Here we have a terminator for the rescue keyword, in which
14697 // case we're going to just continue on.
14698 break;
14699 default: {
14700 if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
14701 // Here we have something that could be an exception expression, so
14702 // we'll attempt to parse it here and any others delimited by commas.
14703
14704 do {
14705 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
14706 pm_rescue_node_exceptions_append(parser->arena, rescue, expression);
14707
14708 // If we hit a newline, then this is the end of the rescue expression. We
14709 // can continue on to parse the statements.
14710 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
14711
14712 // If we hit a `=>` then we're going to parse the exception variable. Once
14713 // we've done that, we'll break out of the loop and parse the statements.
14714 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14715 pm_rescue_node_operator_set(parser, rescue, &parser->previous);
14716
14717 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14718 reference = parse_target(parser, reference, false, false);
14719
14720 pm_rescue_node_reference_set(rescue, reference);
14721 break;
14722 }
14723 } while (accept1(parser, PM_TOKEN_COMMA));
14724 }
14725 }
14726 }
14727
14728 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14729 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
14730 rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
14731 }
14732 } else {
14733 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
14734 rescue->then_keyword_loc = TOK2LOC(parser, &parser->previous);
14735 }
14736
14737 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
14738 pm_accepts_block_stack_push(parser, true);
14739 pm_context_t context;
14740
14741 switch (type) {
14742 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
14743 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
14744 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
14745 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
14746 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
14747 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
14748 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
14749 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14750 }
14751
14752 pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14753 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
14754
14755 pm_accepts_block_stack_pop(parser);
14756 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14757 }
14758
14759 if (current == NULL) {
14760 pm_begin_node_rescue_clause_set(parent_node, rescue);
14761 } else {
14762 pm_rescue_node_subsequent_set(current, rescue);
14763 }
14764
14765 current = rescue;
14766 }
14767
14768 // The end node locations on rescue nodes will not be set correctly
14769 // since we won't know the end until we've found all subsequent
14770 // clauses. This sets the end location on all rescues once we know it.
14771 if (current != NULL) {
14772 pm_rescue_node_t *clause = parent_node->rescue_clause;
14773
14774 while (clause != NULL) {
14775 PM_NODE_LENGTH_SET_NODE(clause, current);
14776 clause = clause->subsequent;
14777 }
14778 }
14779
14780 pm_token_t else_keyword;
14781 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
14782 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14783 opening_newline_index = token_newline_index(parser);
14784
14785 else_keyword = parser->current;
14786 opening = &else_keyword;
14787
14788 parser_lex(parser);
14789 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14790
14791 pm_statements_node_t *else_statements = NULL;
14792 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
14793 pm_accepts_block_stack_push(parser, true);
14794 pm_context_t context;
14795
14796 switch (type) {
14797 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
14798 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
14799 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
14800 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
14801 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
14802 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
14803 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
14804 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
14805 }
14806
14807 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14808 pm_accepts_block_stack_pop(parser);
14809
14810 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14811 }
14812
14813 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
14814 pm_begin_node_else_clause_set(parent_node, else_clause);
14815
14816 // If we don't have a `current` rescue node, then this is a dangling
14817 // else, and it's an error.
14818 if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
14819 }
14820
14821 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
14822 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14823 pm_token_t ensure_keyword = parser->current;
14824
14825 parser_lex(parser);
14826 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14827
14828 pm_statements_node_t *ensure_statements = NULL;
14829 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14830 pm_accepts_block_stack_push(parser, true);
14831 pm_context_t context;
14832
14833 switch (type) {
14834 case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
14835 case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
14836 case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
14837 case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
14838 case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
14839 case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
14840 case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
14841 default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
14842 }
14843
14844 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14845 pm_accepts_block_stack_pop(parser);
14846
14847 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14848 }
14849
14850 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
14851 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
14852 }
14853
14854 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
14855 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14856 pm_begin_node_end_keyword_set(parser, parent_node, &parser->current);
14857 } else {
14858 pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_KEYWORD_END, .start = parser->previous.end, .end = parser->previous.end };
14859 pm_begin_node_end_keyword_set(parser, parent_node, &end_keyword);
14860 }
14861}
14862
14867static pm_begin_node_t *
14868parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
14869 pm_begin_node_t *node = pm_begin_node_create(parser, NULL, statements);
14870 parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
14871
14872 node->base.location.start = U32(start - parser->start);
14873 PM_NODE_LENGTH_SET_TOKEN(parser, node, &parser->current);
14874
14875 return node;
14876}
14877
14882parse_block_parameters(
14883 pm_parser_t *parser,
14884 bool allows_trailing_comma,
14885 const pm_token_t *opening,
14886 bool is_lambda_literal,
14887 bool accepts_blocks_in_defaults,
14888 uint16_t depth
14889) {
14890 pm_parameters_node_t *parameters = NULL;
14891 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
14892 if (!is_lambda_literal) {
14893 context_push(parser, PM_CONTEXT_BLOCK_PARAMETERS);
14894 }
14895 parameters = parse_parameters(
14896 parser,
14897 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
14898 false,
14899 allows_trailing_comma,
14900 false,
14901 accepts_blocks_in_defaults,
14902 true,
14903 is_lambda_literal ? PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA : PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK,
14904 (uint16_t) (depth + 1)
14905 );
14906 if (!is_lambda_literal) {
14907 context_pop(parser);
14908 }
14909 }
14910
14911 pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
14912 if (opening != NULL) {
14913 accept1(parser, PM_TOKEN_NEWLINE);
14914
14915 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
14916 do {
14917 switch (parser->current.type) {
14918 case PM_TOKEN_CONSTANT:
14919 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14920 parser_lex(parser);
14921 break;
14922 case PM_TOKEN_INSTANCE_VARIABLE:
14923 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14924 parser_lex(parser);
14925 break;
14926 case PM_TOKEN_GLOBAL_VARIABLE:
14927 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14928 parser_lex(parser);
14929 break;
14930 case PM_TOKEN_CLASS_VARIABLE:
14931 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14932 parser_lex(parser);
14933 break;
14934 default:
14935 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
14936 break;
14937 }
14938
14939 bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
14940 pm_parser_local_add_token(parser, &parser->previous, 1);
14941
14942 pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
14943 if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
14944
14945 pm_block_parameters_node_append_local(parser->arena, block_parameters, local);
14946 } while (accept1(parser, PM_TOKEN_COMMA));
14947 }
14948 }
14949
14950 return block_parameters;
14951}
14952
14957static bool
14958outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
14959 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
14960 if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
14961 }
14962
14963 return false;
14964}
14965
/**
 * The names of the numbered parameters `_1` through `_9`. The entry at index
 * i holds the name of parameter number i + 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    [0] = "_1",
    [1] = "_2",
    [2] = "_3",
    [3] = "_4",
    [4] = "_5",
    [5] = "_6",
    [6] = "_7",
    [7] = "_8",
    [8] = "_9"
};
14974
14980static pm_node_t *
14981parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
14982 pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
14983
14984 // If we have ordinary parameters, then we will return them as the set of
14985 // parameters.
14986 if (parameters != NULL) {
14987 // If we also have implicit parameters, then this is an error.
14988 if (implicit_parameters->size > 0) {
14989 pm_node_t *node = implicit_parameters->nodes[0];
14990
14991 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14992 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14993 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14994 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14995 } else {
14996 assert(false && "unreachable");
14997 }
14998 }
14999
15000 return parameters;
15001 }
15002
15003 // If we don't have any implicit parameters, then the set of parameters is
15004 // NULL.
15005 if (implicit_parameters->size == 0) {
15006 return NULL;
15007 }
15008
15009 // If we don't have ordinary parameters, then we now must validate our set
15010 // of implicit parameters. We can only have numbered parameters or it, but
15011 // they cannot be mixed.
15012 uint8_t numbered_parameter = 0;
15013 bool it_parameter = false;
15014
15015 for (size_t index = 0; index < implicit_parameters->size; index++) {
15016 pm_node_t *node = implicit_parameters->nodes[index];
15017
15018 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
15019 if (it_parameter) {
15020 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15021 } else if (outer_scope_using_numbered_parameters_p(parser)) {
15022 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15023 } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15024 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15025 } else if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
15026 numbered_parameter = MAX(numbered_parameter, (uint8_t) (parser->start[node->location.start + 1] - '0'));
15027 } else {
15028 assert(false && "unreachable");
15029 }
15030 } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15031 if (numbered_parameter > 0) {
15032 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15033 } else {
15034 it_parameter = true;
15035 }
15036 }
15037 }
15038
15039 if (numbered_parameter > 0) {
15040 // Go through the parent scopes and mark them as being disallowed from
15041 // using numbered parameters because this inner scope is using them.
15042 for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15043 scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15044 }
15045 return UP(pm_numbered_parameters_node_create(parser, opening, closing, numbered_parameter));
15046 }
15047
15048 if (it_parameter) {
15049 return UP(pm_it_parameters_node_create(parser, opening, closing));
15050 }
15051
15052 return NULL;
15053}
15054
15058static pm_block_node_t *
15059parse_block(pm_parser_t *parser, uint16_t depth) {
15060 pm_token_t opening = parser->previous;
15061 accept1(parser, PM_TOKEN_NEWLINE);
15062
15063 pm_accepts_block_stack_push(parser, true);
15064 pm_parser_scope_push(parser, false);
15065
15066 pm_block_parameters_node_t *block_parameters = NULL;
15067
15068 if (accept1(parser, PM_TOKEN_PIPE)) {
15069 pm_token_t block_parameters_opening = parser->previous;
15070 if (match1(parser, PM_TOKEN_PIPE)) {
15071 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15072 parser->command_start = true;
15073 parser_lex(parser);
15074 } else {
15075 block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15076 accept1(parser, PM_TOKEN_NEWLINE);
15077 parser->command_start = true;
15078 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15079 }
15080
15081 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
15082 }
15083
15084 accept1(parser, PM_TOKEN_NEWLINE);
15085 pm_node_t *statements = NULL;
15086
15087 if (opening.type == PM_TOKEN_BRACE_LEFT) {
15088 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15089 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1)));
15090 }
15091
15092 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
15093 } else {
15094 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15095 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
15096 pm_accepts_block_stack_push(parser, true);
15097 statements = UP(parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1)));
15098 pm_accepts_block_stack_pop(parser);
15099 }
15100
15101 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15102 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15103 statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
15104 }
15105 }
15106
15107 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
15108 }
15109
15110 pm_constant_id_list_t locals;
15111 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15112 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->previous);
15113
15114 pm_parser_scope_pop(parser);
15115 pm_accepts_block_stack_pop(parser);
15116
15117 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15118}
15119
15125static bool
15126parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, uint8_t flags, uint16_t depth) {
15127 /* Fast path: if the current token can't begin an expression and isn't
15128 * a parenthesis, block opener, or splat/block-pass operator, there are
15129 * no arguments to parse. */
15130 if (
15131 !token_begins_expression_p(parser->current.type) &&
15132 !match6(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)
15133 ) {
15134 return false;
15135 }
15136
15137 bool found = false;
15138 bool parsed_command_args = false;
15139
15140 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15141 found |= true;
15142 arguments->opening_loc = TOK2LOC(parser, &parser->previous);
15143
15144 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15145 arguments->closing_loc = TOK2LOC(parser, &parser->previous);
15146 } else {
15147 pm_accepts_block_stack_push(parser, true);
15148 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
15149
15150 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15151 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_str(parser->current.type));
15152 parser->previous.start = parser->previous.end;
15153 parser->previous.type = 0;
15154 }
15155
15156 pm_accepts_block_stack_pop(parser);
15157 arguments->closing_loc = TOK2LOC(parser, &parser->previous);
15158 }
15159 } else if ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15160 found |= true;
15161 parsed_command_args = true;
15162 pm_accepts_block_stack_push(parser, false);
15163
15164 // If we get here, then the subsequent token cannot be used as an infix
15165 // operator. In this case we assume the subsequent token is part of an
15166 // argument to this method call.
15167 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
15168
15169 // If we have done with the arguments and still not consumed the comma,
15170 // then we have a trailing comma where we need to check whether it is
15171 // allowed or not.
15172 if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15173 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_str(parser->current.type));
15174 }
15175
15176 pm_accepts_block_stack_pop(parser);
15177 }
15178
15179 // If we're at the end of the arguments, we can now check if there is a block
15180 // node that starts with a {. If there is, then we can parse it and add it to
15181 // the arguments.
15182 if (accepts_block) {
15183 pm_block_node_t *block = NULL;
15184
15185 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15186 found |= true;
15187 block = parse_block(parser, (uint16_t) (depth + 1));
15188 pm_arguments_validate_block(parser, arguments, block);
15189 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15190 found |= true;
15191 block = parse_block(parser, (uint16_t) (depth + 1));
15192 } else if (parsed_command_args && pm_accepts_block_stack_p(parser) && (flags & PM_PARSE_ACCEPTS_DO_BLOCK) && accept1(parser, PM_TOKEN_KEYWORD_DO_BLOCK)) {
15193 found |= true;
15194 block = parse_block(parser, (uint16_t) (depth + 1));
15195 }
15196
15197 if (block != NULL) {
15198 if (arguments->block == NULL && !arguments->has_forwarding) {
15199 arguments->block = UP(block);
15200 } else {
15201 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
15202
15203 if (arguments->block != NULL) {
15204 if (arguments->arguments == NULL) {
15205 arguments->arguments = pm_arguments_node_create(parser);
15206 }
15207 pm_arguments_node_arguments_append(parser->arena, arguments->arguments, arguments->block);
15208 }
15209 arguments->block = UP(block);
15210 }
15211 }
15212 }
15213
15214 return found;
15215}
15216
15221static void
15222parse_return(pm_parser_t *parser, pm_node_t *node) {
15223 bool in_sclass = false;
15224 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15225 switch (context_node->context) {
15226 case PM_CONTEXT_BEGIN_ELSE:
15227 case PM_CONTEXT_BEGIN_ENSURE:
15228 case PM_CONTEXT_BEGIN_RESCUE:
15229 case PM_CONTEXT_BEGIN:
15230 case PM_CONTEXT_CASE_IN:
15231 case PM_CONTEXT_CASE_WHEN:
15232 case PM_CONTEXT_DEFAULT_PARAMS:
15233 case PM_CONTEXT_DEFINED:
15234 case PM_CONTEXT_ELSE:
15235 case PM_CONTEXT_ELSIF:
15236 case PM_CONTEXT_EMBEXPR:
15237 case PM_CONTEXT_FOR_INDEX:
15238 case PM_CONTEXT_FOR:
15239 case PM_CONTEXT_IF:
15240 case PM_CONTEXT_LOOP_PREDICATE:
15241 case PM_CONTEXT_MAIN:
15242 case PM_CONTEXT_MULTI_TARGET:
15243 case PM_CONTEXT_PARENS:
15244 case PM_CONTEXT_POSTEXE:
15245 case PM_CONTEXT_PREDICATE:
15246 case PM_CONTEXT_PREEXE:
15247 case PM_CONTEXT_RESCUE_MODIFIER:
15248 case PM_CONTEXT_TERNARY:
15249 case PM_CONTEXT_UNLESS:
15250 case PM_CONTEXT_UNTIL:
15251 case PM_CONTEXT_WHILE:
15252 // Keep iterating up the lists of contexts, because returns can
15253 // see through these.
15254 continue;
15255 case PM_CONTEXT_SCLASS_ELSE:
15256 case PM_CONTEXT_SCLASS_ENSURE:
15257 case PM_CONTEXT_SCLASS_RESCUE:
15258 case PM_CONTEXT_SCLASS:
15259 in_sclass = true;
15260 continue;
15261 case PM_CONTEXT_CLASS_ELSE:
15262 case PM_CONTEXT_CLASS_ENSURE:
15263 case PM_CONTEXT_CLASS_RESCUE:
15264 case PM_CONTEXT_CLASS:
15265 case PM_CONTEXT_MODULE_ELSE:
15266 case PM_CONTEXT_MODULE_ENSURE:
15267 case PM_CONTEXT_MODULE_RESCUE:
15268 case PM_CONTEXT_MODULE:
15269 // These contexts are invalid for a return.
15270 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15271 return;
15272 case PM_CONTEXT_BLOCK_BRACES:
15273 case PM_CONTEXT_BLOCK_ELSE:
15274 case PM_CONTEXT_BLOCK_ENSURE:
15275 case PM_CONTEXT_BLOCK_KEYWORDS:
15276 case PM_CONTEXT_BLOCK_RESCUE:
15277 case PM_CONTEXT_BLOCK_PARAMETERS:
15278 case PM_CONTEXT_DEF_ELSE:
15279 case PM_CONTEXT_DEF_ENSURE:
15280 case PM_CONTEXT_DEF_PARAMS:
15281 case PM_CONTEXT_DEF_RESCUE:
15282 case PM_CONTEXT_DEF:
15283 case PM_CONTEXT_LAMBDA_BRACES:
15284 case PM_CONTEXT_LAMBDA_DO_END:
15285 case PM_CONTEXT_LAMBDA_ELSE:
15286 case PM_CONTEXT_LAMBDA_ENSURE:
15287 case PM_CONTEXT_LAMBDA_RESCUE:
15288 // These contexts are valid for a return, and we should not
15289 // continue to loop.
15290 return;
15291 case PM_CONTEXT_NONE:
15292 // This case should never happen.
15293 assert(false && "unreachable");
15294 break;
15295 }
15296 }
15297 if (in_sclass && parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
15298 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15299 }
15300}
15301
15306static void
15307parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15308 for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15309 switch (context_node->context) {
15310 case PM_CONTEXT_BLOCK_BRACES:
15311 case PM_CONTEXT_BLOCK_KEYWORDS:
15312 case PM_CONTEXT_BLOCK_ELSE:
15313 case PM_CONTEXT_BLOCK_ENSURE:
15314 case PM_CONTEXT_BLOCK_PARAMETERS:
15315 case PM_CONTEXT_BLOCK_RESCUE:
15316 case PM_CONTEXT_DEFINED:
15317 case PM_CONTEXT_FOR:
15318 case PM_CONTEXT_LAMBDA_BRACES:
15319 case PM_CONTEXT_LAMBDA_DO_END:
15320 case PM_CONTEXT_LAMBDA_ELSE:
15321 case PM_CONTEXT_LAMBDA_ENSURE:
15322 case PM_CONTEXT_LAMBDA_RESCUE:
15323 case PM_CONTEXT_LOOP_PREDICATE:
15324 case PM_CONTEXT_UNTIL:
15325 case PM_CONTEXT_WHILE:
15326 // These are the good cases. We're allowed to have a block exit
15327 // in these contexts.
15328 return;
15329 case PM_CONTEXT_POSTEXE:
15330 // https://bugs.ruby-lang.org/issues/20409
15331 if (context_node->context == PM_CONTEXT_POSTEXE) {
15332 if (parser->version < PM_OPTIONS_VERSION_CRUBY_4_1) {
15333 return;
15334 }
15335 if (PM_NODE_TYPE_P(node, PM_NEXT_NODE)) {
15336 return;
15337 }
15338 }
15340 case PM_CONTEXT_DEF:
15341 case PM_CONTEXT_DEF_PARAMS:
15342 case PM_CONTEXT_DEF_ELSE:
15343 case PM_CONTEXT_DEF_ENSURE:
15344 case PM_CONTEXT_DEF_RESCUE:
15345 case PM_CONTEXT_MAIN:
15346 case PM_CONTEXT_PREEXE:
15347 case PM_CONTEXT_SCLASS:
15348 case PM_CONTEXT_SCLASS_ELSE:
15349 case PM_CONTEXT_SCLASS_ENSURE:
15350 case PM_CONTEXT_SCLASS_RESCUE:
15351 // These are the bad cases. We're not allowed to have a block
15352 // exit in these contexts.
15353 //
15354 // If we get here, then we're about to mark this block exit
15355 // as invalid. However, it could later _become_ valid if we
15356 // find a trailing while/until on the expression. In this
15357 // case instead of adding the error here, we'll add the
15358 // block exit to the list of exits for the expression, and
15359 // the node parsing will handle validating it instead.
15360 assert(parser->current_block_exits != NULL);
15361 pm_node_list_append(parser->arena, parser->current_block_exits, node);
15362 return;
15363 case PM_CONTEXT_BEGIN_ELSE:
15364 case PM_CONTEXT_BEGIN_ENSURE:
15365 case PM_CONTEXT_BEGIN_RESCUE:
15366 case PM_CONTEXT_BEGIN:
15367 case PM_CONTEXT_CASE_IN:
15368 case PM_CONTEXT_CASE_WHEN:
15369 case PM_CONTEXT_CLASS_ELSE:
15370 case PM_CONTEXT_CLASS_ENSURE:
15371 case PM_CONTEXT_CLASS_RESCUE:
15372 case PM_CONTEXT_CLASS:
15373 case PM_CONTEXT_DEFAULT_PARAMS:
15374 case PM_CONTEXT_ELSE:
15375 case PM_CONTEXT_ELSIF:
15376 case PM_CONTEXT_EMBEXPR:
15377 case PM_CONTEXT_FOR_INDEX:
15378 case PM_CONTEXT_IF:
15379 case PM_CONTEXT_MODULE_ELSE:
15380 case PM_CONTEXT_MODULE_ENSURE:
15381 case PM_CONTEXT_MODULE_RESCUE:
15382 case PM_CONTEXT_MODULE:
15383 case PM_CONTEXT_MULTI_TARGET:
15384 case PM_CONTEXT_PARENS:
15385 case PM_CONTEXT_PREDICATE:
15386 case PM_CONTEXT_RESCUE_MODIFIER:
15387 case PM_CONTEXT_TERNARY:
15388 case PM_CONTEXT_UNLESS:
15389 // In these contexts we should continue walking up the list of
15390 // contexts.
15391 break;
15392 case PM_CONTEXT_NONE:
15393 // This case should never happen.
15394 assert(false && "unreachable");
15395 break;
15396 }
15397 }
15398}
15399
15404static pm_node_list_t *
15405push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15406 pm_node_list_t *previous_block_exits = parser->current_block_exits;
15407 parser->current_block_exits = current_block_exits;
15408 return previous_block_exits;
15409}
15410
15416static void
15417flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15418 pm_node_t *block_exit;
15419 PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15420 const char *type;
15421
15422 switch (PM_NODE_TYPE(block_exit)) {
15423 case PM_BREAK_NODE: type = "break"; break;
15424 case PM_NEXT_NODE: type = "next"; break;
15425 case PM_REDO_NODE: type = "redo"; break;
15426 default: assert(false && "unreachable"); type = ""; break;
15427 }
15428
15429 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15430 }
15431
15432 parser->current_block_exits = previous_block_exits;
15433}
15434
15439static void
15440pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15441 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15442 // If we matched a trailing while/until, then all of the block exits in
15443 // the contained list are valid. In this case we do not need to do
15444 // anything.
15445 parser->current_block_exits = previous_block_exits;
15446 } else if (previous_block_exits != NULL) {
15447 // If we did not matching a trailing while/until, then all of the block
15448 // exits contained in the list are invalid for this specific context.
15449 // However, they could still become valid in a higher level context if
15450 // there is another list above this one. In this case we'll push all of
15451 // the block exits up to the previous list.
15452 pm_node_list_concat(parser->arena, previous_block_exits, parser->current_block_exits);
15453 parser->current_block_exits = previous_block_exits;
15454 } else {
15455 // If we did not match a trailing while/until and this was the last
15456 // chance to do so, then all of the block exits in the list are invalid
15457 // and we need to add an error for each of them.
15458 flush_block_exits(parser, previous_block_exits);
15459 }
15460}
15461
15462static PRISM_INLINE pm_node_t *
15463parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15464 context_push(parser, PM_CONTEXT_PREDICATE);
15465 pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15466 pm_node_t *predicate = parse_value_expression(parser, binding_power, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, error_id, (uint16_t) (depth + 1));
15467
15468 // Predicates are closed by a term, a "then", or a term and then a "then".
15469 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15470
15471 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15472 predicate_closed = true;
15473 *then_keyword = parser->previous;
15474 }
15475
15476 if (!predicate_closed) {
15477 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15478 }
15479
15480 context_pop(parser);
15481 return predicate;
15482}
15483
15484static PRISM_INLINE pm_node_t *
15485parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15486 pm_node_list_t current_block_exits = { 0 };
15487 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15488
15489 pm_token_t keyword = parser->previous;
15490 pm_token_t then_keyword = { 0 };
15491
15492 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15493 pm_statements_node_t *statements = NULL;
15494
15495 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15496 pm_accepts_block_stack_push(parser, true);
15497 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15498 pm_accepts_block_stack_pop(parser);
15499 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15500 }
15501
15502 pm_node_t *parent = NULL;
15503
15504 switch (context) {
15505 case PM_CONTEXT_IF:
15506 parent = UP(pm_if_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
15507 break;
15508 case PM_CONTEXT_UNLESS:
15509 parent = UP(pm_unless_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements));
15510 break;
15511 default:
15512 assert(false && "unreachable");
15513 break;
15514 }
15515
15516 pm_node_t *current = parent;
15517
15518 // Parse any number of elsif clauses. This will form a linked list of if
15519 // nodes pointing to each other from the top.
15520 if (context == PM_CONTEXT_IF) {
15521 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15522 if (parser_end_of_line_p(parser)) {
15523 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
15524 }
15525
15526 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15527 pm_token_t elsif_keyword = parser->current;
15528 parser_lex(parser);
15529
15530 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15531 pm_accepts_block_stack_push(parser, true);
15532
15533 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15534 pm_accepts_block_stack_pop(parser);
15535 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15536
15537 pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
15538 ((pm_if_node_t *) current)->subsequent = elsif;
15539 current = elsif;
15540 }
15541 }
15542
15543 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15544 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15545 opening_newline_index = token_newline_index(parser);
15546
15547 parser_lex(parser);
15548 pm_token_t else_keyword = parser->previous;
15549
15550 pm_accepts_block_stack_push(parser, true);
15551 pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15552 pm_accepts_block_stack_pop(parser);
15553
15554 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15555 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15556 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
15557
15558 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15559
15560 switch (context) {
15561 case PM_CONTEXT_IF:
15562 ((pm_if_node_t *) current)->subsequent = UP(else_node);
15563 break;
15564 case PM_CONTEXT_UNLESS:
15565 ((pm_unless_node_t *) parent)->else_clause = else_node;
15566 break;
15567 default:
15568 assert(false && "unreachable");
15569 break;
15570 }
15571 } else {
15572 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15573 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
15574 }
15575
15576 // Set the appropriate end location for all of the nodes in the subtree.
15577 switch (context) {
15578 case PM_CONTEXT_IF: {
15579 pm_node_t *current = parent;
15580 bool recursing = true;
15581
15582 while (recursing) {
15583 switch (PM_NODE_TYPE(current)) {
15584 case PM_IF_NODE:
15585 pm_if_node_end_keyword_loc_set(parser, (pm_if_node_t *) current, &parser->previous);
15586 current = ((pm_if_node_t *) current)->subsequent;
15587 recursing = current != NULL;
15588 break;
15589 case PM_ELSE_NODE:
15590 pm_else_node_end_keyword_loc_set(parser, (pm_else_node_t *) current, &parser->previous);
15591 recursing = false;
15592 break;
15593 default: {
15594 recursing = false;
15595 break;
15596 }
15597 }
15598 }
15599 break;
15600 }
15601 case PM_CONTEXT_UNLESS:
15602 pm_unless_node_end_keyword_loc_set(parser, (pm_unless_node_t *) parent, &parser->previous);
15603 break;
15604 default:
15605 assert(false && "unreachable");
15606 break;
15607 }
15608
15609 pop_block_exits(parser, previous_block_exits);
15610 return parent;
15611}
15612
/**
 * A list of case labels covering every keyword token type. The leading `case`
 * and the trailing `:` are supplied at the use site, as in
 * `case PM_CASE_KEYWORD:`.
 */
#define PM_CASE_KEYWORD \
    PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD_ALIAS: \
    case PM_TOKEN_KEYWORD_AND: \
    case PM_TOKEN_KEYWORD_BEGIN: \
    case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
    case PM_TOKEN_KEYWORD_BREAK: \
    case PM_TOKEN_KEYWORD_CASE: \
    case PM_TOKEN_KEYWORD_CLASS: \
    case PM_TOKEN_KEYWORD_DEF: \
    case PM_TOKEN_KEYWORD_DEFINED: \
    case PM_TOKEN_KEYWORD_DO: \
    case PM_TOKEN_KEYWORD_DO_BLOCK: \
    case PM_TOKEN_KEYWORD_DO_LOOP: \
    case PM_TOKEN_KEYWORD_ELSE: \
    case PM_TOKEN_KEYWORD_ELSIF: \
    case PM_TOKEN_KEYWORD_END: \
    case PM_TOKEN_KEYWORD_END_UPCASE: \
    case PM_TOKEN_KEYWORD_ENSURE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD_FOR: \
    case PM_TOKEN_KEYWORD_IF: \
    case PM_TOKEN_KEYWORD_IN: \
    case PM_TOKEN_KEYWORD_MODULE: \
    case PM_TOKEN_KEYWORD_NEXT: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_NOT: \
    case PM_TOKEN_KEYWORD_OR: \
    case PM_TOKEN_KEYWORD_REDO: \
    case PM_TOKEN_KEYWORD_RESCUE: \
    case PM_TOKEN_KEYWORD_RETRY: \
    case PM_TOKEN_KEYWORD_RETURN: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_SUPER: \
    case PM_TOKEN_KEYWORD_THEN: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_UNDEF: \
    case PM_TOKEN_KEYWORD_UNLESS: \
    case PM_TOKEN_KEYWORD_UNTIL: \
    case PM_TOKEN_KEYWORD_WHEN: \
    case PM_TOKEN_KEYWORD_WHILE: \
    case PM_TOKEN_KEYWORD_YIELD
15628
/**
 * A list of case labels covering the operator token types. The leading `case`
 * and the trailing `:` are supplied at the use site, as in
 * `case PM_CASE_OPERATOR:`.
 */
#define PM_CASE_OPERATOR \
    PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_BANG_EQUAL: \
    case PM_TOKEN_BANG_TILDE: \
    case PM_TOKEN_BANG: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
    case PM_TOKEN_BRACKET_LEFT_RIGHT: \
    case PM_TOKEN_CARET: \
    case PM_TOKEN_EQUAL_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_EQUAL: \
    case PM_TOKEN_EQUAL_TILDE: \
    case PM_TOKEN_GREATER_EQUAL: \
    case PM_TOKEN_GREATER_GREATER: \
    case PM_TOKEN_GREATER: \
    case PM_TOKEN_LESS_EQUAL_GREATER: \
    case PM_TOKEN_LESS_EQUAL: \
    case PM_TOKEN_LESS_LESS: \
    case PM_TOKEN_LESS: \
    case PM_TOKEN_MINUS: \
    case PM_TOKEN_PERCENT: \
    case PM_TOKEN_PIPE: \
    case PM_TOKEN_PLUS: \
    case PM_TOKEN_SLASH: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_TILDE: \
    case PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_UMINUS: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_UPLUS: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_USTAR_STAR
15641
/**
 * A list of case labels covering numeric literal tokens, the tokens that open
 * symbols, regular expressions, strings, %-literals and heredocs, and a
 * handful of literal-like keywords, among others. The leading `case` and the
 * trailing `:` are supplied at the use site.
 */
#define PM_CASE_PRIMITIVE \
    PM_TOKEN_INTEGER: \
    case PM_TOKEN_INTEGER_IMAGINARY: \
    case PM_TOKEN_INTEGER_RATIONAL: \
    case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: \
    case PM_TOKEN_FLOAT: \
    case PM_TOKEN_FLOAT_IMAGINARY: \
    case PM_TOKEN_FLOAT_RATIONAL: \
    case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: \
    case PM_TOKEN_SYMBOL_BEGIN: \
    case PM_TOKEN_REGEXP_BEGIN: \
    case PM_TOKEN_BACKTICK: \
    case PM_TOKEN_PERCENT_LOWER_X: \
    case PM_TOKEN_PERCENT_LOWER_I: \
    case PM_TOKEN_PERCENT_LOWER_W: \
    case PM_TOKEN_PERCENT_UPPER_I: \
    case PM_TOKEN_PERCENT_UPPER_W: \
    case PM_TOKEN_STRING_BEGIN: \
    case PM_TOKEN_KEYWORD_NIL: \
    case PM_TOKEN_KEYWORD_SELF: \
    case PM_TOKEN_KEYWORD_TRUE: \
    case PM_TOKEN_KEYWORD_FALSE: \
    case PM_TOKEN_KEYWORD___FILE__: \
    case PM_TOKEN_KEYWORD___LINE__: \
    case PM_TOKEN_KEYWORD___ENCODING__: \
    case PM_TOKEN_MINUS_GREATER: \
    case PM_TOKEN_HEREDOC_START: \
    case PM_TOKEN_UMINUS_NUM: \
    case PM_TOKEN_CHARACTER_LITERAL
15656
/**
 * A list of case labels for a set of token types grouped under the name
 * "parameter": splat and block operators, `...`, identifiers, labels,
 * constants, and instance/global/class variables. The leading `case` and the
 * trailing `:` are supplied at the use site.
 */
#define PM_CASE_PARAMETER \
    PM_TOKEN_UAMPERSAND: \
    case PM_TOKEN_AMPERSAND: \
    case PM_TOKEN_UDOT_DOT_DOT: \
    case PM_TOKEN_IDENTIFIER: \
    case PM_TOKEN_LABEL: \
    case PM_TOKEN_USTAR: \
    case PM_TOKEN_STAR: \
    case PM_TOKEN_STAR_STAR: \
    case PM_TOKEN_USTAR_STAR: \
    case PM_TOKEN_CONSTANT: \
    case PM_TOKEN_INSTANCE_VARIABLE: \
    case PM_TOKEN_GLOBAL_VARIABLE: \
    case PM_TOKEN_CLASS_VARIABLE
15665
/**
 * A list of case labels for a set of node types grouped under the name
 * "writable". Unlike the other PM_CASE_* lists in this group, these are node
 * types rather than token types. The leading `case` and the trailing `:` are
 * supplied at the use site.
 */
#define PM_CASE_WRITABLE \
    PM_CLASS_VARIABLE_READ_NODE: \
    case PM_CONSTANT_PATH_NODE: \
    case PM_CONSTANT_READ_NODE: \
    case PM_GLOBAL_VARIABLE_READ_NODE: \
    case PM_LOCAL_VARIABLE_READ_NODE: \
    case PM_INSTANCE_VARIABLE_READ_NODE: \
    case PM_MULTI_TARGET_NODE: \
    case PM_BACK_REFERENCE_READ_NODE: \
    case PM_NUMBERED_REFERENCE_READ_NODE: \
    case PM_IT_LOCAL_VARIABLE_READ_NODE
15674
// Assert at compile time that the forced-UTF-8 flag has the same value among
// the string flags and the encoding flags, so that we can safely switch the
// type of the node without having to move the flags.
PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
15678
15683static PRISM_INLINE pm_node_flags_t
15684parse_unescaped_encoding(const pm_parser_t *parser) {
15685 if (parser->explicit_encoding != NULL) {
15686 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
15687 // If the there's an explicit encoding and it's using a UTF-8 escape
15688 // sequence, then mark the string as UTF-8.
15689 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
15690 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
15691 // If there's a non-UTF-8 escape sequence being used, then the
15692 // string uses the source encoding, unless the source is marked as
15693 // US-ASCII. In that case the string is forced as ASCII-8BIT in
15694 // order to keep the string valid.
15695 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
15696 }
15697 }
15698 return 0;
15699}
15700
15705static pm_node_t *
15706parse_string_part(pm_parser_t *parser, uint16_t depth) {
15707 switch (parser->current.type) {
15708 // Here the lexer has returned to us plain string content. In this case
15709 // we'll create a string node that has no opening or closing and return that
15710 // as the part. These kinds of parts look like:
15711 //
15712 // "aaa #{bbb} #@ccc ddd"
15713 // ^^^^ ^ ^^^^
15714 case PM_TOKEN_STRING_CONTENT: {
15715 pm_node_t *node = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
15716 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15717
15718 parser_lex(parser);
15719 return node;
15720 }
15721 // Here the lexer has returned the beginning of an embedded expression. In
15722 // that case we'll parse the inner statements and return that as the part.
15723 // These kinds of parts look like:
15724 //
15725 // "aaa #{bbb} #@ccc ddd"
15726 // ^^^^^^
15727 case PM_TOKEN_EMBEXPR_BEGIN: {
15728 // Ruby disallows seeing encoding around interpolation in strings,
15729 // even though it is known at parse time.
15730 parser->explicit_encoding = NULL;
15731
15732 pm_lex_state_t state = parser->lex_state;
15733 int brace_nesting = parser->brace_nesting;
15734
15735 parser->brace_nesting = 0;
15736 lex_state_set(parser, PM_LEX_STATE_BEG);
15737 parser_lex(parser);
15738
15739 pm_token_t opening = parser->previous;
15740 pm_statements_node_t *statements = NULL;
15741
15742 if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
15743 pm_accepts_block_stack_push(parser, true);
15744 statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
15745 pm_accepts_block_stack_pop(parser);
15746 }
15747
15748 parser->brace_nesting = brace_nesting;
15749 lex_state_set(parser, state);
15750 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
15751
15752 // If this set of embedded statements only contains a single
15753 // statement, then Ruby does not consider it as a possible statement
15754 // that could emit a line event.
15755 if (statements != NULL && statements->body.size == 1) {
15756 pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
15757 }
15758
15759 return UP(pm_embedded_statements_node_create(parser, &opening, statements, &parser->previous));
15760 }
15761
15762 // Here the lexer has returned the beginning of an embedded variable.
15763 // In that case we'll parse the variable and create an appropriate node
15764 // for it and then return that node. These kinds of parts look like:
15765 //
15766 // "aaa #{bbb} #@ccc ddd"
15767 // ^^^^^
15768 case PM_TOKEN_EMBVAR: {
15769 // Ruby disallows seeing encoding around interpolation in strings,
15770 // even though it is known at parse time.
15771 parser->explicit_encoding = NULL;
15772
15773 lex_state_set(parser, PM_LEX_STATE_BEG);
15774 parser_lex(parser);
15775
15776 pm_token_t operator = parser->previous;
15777 pm_node_t *variable;
15778
15779 switch (parser->current.type) {
15780 // In this case a back reference is being interpolated. We'll
15781 // create a global variable read node.
15782 case PM_TOKEN_BACK_REFERENCE:
15783 parser_lex(parser);
15784 variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
15785 break;
15786 // In this case an nth reference is being interpolated. We'll
15787 // create a global variable read node.
15788 case PM_TOKEN_NUMBERED_REFERENCE:
15789 parser_lex(parser);
15790 variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
15791 break;
15792 // In this case a global variable is being interpolated. We'll
15793 // create a global variable read node.
15794 case PM_TOKEN_GLOBAL_VARIABLE:
15795 parser_lex(parser);
15796 variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
15797 break;
15798 // In this case an instance variable is being interpolated.
15799 // We'll create an instance variable read node.
15800 case PM_TOKEN_INSTANCE_VARIABLE:
15801 parser_lex(parser);
15802 variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
15803 break;
15804 // In this case a class variable is being interpolated. We'll
15805 // create a class variable read node.
15806 case PM_TOKEN_CLASS_VARIABLE:
15807 parser_lex(parser);
15808 variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
15809 break;
15810 // We can hit here if we got an invalid token. In that case
15811 // we'll not attempt to lex this token and instead just return a
15812 // missing node.
15813 default:
15814 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
15815 variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
15816 break;
15817 }
15818
15819 return UP(pm_embedded_variable_node_create(parser, &operator, variable));
15820 }
15821 default:
15822 parser_lex(parser);
15823 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
15824 return NULL;
15825 }
15826}
15827
15833static const uint8_t *
15834parse_operator_symbol_name(const pm_token_t *name) {
15835 switch (name->type) {
15836 case PM_TOKEN_TILDE:
15837 case PM_TOKEN_BANG:
15838 if (name->end[-1] == '@') return name->end - 1;
15840 default:
15841 return name->end;
15842 }
15843}
15844
15845static pm_node_t *
15846parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
15847 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, NULL);
15848 const uint8_t *end = parse_operator_symbol_name(&parser->current);
15849
15850 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15851 parser_lex(parser);
15852
15853 pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
15854 pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
15855
15856 return UP(symbol);
15857}
15858
15864static pm_node_t *
15865parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
15866 const pm_token_t opening = parser->previous;
15867
15868 if (lex_mode->mode != PM_LEX_STRING) {
15869 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15870
15871 switch (parser->current.type) {
15872 case PM_CASE_OPERATOR:
15873 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
15874 case PM_TOKEN_IDENTIFIER:
15875 case PM_TOKEN_CONSTANT:
15876 case PM_TOKEN_INSTANCE_VARIABLE:
15877 case PM_TOKEN_METHOD_NAME:
15878 case PM_TOKEN_CLASS_VARIABLE:
15879 case PM_TOKEN_GLOBAL_VARIABLE:
15880 case PM_TOKEN_NUMBERED_REFERENCE:
15881 case PM_TOKEN_BACK_REFERENCE:
15882 case PM_CASE_KEYWORD:
15883 parser_lex(parser);
15884 break;
15885 default:
15886 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
15887 break;
15888 }
15889
15890 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, NULL);
15891 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
15892 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
15893
15894 return UP(symbol);
15895 }
15896
15897 if (lex_mode->as.string.interpolation) {
15898 // If we have the end of the symbol, then we can return an empty symbol.
15899 if (match1(parser, PM_TOKEN_STRING_END)) {
15900 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15901 parser_lex(parser);
15902 pm_token_t content = {
15903 .type = PM_TOKEN_STRING_CONTENT,
15904 .start = parser->previous.start,
15905 .end = parser->previous.start
15906 };
15907
15908 return UP(pm_symbol_node_create(parser, &opening, &content, &parser->previous));
15909 }
15910
15911 // Now we can parse the first part of the symbol.
15912 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
15913
15914 // If we got a string part, then it's possible that we could transform
15915 // what looks like an interpolated symbol into a regular symbol.
15916 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15917 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15918 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15919
15920 return UP(pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous));
15921 }
15922
15923 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15924 if (part) pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15925
15926 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15927 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
15928 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15929 }
15930 }
15931
15932 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15933 if (match1(parser, PM_TOKEN_EOF)) {
15934 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15935 } else {
15936 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15937 }
15938
15939 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
15940 return UP(symbol);
15941 }
15942
15943 pm_token_t content;
15944 pm_string_t unescaped;
15945
15946 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15947 content = parser->current;
15948 unescaped = parser->current_string;
15949 parser_lex(parser);
15950
15951 // If we have two string contents in a row, then the content of this
15952 // symbol is split because of heredoc contents. This looks like:
15953 //
15954 // <<A; :'a
15955 // A
15956 // b'
15957 //
15958 // In this case, the best way we have to represent this is as an
15959 // interpolated string node, so that's what we'll do here.
15960 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15961 pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
15962 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
15963 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15964
15965 part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->current, NULL, &parser->current_string));
15966 pm_interpolated_symbol_node_append(parser->arena, symbol, part);
15967
15968 if (next_state != PM_LEX_STATE_NONE) {
15969 lex_state_set(parser, next_state);
15970 }
15971
15972 parser_lex(parser);
15973 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15974
15975 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->previous);
15976 return UP(symbol);
15977 }
15978 } else {
15979 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
15980 pm_string_shared_init(&unescaped, content.start, content.end);
15981 }
15982
15983 if (next_state != PM_LEX_STATE_NONE) {
15984 lex_state_set(parser, next_state);
15985 }
15986
15987 if (match1(parser, PM_TOKEN_EOF)) {
15988 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
15989 } else {
15990 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15991 }
15992
15993 return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false)));
15994}
15995
16000static PRISM_INLINE pm_node_t *
16001parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
16002 switch (parser->current.type) {
16003 case PM_CASE_OPERATOR:
16004 return parse_operator_symbol(parser, NULL, PM_LEX_STATE_NONE);
16005 case PM_CASE_KEYWORD:
16006 case PM_TOKEN_CONSTANT:
16007 case PM_TOKEN_IDENTIFIER:
16008 case PM_TOKEN_METHOD_NAME: {
16009 parser_lex(parser);
16010
16011 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
16012 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16013 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16014
16015 return UP(symbol);
16016 }
16017 case PM_TOKEN_SYMBOL_BEGIN: {
16018 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16019 parser_lex(parser);
16020
16021 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16022 }
16023 default:
16024 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16025 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
16026 }
16027}
16028
16035static PRISM_INLINE pm_node_t *
16036parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16037 switch (parser->current.type) {
16038 case PM_CASE_OPERATOR:
16039 return parse_operator_symbol(parser, NULL, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16040 case PM_CASE_KEYWORD:
16041 case PM_TOKEN_CONSTANT:
16042 case PM_TOKEN_IDENTIFIER:
16043 case PM_TOKEN_METHOD_NAME: {
16044 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16045 parser_lex(parser);
16046
16047 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, NULL, &parser->previous, NULL);
16048 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16049 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16050
16051 return UP(symbol);
16052 }
16053 case PM_TOKEN_SYMBOL_BEGIN: {
16054 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16055 parser_lex(parser);
16056
16057 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16058 }
16059 case PM_TOKEN_BACK_REFERENCE:
16060 parser_lex(parser);
16061 return UP(pm_back_reference_read_node_create(parser, &parser->previous));
16062 case PM_TOKEN_NUMBERED_REFERENCE:
16063 parser_lex(parser);
16064 return UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
16065 case PM_TOKEN_GLOBAL_VARIABLE:
16066 parser_lex(parser);
16067 return UP(pm_global_variable_read_node_create(parser, &parser->previous));
16068 default:
16069 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16070 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
16071 }
16072}
16073
16078static pm_node_t *
16079parse_variable(pm_parser_t *parser) {
16080 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16081 int depth;
16082 bool is_numbered_param = pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
16083
16084 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16085 return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false));
16086 }
16087
16088 pm_scope_t *current_scope = parser->current_scope;
16089 if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16090 if (is_numbered_param) {
16091 // When you use a numbered parameter, it implies the existence of
16092 // all of the locals that exist before it. For example, referencing
16093 // _2 means that _1 must exist. Therefore here we loop through all
16094 // of the possibilities and add them into the constant pool.
16095 uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16096 for (uint8_t number = 1; number <= maximum; number++) {
16097 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16098 }
16099
16100 if (!match1(parser, PM_TOKEN_EQUAL)) {
16101 parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16102 }
16103
16104 pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false));
16105 pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
16106
16107 return node;
16108 } else if ((parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16109 pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->previous));
16110 pm_node_list_append(parser->arena, &current_scope->implicit_parameters, node);
16111
16112 return node;
16113 }
16114 }
16115
16116 return NULL;
16117}
16118
16122static pm_node_t *
16123parse_variable_call(pm_parser_t *parser) {
16124 pm_node_flags_t flags = 0;
16125
16126 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16127 pm_node_t *node = parse_variable(parser);
16128 if (node != NULL) return node;
16129 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
16130 }
16131
16132 pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16133 pm_node_flag_set(UP(node), flags);
16134
16135 return UP(node);
16136}
16137
16144parse_method_definition_name(pm_parser_t *parser) {
16145 switch (parser->current.type) {
16146 case PM_CASE_KEYWORD:
16147 case PM_TOKEN_CONSTANT:
16148 case PM_TOKEN_METHOD_NAME:
16149 parser_lex(parser);
16150 return parser->previous;
16151 case PM_TOKEN_IDENTIFIER:
16152 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current));
16153 parser_lex(parser);
16154 return parser->previous;
16155 case PM_CASE_OPERATOR:
16156 lex_state_set(parser, PM_LEX_STATE_ENDFN);
16157 parser_lex(parser);
16158 return parser->previous;
16159 default:
16160 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_NAME, pm_token_str(parser->current.type));
16161 return (pm_token_t) { .type = 0, .start = parser->current.start, .end = parser->current.end };
16162 }
16163}
16164
16165static void
16166parse_heredoc_dedent_string(pm_arena_t *arena, pm_string_t *string, size_t common_whitespace) {
16167 // Make a writable copy in the arena if the string isn't already writable.
16168 // We keep a mutable pointer to the arena memory so we can memmove into it
16169 // below without casting away const from the string's source field.
16170 uint8_t *writable;
16171
16172 if (string->type != PM_STRING_OWNED) {
16173 size_t length = pm_string_length(string);
16174 writable = (uint8_t *) pm_arena_memdup(arena, pm_string_source(string), length, PRISM_ALIGNOF(uint8_t));
16175 pm_string_constant_init(string, (const char *) writable, length);
16176 } else {
16177 writable = (uint8_t *) string->source;
16178 }
16179
16180 // Now get the bounds of the existing string. We'll use this as a
16181 // destination to move bytes into. We'll also use it for bounds checking
16182 // since we don't require that these strings be null terminated.
16183 size_t dest_length = pm_string_length(string);
16184 const uint8_t *source_cursor = writable;
16185 const uint8_t *source_end = source_cursor + dest_length;
16186
16187 // We're going to move bytes backward in the string when we get leading
16188 // whitespace, so we'll maintain a pointer to the current position in the
16189 // string that we're writing to.
16190 size_t trimmed_whitespace = 0;
16191
16192 // While we haven't reached the amount of common whitespace that we need to
16193 // trim and we haven't reached the end of the string, we'll keep trimming
16194 // whitespace. Trimming in this context means skipping over these bytes such
16195 // that they aren't copied into the new string.
16196 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16197 if (*source_cursor == '\t') {
16198 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16199 if (trimmed_whitespace > common_whitespace) break;
16200 } else {
16201 trimmed_whitespace++;
16202 }
16203
16204 source_cursor++;
16205 dest_length--;
16206 }
16207
16208 memmove(writable, source_cursor, (size_t) (source_end - source_cursor));
16209 string->length = dest_length;
16210}
16211
16216static PRISM_INLINE bool
16217heredoc_dedent_discard_string_node(pm_parser_t *parser, pm_string_node_t *string_node) {
16218 if (string_node->unescaped.length == 0) {
16219 const uint8_t *cursor = parser->start + PM_LOCATION_START(&string_node->content_loc);
16220 return pm_memchr(cursor, '\\', string_node->content_loc.length, parser->encoding_changed, parser->encoding) == NULL;
16221 }
16222 return false;
16223}
16224
16228static void
16229parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16230 // The next node should be dedented if it's the first node in the list or if
16231 // it follows a string node.
16232 bool dedent_next = true;
16233
16234 // Iterate over all nodes, and trim whitespace accordingly. We're going to
16235 // keep around two indices: a read and a write.
16236 size_t write_index = 0;
16237
16238 pm_node_t *node;
16239 PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16240 // We're not manipulating child nodes that aren't strings. In this case
16241 // we'll skip past it and indicate that the subsequent node should not
16242 // be dedented.
16243 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16244 nodes->nodes[write_index++] = node;
16245 dedent_next = false;
16246 continue;
16247 }
16248
16249 pm_string_node_t *string_node = ((pm_string_node_t *) node);
16250 if (dedent_next) {
16251 parse_heredoc_dedent_string(parser->arena, &string_node->unescaped, common_whitespace);
16252 }
16253
16254 if (heredoc_dedent_discard_string_node(parser, string_node)) {
16255 } else {
16256 nodes->nodes[write_index++] = node;
16257 }
16258
16259 // We always dedent the next node if it follows a string node.
16260 dedent_next = true;
16261 }
16262
16263 nodes->size = write_index;
16264}
16265
16269static pm_token_t
16270parse_strings_empty_content(const uint8_t *location) {
16271 return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16272}
16273
16277static PRISM_INLINE pm_node_t *
16278parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16279 assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16280 bool concating = false;
16281
16282 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16283 pm_node_t *node = NULL;
16284
16285 // Here we have found a string literal. We'll parse it and add it to
16286 // the list of strings.
16287 const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16288 assert(lex_mode->mode == PM_LEX_STRING);
16289 bool lex_interpolation = lex_mode->as.string.interpolation;
16290 bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16291
16292 pm_token_t opening = parser->current;
16293 parser_lex(parser);
16294
16295 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16296 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16297 // If we get here, then we have an end immediately after a
16298 // start. In that case we'll create an empty content token and
16299 // return an uninterpolated string.
16300 pm_token_t content = parse_strings_empty_content(parser->previous.start);
16301 pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16302
16303 pm_string_shared_init(&string->unescaped, content.start, content.end);
16304 node = UP(string);
16305 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16306 // If we get here, then we have an end of a label immediately
16307 // after a start. In that case we'll create an empty symbol
16308 // node.
16309 pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, NULL, &parser->previous);
16310 pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.start);
16311 node = UP(symbol);
16312
16313 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16314 } else if (!lex_interpolation) {
16315 // If we don't accept interpolation then we expect the string to
16316 // start with a single string content node.
16317 pm_string_t unescaped;
16318 pm_token_t content;
16319
16320 if (match1(parser, PM_TOKEN_EOF)) {
16321 unescaped = PM_STRING_EMPTY;
16322 content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start, .end = parser->start };
16323 } else {
16324 unescaped = parser->current_string;
16325 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16326 content = parser->previous;
16327 }
16328
16329 // It is unfortunately possible to have multiple string content
16330 // nodes in a row in the case that there's heredoc content in
16331 // the middle of the string, like this cursed example:
16332 //
16333 // <<-END+'b
16334 // a
16335 // END
16336 // c'+'d'
16337 //
16338 // In that case we need to switch to an interpolated string to
16339 // be able to contain all of the parts.
16340 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16341 pm_node_list_t parts = { 0 };
16342 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
16343 pm_node_list_append(parser->arena, &parts, part);
16344
16345 do {
16346 part = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
16347 pm_node_list_append(parser->arena, &parts, part);
16348 parser_lex(parser);
16349 } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16350
16351 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16352 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16353 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16354 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16355 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16356 } else if (match1(parser, PM_TOKEN_EOF)) {
16357 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16358 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16359 } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16360 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16361 } else {
16362 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_str(parser->previous.type));
16363 parser->previous.start = parser->previous.end;
16364 parser->previous.type = 0;
16365 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped));
16366 }
16367 } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16368 // In this case we've hit string content so we know the string
16369 // at least has something in it. We'll need to check if the
16370 // following token is the end (in which case we can return a
16371 // plain string) or if it's not then it has interpolation.
16372 pm_token_t content = parser->current;
16373 pm_string_t unescaped = parser->current_string;
16374 parser_lex(parser);
16375
16376 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16377 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
16378 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16379
16380 // Kind of odd behavior, but basically if we have an
16381 // unterminated string and it ends in a newline, we back up one
16382 // character so that the error message is on the last line of
16383 // content in the string.
16384 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16385 const uint8_t *location = parser->previous.end;
16386 if (location > parser->start && location[-1] == '\n') location--;
16387 pm_parser_err(parser, U32(location - parser->start), 0, PM_ERR_STRING_LITERAL_EOF);
16388
16389 parser->previous.start = parser->previous.end;
16390 parser->previous.type = 0;
16391 }
16392 } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16393 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true)));
16394 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16395 } else {
16396 // If we get here, then we have interpolation so we'll need
16397 // to create a string or symbol node with interpolation.
16398 pm_node_list_t parts = { 0 };
16399 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
16400 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16401 pm_node_list_append(parser->arena, &parts, part);
16402
16403 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16404 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16405 pm_node_list_append(parser->arena, &parts, part);
16406 }
16407 }
16408
16409 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16410 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16411 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16412 } else if (match1(parser, PM_TOKEN_EOF)) {
16413 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16414 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16415 } else {
16416 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16417 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16418 }
16419 }
16420 } else {
16421 // If we get here, then the first part of the string is not plain
16422 // string content, in which case we need to parse the string as an
16423 // interpolated string.
16424 pm_node_list_t parts = { 0 };
16425 pm_node_t *part;
16426
16427 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16428 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16429 pm_node_list_append(parser->arena, &parts, part);
16430 }
16431 }
16432
16433 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16434 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous));
16435 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16436 } else if (match1(parser, PM_TOKEN_EOF)) {
16437 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16438 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current));
16439 } else {
16440 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16441 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous));
16442 }
16443 }
16444
16445 if (current == NULL) {
16446 // If the node we just parsed is a symbol node, then we can't
16447 // concatenate it with anything else, so we can now return that
16448 // node.
16449 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16450 return node;
16451 }
16452
16453 // If we don't already have a node, then it's fine and we can just
16454 // set the result to be the node we just parsed.
16455 current = node;
16456 } else {
16457 // Otherwise we need to check the type of the node we just parsed.
16458 // If it cannot be concatenated with the previous node, then we'll
16459 // need to add a syntax error.
16460 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16461 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16462 }
16463
16464 // If we haven't already created our container for concatenation,
16465 // we'll do that now.
16466 if (!concating) {
16467 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16468 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16469 }
16470
16471 concating = true;
16472 pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
16473 pm_interpolated_string_node_append(parser, container, current);
16474 current = UP(container);
16475 }
16476
16477 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, node);
16478 }
16479 }
16480
16481 return current;
16482}
16483
16484#define PM_PARSE_PATTERN_SINGLE 0
16485#define PM_PARSE_PATTERN_TOP 1
16486#define PM_PARSE_PATTERN_MULTI 2
16487
16488static pm_node_t *
16489parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16490
16496static void
16497parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16498 // Skip this capture if it starts with an underscore.
16499 if (peek_at(parser, parser->start + location->start) == '_') return;
16500
16501 if (pm_constant_id_list_includes(captures, capture)) {
16502 pm_parser_err(parser, location->start, location->length, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16503 } else {
16504 pm_constant_id_list_append(parser->arena, captures, capture);
16505 }
16506}
16507
16511static pm_node_t *
16512parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16513 // Now, if there are any :: operators that follow, parse them as constant
16514 // path nodes.
16515 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16516 pm_token_t delimiter = parser->previous;
16517 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16518 node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
16519 }
16520
16521 // If there is a [ or ( that follows, then this is part of a larger pattern
16522 // expression. We'll parse the inner pattern here, then modify the returned
16523 // inner pattern with our constant path attached.
16524 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16525 return node;
16526 }
16527
16528 pm_token_t opening;
16529 pm_token_t closing;
16530 pm_node_t *inner = NULL;
16531
16532 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16533 opening = parser->previous;
16534 accept1(parser, PM_TOKEN_NEWLINE);
16535
16536 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16537 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16538 accept1(parser, PM_TOKEN_NEWLINE);
16539 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16540 }
16541
16542 closing = parser->previous;
16543 } else {
16544 parser_lex(parser);
16545 opening = parser->previous;
16546 accept1(parser, PM_TOKEN_NEWLINE);
16547
16548 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16549 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16550 accept1(parser, PM_TOKEN_NEWLINE);
16551 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16552 }
16553
16554 closing = parser->previous;
16555 }
16556
16557 if (!inner) {
16558 // If there was no inner pattern, then we have something like Foo() or
16559 // Foo[]. In that case we'll create an array pattern with no requireds.
16560 return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
16561 }
16562
16563 // Now that we have the inner pattern, check to see if it's an array, find,
16564 // or hash pattern. If it is, then we'll attach our constant path to it if
16565 // it doesn't already have a constant. If it's not one of those node types
16566 // or it does have a constant, then we'll create an array pattern.
16567 switch (PM_NODE_TYPE(inner)) {
16568 case PM_ARRAY_PATTERN_NODE: {
16569 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16570
16571 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16572 PM_NODE_START_SET_NODE(pattern_node, node);
16573 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16574
16575 pattern_node->constant = node;
16576 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16577 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16578
16579 return UP(pattern_node);
16580 }
16581
16582 break;
16583 }
16584 case PM_FIND_PATTERN_NODE: {
16585 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16586
16587 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16588 PM_NODE_START_SET_NODE(pattern_node, node);
16589 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16590
16591 pattern_node->constant = node;
16592 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16593 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16594
16595 return UP(pattern_node);
16596 }
16597
16598 break;
16599 }
16600 case PM_HASH_PATTERN_NODE: {
16601 pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16602
16603 if (pattern_node->constant == NULL && pattern_node->opening_loc.length == 0) {
16604 PM_NODE_START_SET_NODE(pattern_node, node);
16605 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16606
16607 pattern_node->constant = node;
16608 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16609 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16610
16611 return UP(pattern_node);
16612 }
16613
16614 break;
16615 }
16616 default:
16617 break;
16618 }
16619
16620 // If we got here, then we didn't return one of the inner patterns by
16621 // attaching its constant. In this case we'll create an array pattern and
16622 // attach our constant to it.
16623 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16624 pm_array_pattern_node_requireds_append(parser->arena, pattern_node, inner);
16625 return UP(pattern_node);
16626}
16627
16631static pm_splat_node_t *
16632parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16633 assert(parser->previous.type == PM_TOKEN_USTAR);
16634 pm_token_t operator = parser->previous;
16635 pm_node_t *name = NULL;
16636
16637 // Rest patterns don't necessarily have a name associated with them. So we
16638 // will check for that here. If they do, then we'll add it to the local
16639 // table since this pattern will cause it to become a local variable.
16640 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16641 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16642
16643 int depth;
16644 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16645 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16646 }
16647
16648 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16649 name = UP(pm_local_variable_target_node_create(
16650 parser,
16651 &TOK2LOC(parser, &parser->previous),
16652 constant_id,
16653 (uint32_t) (depth == -1 ? 0 : depth)
16654 ));
16655 }
16656
16657 // Finally we can return the created node.
16658 return pm_splat_node_create(parser, &operator, name);
16659}
16660
16664static pm_node_t *
16665parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16666 assert(parser->current.type == PM_TOKEN_USTAR_STAR);
16667 parser_lex(parser);
16668
16669 pm_token_t operator = parser->previous;
16670 pm_node_t *value = NULL;
16671
16672 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16673 return UP(pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous));
16674 }
16675
16676 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16677 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16678
16679 int depth;
16680 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16681 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16682 }
16683
16684 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16685 value = UP(pm_local_variable_target_node_create(
16686 parser,
16687 &TOK2LOC(parser, &parser->previous),
16688 constant_id,
16689 (uint32_t) (depth == -1 ? 0 : depth)
16690 ));
16691 }
16692
16693 return UP(pm_assoc_splat_node_create(parser, value, &operator));
16694}
16695
16700static bool
16701pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16702 ptrdiff_t length = end - start;
16703 if (length == 0) return false;
16704
16705 // First ensure that it starts with a valid identifier starting character.
16706 size_t width = char_is_identifier_start(parser, start, end - start);
16707 if (width == 0) return false;
16708
16709 // Next, ensure that it's not an uppercase character.
16710 if (parser->encoding_changed) {
16711 if (parser->encoding->isupper_char(start, length)) return false;
16712 } else {
16713 if (pm_encoding_utf_8_isupper_char(start, length)) return false;
16714 }
16715
16716 // Next, iterate through all of the bytes of the string to ensure that they
16717 // are all valid identifier characters.
16718 const uint8_t *cursor = start + width;
16719 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
16720 return cursor == end;
16721}
16722
16727static pm_node_t *
16728parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
16729 const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
16730 const uint8_t *start = parser->start + PM_LOCATION_START(value_loc);
16731 const uint8_t *end = parser->start + PM_LOCATION_END(value_loc);
16732
16733 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
16734 int depth = -1;
16735
16736 if (pm_slice_is_valid_local(parser, start, end)) {
16737 depth = pm_parser_local_depth_constant_id(parser, constant_id);
16738 } else {
16739 pm_parser_err(parser, PM_NODE_START(key), PM_NODE_LENGTH(key), PM_ERR_PATTERN_HASH_KEY_LOCALS);
16740
16741 if ((end > start) && ((end[-1] == '!') || (end[-1] == '?'))) {
16742 PM_PARSER_ERR_FORMAT(parser, value_loc->start, value_loc->length, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (end - start), (const char *) start);
16743 }
16744 }
16745
16746 if (depth == -1) {
16747 pm_parser_local_add(parser, constant_id, start, end, 0);
16748 }
16749
16750 parse_pattern_capture(parser, captures, constant_id, value_loc);
16751 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
16752 parser,
16753 value_loc,
16754 constant_id,
16755 (uint32_t) (depth == -1 ? 0 : depth)
16756 );
16757
16758 return UP(pm_implicit_node_create(parser, UP(target)));
16759}
16760
16765static void
16766parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
16767 if (pm_static_literals_add(&parser->line_offsets, parser->start, parser->start_line, keys, node, true) != NULL) {
16768 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16769 }
16770}
16771
16776parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
16777 pm_node_list_t assocs = { 0 };
16778 pm_static_literals_t keys = { 0 };
16779 pm_node_t *rest = NULL;
16780
16781 switch (PM_NODE_TYPE(first_node)) {
16782 case PM_ASSOC_SPLAT_NODE:
16783 case PM_NO_KEYWORDS_PARAMETER_NODE:
16784 rest = first_node;
16785 break;
16786 case PM_INTERPOLATED_SYMBOL_NODE:
16787 case PM_SYMBOL_NODE: {
16788 if (pm_symbol_node_label_p(parser, first_node)) {
16789 if (PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE)) {
16790 pm_parser_err_node(parser, first_node, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16791 } else {
16792 parse_pattern_hash_key(parser, &keys, first_node);
16793 }
16794
16795 pm_node_t *value;
16796
16797 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16798 if (PM_NODE_TYPE_P(first_node, PM_SYMBOL_NODE)) {
16799 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
16800 } else {
16801 value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(first_node), 0));
16802 }
16803 } else {
16804 // Here we have a value for the first assoc in the list, so
16805 // we will parse it now.
16806 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16807 }
16808
16809 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16810 pm_node_list_append(parser->arena, &assocs, assoc);
16811 break;
16812 }
16813 }
16815 default: {
16816 // If we get anything else, then this is an error. For this we'll
16817 // create a missing node for the value and create an assoc node for
16818 // the first node in the list.
16819 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16820 pm_parser_err_node(parser, first_node, diag_id);
16821
16822 pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_START(first_node), PM_NODE_LENGTH(first_node)));
16823 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16824
16825 pm_node_list_append(parser->arena, &assocs, assoc);
16826 break;
16827 }
16828 }
16829
16830 // If there are any other assocs, then we'll parse them now.
16831 while (accept1(parser, PM_TOKEN_COMMA)) {
16832 // Here we need to break to support trailing commas.
16833 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16834 // Trailing commas are not allowed to follow a rest pattern.
16835 if (rest != NULL) {
16836 pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16837 }
16838
16839 break;
16840 }
16841
16842 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
16843 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
16844
16845 if (rest == NULL) {
16846 rest = assoc;
16847 } else {
16848 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16849 pm_node_list_append(parser->arena, &assocs, assoc);
16850 }
16851 } else {
16852 pm_node_t *key;
16853
16854 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16855 key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
16856
16857 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16858 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16859 } else if (!pm_symbol_node_label_p(parser, key)) {
16860 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16861 }
16862 } else if (accept1(parser, PM_TOKEN_LABEL)) {
16863 key = UP(pm_symbol_node_label_create(parser, &parser->previous));
16864 } else {
16865 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16866
16867 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = parser->previous.end, .end = parser->previous.end };
16868 key = UP(pm_symbol_node_create(parser, NULL, &label, NULL));
16869 }
16870
16871 parse_pattern_hash_key(parser, &keys, key);
16872 pm_node_t *value = NULL;
16873
16874 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16875 if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
16876 value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
16877 } else {
16878 value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(key), 0));
16879 }
16880 } else {
16881 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16882 }
16883
16884 pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, NULL, value));
16885
16886 if (rest != NULL) {
16887 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16888 }
16889
16890 pm_node_list_append(parser->arena, &assocs, assoc);
16891 }
16892 }
16893
16894 pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
16895 // assocs.nodes is arena-allocated; no explicit free needed.
16896
16897 pm_static_literals_free(&keys);
16898 return node;
16899}
16900
16904static pm_node_t *
16905parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
16906 switch (parser->current.type) {
16907 case PM_TOKEN_IDENTIFIER:
16908 case PM_TOKEN_METHOD_NAME: {
16909 parser_lex(parser);
16910 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16911
16912 int depth;
16913 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16914 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16915 }
16916
16917 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
16918 return UP(pm_local_variable_target_node_create(
16919 parser,
16920 &TOK2LOC(parser, &parser->previous),
16921 constant_id,
16922 (uint32_t) (depth == -1 ? 0 : depth)
16923 ));
16924 }
16925 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
16926 pm_token_t opening = parser->current;
16927 parser_lex(parser);
16928
16929 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16930 // If we have an empty array pattern, then we'll just return a new
16931 // array pattern node.
16932 return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->previous));
16933 }
16934
16935 // Otherwise, we'll parse the inner pattern, then deal with it depending
16936 // on the type it returns.
16937 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16938
16939 accept1(parser, PM_TOKEN_NEWLINE);
16940 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16941 pm_token_t closing = parser->previous;
16942
16943 switch (PM_NODE_TYPE(inner)) {
16944 case PM_ARRAY_PATTERN_NODE: {
16945 pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16946 if (pattern_node->opening_loc.length == 0) {
16947 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16948 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16949
16950 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16951 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16952
16953 return UP(pattern_node);
16954 }
16955
16956 break;
16957 }
16958 case PM_FIND_PATTERN_NODE: {
16959 pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16960 if (pattern_node->opening_loc.length == 0) {
16961 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16962 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16963
16964 pattern_node->opening_loc = TOK2LOC(parser, &opening);
16965 pattern_node->closing_loc = TOK2LOC(parser, &closing);
16966
16967 return UP(pattern_node);
16968 }
16969
16970 break;
16971 }
16972 default:
16973 break;
16974 }
16975
16976 pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
16977 pm_array_pattern_node_requireds_append(parser->arena, node, inner);
16978 return UP(node);
16979 }
16980 case PM_TOKEN_BRACE_LEFT: {
16981 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
16982 parser->pattern_matching_newlines = false;
16983
16985 pm_token_t opening = parser->current;
16986 parser_lex(parser);
16987
16988 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
16989 // If we have an empty hash pattern, then we'll just return a new hash
16990 // pattern node.
16991 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
16992 } else {
16993 pm_node_t *first_node;
16994
16995 switch (parser->current.type) {
16996 case PM_TOKEN_LABEL:
16997 parser_lex(parser);
16998 first_node = UP(pm_symbol_node_label_create(parser, &parser->previous));
16999 break;
17000 case PM_TOKEN_USTAR_STAR:
17001 first_node = parse_pattern_keyword_rest(parser, captures);
17002 break;
17003 case PM_TOKEN_STRING_BEGIN:
17004 first_node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_LABEL, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17005 break;
17006 default: {
17007 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_str(parser->current.type));
17008 parser_lex(parser);
17009
17010 first_node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
17011 break;
17012 }
17013 }
17014
17015 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17016
17017 accept1(parser, PM_TOKEN_NEWLINE);
17018 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
17019 pm_token_t closing = parser->previous;
17020
17021 PM_NODE_START_SET_TOKEN(parser, node, &opening);
17022 PM_NODE_LENGTH_SET_TOKEN(parser, node, &closing);
17023
17024 node->opening_loc = TOK2LOC(parser, &opening);
17025 node->closing_loc = TOK2LOC(parser, &closing);
17026 }
17027
17028 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17029 return UP(node);
17030 }
17031 case PM_TOKEN_UDOT_DOT:
17032 case PM_TOKEN_UDOT_DOT_DOT: {
17033 pm_token_t operator = parser->current;
17034 parser_lex(parser);
17035
17036 // Since we have a unary range operator, we need to parse the subsequent
17037 // expression as the right side of the range.
17038 switch (parser->current.type) {
17039 case PM_CASE_PRIMITIVE: {
17040 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17041 return UP(pm_range_node_create(parser, NULL, &operator, right));
17042 }
17043 default: {
17044 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17045 pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
17046 return UP(pm_range_node_create(parser, NULL, &operator, right));
17047 }
17048 }
17049 }
17050 case PM_CASE_PRIMITIVE: {
17051 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_LABEL | PM_PARSE_ACCEPTS_DO_BLOCK, diag_id, (uint16_t) (depth + 1));
17052
17053 // If we found a label, we need to immediately return to the caller.
17054 if (pm_symbol_node_label_p(parser, node)) return node;
17055
17056 // Call nodes (arithmetic operations) are not allowed in patterns
17057 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
17058 pm_parser_err_node(parser, node, diag_id);
17059 return UP(pm_error_recovery_node_create_unexpected(parser, node));
17060 }
17061
17062 // Now that we have a primitive, we need to check if it's part of a range.
17063 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17064 pm_token_t operator = parser->previous;
17065
17066 // Now that we have the operator, we need to check if this is followed
17067 // by another expression. If it is, then we will create a full range
17068 // node. Otherwise, we'll create an endless range.
17069 switch (parser->current.type) {
17070 case PM_CASE_PRIMITIVE: {
17071 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17072 return UP(pm_range_node_create(parser, node, &operator, right));
17073 }
17074 default:
17075 return UP(pm_range_node_create(parser, node, &operator, NULL));
17076 }
17077 }
17078
17079 return node;
17080 }
17081 case PM_TOKEN_CARET: {
17082 parser_lex(parser);
17083 pm_token_t operator = parser->previous;
17084
17085 // At this point we have a pin operator. We need to check the subsequent
17086 // expression to determine if it's a variable or an expression.
17087 switch (parser->current.type) {
17088 case PM_TOKEN_IDENTIFIER: {
17089 parser_lex(parser);
17090 pm_node_t *variable = UP(parse_variable(parser));
17091
17092 if (variable == NULL) {
17093 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17094 variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->previous, 0));
17095 }
17096
17097 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17098 }
17099 case PM_TOKEN_INSTANCE_VARIABLE: {
17100 parser_lex(parser);
17101 pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
17102
17103 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17104 }
17105 case PM_TOKEN_CLASS_VARIABLE: {
17106 parser_lex(parser);
17107 pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->previous));
17108
17109 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17110 }
17111 case PM_TOKEN_GLOBAL_VARIABLE: {
17112 parser_lex(parser);
17113 pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->previous));
17114
17115 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17116 }
17117 case PM_TOKEN_NUMBERED_REFERENCE: {
17118 parser_lex(parser);
17119 pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
17120
17121 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17122 }
17123 case PM_TOKEN_BACK_REFERENCE: {
17124 parser_lex(parser);
17125 pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->previous));
17126
17127 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17128 }
17129 case PM_TOKEN_PARENTHESIS_LEFT: {
17130 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17131 parser->pattern_matching_newlines = false;
17132
17133 pm_token_t lparen = parser->current;
17134 parser_lex(parser);
17135
17136 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_DO_BLOCK | PM_PARSE_ACCEPTS_COMMAND_CALL, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17137 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17138
17139 accept1(parser, PM_TOKEN_NEWLINE);
17140 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
17141 return UP(pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous));
17142 }
17143 default: {
17144 // If we get here, then we have a pin operator followed by something
17145 // not understood. We'll create a missing node and return that.
17146 pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17147 pm_node_t *variable = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &operator), PM_TOKEN_LENGTH(&operator)));
17148 return UP(pm_pinned_variable_node_create(parser, &operator, variable));
17149 }
17150 }
17151 }
17152 case PM_TOKEN_UCOLON_COLON: {
17153 pm_token_t delimiter = parser->current;
17154 parser_lex(parser);
17155
17156 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17157 pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17158
17159 return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
17160 }
17161 case PM_TOKEN_CONSTANT: {
17162 pm_token_t constant = parser->current;
17163 parser_lex(parser);
17164
17165 pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
17166 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17167 }
17168 default:
17169 pm_parser_err_current(parser, diag_id);
17170 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
17171 }
17172}
17173
17174static bool
17175parse_pattern_alternation_error_each(const pm_node_t *node, void *data) {
17176 switch (PM_NODE_TYPE(node)) {
17177 case PM_LOCAL_VARIABLE_TARGET_NODE: {
17178 pm_parser_t *parser = (pm_parser_t *) data;
17179 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
17180 return false;
17181 }
17182 default:
17183 return true;
17184 }
17185}
17186
17191static void
17192parse_pattern_alternation_error(pm_parser_t *parser, const pm_node_t *node) {
17193 pm_visit_node(node, parse_pattern_alternation_error_each, parser);
17194}
17195
17200static pm_node_t *
17201parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17202 pm_node_t *node = first_node;
17203 bool alternation = false;
17204
17205 while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
17206 if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->size) {
17207 parse_pattern_alternation_error(parser, node);
17208 }
17209
17210 switch (parser->current.type) {
17211 case PM_TOKEN_IDENTIFIER:
17212 case PM_TOKEN_BRACKET_LEFT_ARRAY:
17213 case PM_TOKEN_BRACE_LEFT:
17214 case PM_TOKEN_CARET:
17215 case PM_TOKEN_CONSTANT:
17216 case PM_TOKEN_UCOLON_COLON:
17217 case PM_TOKEN_UDOT_DOT:
17218 case PM_TOKEN_UDOT_DOT_DOT:
17219 case PM_CASE_PRIMITIVE: {
17220 if (!alternation) {
17221 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17222 } else {
17223 pm_token_t operator = parser->previous;
17224 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17225
17226 if (captures->size) parse_pattern_alternation_error(parser, right);
17227 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
17228 }
17229
17230 break;
17231 }
17232 case PM_TOKEN_PARENTHESIS_LEFT:
17233 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17234 pm_token_t operator = parser->previous;
17235 pm_token_t opening = parser->current;
17236 parser_lex(parser);
17237
17238 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17239 accept1(parser, PM_TOKEN_NEWLINE);
17240 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
17241 pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->previous, 0));
17242
17243 if (!alternation) {
17244 node = right;
17245 } else {
17246 if (captures->size) parse_pattern_alternation_error(parser, right);
17247 node = UP(pm_alternation_pattern_node_create(parser, node, right, &operator));
17248 }
17249
17250 break;
17251 }
17252 default: {
17253 pm_parser_err_current(parser, diag_id);
17254 pm_node_t *right = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
17255
17256 if (!alternation) {
17257 node = right;
17258 } else {
17259 if (captures->size) parse_pattern_alternation_error(parser, right);
17260 node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->previous));
17261 }
17262
17263 break;
17264 }
17265 }
17266 }
17267
17268 // If we have an =>, then we are assigning this pattern to a variable.
17269 // In this case we should create an assignment node.
17270 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17271 pm_token_t operator = parser->previous;
17272 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17273
17274 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17275 int depth;
17276
17277 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17278 pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17279 }
17280
17281 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->previous));
17282 pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17283 parser,
17284 &TOK2LOC(parser, &parser->previous),
17285 constant_id,
17286 (uint32_t) (depth == -1 ? 0 : depth)
17287 );
17288
17289 node = UP(pm_capture_pattern_node_create(parser, node, target, &operator));
17290 }
17291
17292 return node;
17293}
17294
17298static pm_node_t *
17299parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17300 pm_node_t *node = NULL;
17301
17302 bool leading_rest = false;
17303 bool trailing_rest = false;
17304
17305 switch (parser->current.type) {
17306 case PM_TOKEN_LABEL: {
17307 parser_lex(parser);
17308 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &parser->previous));
17309 node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
17310
17311 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17312 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17313 }
17314
17315 return node;
17316 }
17317 case PM_TOKEN_USTAR_STAR: {
17318 node = parse_pattern_keyword_rest(parser, captures);
17319 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17320
17321 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17322 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17323 }
17324
17325 return node;
17326 }
17327 case PM_TOKEN_STRING_BEGIN: {
17328 // We need special handling for string beginnings because they could
17329 // be dynamic symbols leading to hash patterns.
17330 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17331
17332 if (pm_symbol_node_label_p(parser, node)) {
17333 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17334
17335 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17336 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17337 }
17338
17339 return node;
17340 }
17341
17342 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17343 break;
17344 }
17345 case PM_TOKEN_USTAR: {
17346 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17347 parser_lex(parser);
17348 node = UP(parse_pattern_rest(parser, captures));
17349 leading_rest = true;
17350 break;
17351 }
17352 }
17354 default:
17355 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17356 break;
17357 }
17358
17359 // If we got a dynamic label symbol, then we need to treat it like the
17360 // beginning of a hash pattern.
17361 if (pm_symbol_node_label_p(parser, node)) {
17362 return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17363 }
17364
17365 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17366 // If we have a comma, then we are now parsing either an array pattern
17367 // or a find pattern. We need to parse all of the patterns, put them
17368 // into a big list, and then determine which type of node we have.
17369 pm_node_list_t nodes = { 0 };
17370 pm_node_list_append(parser->arena, &nodes, node);
17371
17372 // Gather up all of the patterns into the list.
17373 while (accept1(parser, PM_TOKEN_COMMA)) {
17374 // Break early here in case we have a trailing comma.
17375 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17376 node = UP(pm_implicit_rest_node_create(parser, &parser->previous));
17377 pm_node_list_append(parser->arena, &nodes, node);
17378 trailing_rest = true;
17379 break;
17380 }
17381
17382 if (accept1(parser, PM_TOKEN_USTAR)) {
17383 node = UP(parse_pattern_rest(parser, captures));
17384
17385 // If we have already parsed a splat pattern, then this is an
17386 // error. We will continue to parse the rest of the patterns,
17387 // but we will indicate it as an error.
17388 if (trailing_rest) {
17389 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17390 }
17391
17392 trailing_rest = true;
17393 } else {
17394 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17395 }
17396
17397 pm_node_list_append(parser->arena, &nodes, node);
17398 }
17399
17400 // If the first pattern and the last pattern are rest patterns, then we
17401 // will call this a find pattern, regardless of how many rest patterns
17402 // are in between because we know we already added the appropriate
17403 // errors. Otherwise we will create an array pattern.
17404 if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17405 node = UP(pm_find_pattern_node_create(parser, &nodes));
17406
17407 if (nodes.size == 2) {
17408 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17409 }
17410 } else {
17411 node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
17412
17413 if (leading_rest && trailing_rest) {
17414 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17415 }
17416 }
17417
17418 // nodes.nodes is arena-allocated; no explicit free needed.
17419 } else if (leading_rest) {
17420 // Otherwise, if we parsed a single splat pattern, then we know we have
17421 // an array pattern, so we can go ahead and create that node.
17422 node = UP(pm_array_pattern_node_rest_create(parser, node));
17423 }
17424
17425 return node;
17426}
17427
17433static PRISM_INLINE void
17434parse_negative_numeric(pm_node_t *node) {
17435 switch (PM_NODE_TYPE(node)) {
17436 case PM_INTEGER_NODE: {
17437 pm_integer_node_t *cast = (pm_integer_node_t *) node;
17438 cast->base.location.start--;
17439 cast->base.location.length++;
17440 cast->value.negative = true;
17441 break;
17442 }
17443 case PM_FLOAT_NODE: {
17444 pm_float_node_t *cast = (pm_float_node_t *) node;
17445 cast->base.location.start--;
17446 cast->base.location.length++;
17447 cast->value = -cast->value;
17448 break;
17449 }
17450 case PM_RATIONAL_NODE: {
17451 pm_rational_node_t *cast = (pm_rational_node_t *) node;
17452 cast->base.location.start--;
17453 cast->base.location.length++;
17454 cast->numerator.negative = true;
17455 break;
17456 }
17457 case PM_IMAGINARY_NODE:
17458 node->location.start--;
17459 node->location.length++;
17460 parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17461 break;
17462 default:
17463 assert(false && "unreachable");
17464 break;
17465 }
17466}
17467
17473static void
17474pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17475 switch (diag_id) {
17476 case PM_ERR_HASH_KEY: {
17477 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, pm_token_str(parser->previous.type));
17478 break;
17479 }
17480 case PM_ERR_HASH_VALUE:
17481 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17482 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type));
17483 break;
17484 }
17485 case PM_ERR_UNARY_RECEIVER: {
17486 const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_str(parser->current.type));
17487 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, diag_id, human, parser->previous.start[0]);
17488 break;
17489 }
17490 case PM_ERR_UNARY_DISALLOWED:
17491 case PM_ERR_EXPECT_ARGUMENT: {
17492 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, diag_id, pm_token_str(parser->current.type));
17493 break;
17494 }
17495 default:
17496 pm_parser_err_previous(parser, diag_id);
17497 break;
17498 }
17499}
17500
17504static void
17505parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17506#define CONTEXT_NONE 0
17507#define CONTEXT_THROUGH_ENSURE 1
17508#define CONTEXT_THROUGH_ELSE 2
17509
17510 pm_context_node_t *context_node = parser->current_context;
17511 int context = CONTEXT_NONE;
17512
17513 while (context_node != NULL) {
17514 switch (context_node->context) {
17515 case PM_CONTEXT_BEGIN_RESCUE:
17516 case PM_CONTEXT_BLOCK_RESCUE:
17517 case PM_CONTEXT_CLASS_RESCUE:
17518 case PM_CONTEXT_DEF_RESCUE:
17519 case PM_CONTEXT_LAMBDA_RESCUE:
17520 case PM_CONTEXT_MODULE_RESCUE:
17521 case PM_CONTEXT_SCLASS_RESCUE:
17522 case PM_CONTEXT_DEFINED:
17523 case PM_CONTEXT_RESCUE_MODIFIER:
17524 // These are the good cases. We're allowed to have a retry here.
17525 return;
17526 case PM_CONTEXT_CLASS:
17527 case PM_CONTEXT_DEF:
17528 case PM_CONTEXT_DEF_PARAMS:
17529 case PM_CONTEXT_MAIN:
17530 case PM_CONTEXT_MODULE:
17531 case PM_CONTEXT_PREEXE:
17532 case PM_CONTEXT_SCLASS:
17533 // These are the bad cases. We're not allowed to have a retry in
17534 // these contexts.
17535 if (context == CONTEXT_NONE) {
17536 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17537 } else if (context == CONTEXT_THROUGH_ENSURE) {
17538 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17539 } else if (context == CONTEXT_THROUGH_ELSE) {
17540 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17541 }
17542 return;
17543 case PM_CONTEXT_BEGIN_ELSE:
17544 case PM_CONTEXT_BLOCK_ELSE:
17545 case PM_CONTEXT_CLASS_ELSE:
17546 case PM_CONTEXT_DEF_ELSE:
17547 case PM_CONTEXT_LAMBDA_ELSE:
17548 case PM_CONTEXT_MODULE_ELSE:
17549 case PM_CONTEXT_SCLASS_ELSE:
17550 // These are also bad cases, but with a more specific error
17551 // message indicating the else.
17552 context = CONTEXT_THROUGH_ELSE;
17553 break;
17554 case PM_CONTEXT_BEGIN_ENSURE:
17555 case PM_CONTEXT_BLOCK_ENSURE:
17556 case PM_CONTEXT_CLASS_ENSURE:
17557 case PM_CONTEXT_DEF_ENSURE:
17558 case PM_CONTEXT_LAMBDA_ENSURE:
17559 case PM_CONTEXT_MODULE_ENSURE:
17560 case PM_CONTEXT_SCLASS_ENSURE:
17561 // These are also bad cases, but with a more specific error
17562 // message indicating the ensure.
17563 context = CONTEXT_THROUGH_ENSURE;
17564 break;
17565 case PM_CONTEXT_NONE:
17566 // This case should never happen.
17567 assert(false && "unreachable");
17568 break;
17569 case PM_CONTEXT_BEGIN:
17570 case PM_CONTEXT_BLOCK_BRACES:
17571 case PM_CONTEXT_BLOCK_KEYWORDS:
17572 case PM_CONTEXT_BLOCK_PARAMETERS:
17573 case PM_CONTEXT_CASE_IN:
17574 case PM_CONTEXT_CASE_WHEN:
17575 case PM_CONTEXT_DEFAULT_PARAMS:
17576 case PM_CONTEXT_ELSE:
17577 case PM_CONTEXT_ELSIF:
17578 case PM_CONTEXT_EMBEXPR:
17579 case PM_CONTEXT_FOR_INDEX:
17580 case PM_CONTEXT_FOR:
17581 case PM_CONTEXT_IF:
17582 case PM_CONTEXT_LAMBDA_BRACES:
17583 case PM_CONTEXT_LAMBDA_DO_END:
17584 case PM_CONTEXT_LOOP_PREDICATE:
17585 case PM_CONTEXT_MULTI_TARGET:
17586 case PM_CONTEXT_PARENS:
17587 case PM_CONTEXT_POSTEXE:
17588 case PM_CONTEXT_PREDICATE:
17589 case PM_CONTEXT_TERNARY:
17590 case PM_CONTEXT_UNLESS:
17591 case PM_CONTEXT_UNTIL:
17592 case PM_CONTEXT_WHILE:
17593 // In these contexts we should continue walking up the list of
17594 // contexts.
17595 break;
17596 }
17597
17598 context_node = context_node->prev;
17599 }
17600
17601#undef CONTEXT_NONE
17602#undef CONTEXT_ENSURE
17603#undef CONTEXT_ELSE
17604}
17605
17609static void
17610parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17611 pm_context_node_t *context_node = parser->current_context;
17612
17613 while (context_node != NULL) {
17614 switch (context_node->context) {
17615 case PM_CONTEXT_DEF:
17616 case PM_CONTEXT_DEF_PARAMS:
17617 case PM_CONTEXT_DEFINED:
17618 case PM_CONTEXT_DEF_ENSURE:
17619 case PM_CONTEXT_DEF_RESCUE:
17620 case PM_CONTEXT_DEF_ELSE:
17621 // These are the good cases. We're allowed to have a block exit
17622 // in these contexts.
17623 return;
17624 case PM_CONTEXT_CLASS:
17625 case PM_CONTEXT_CLASS_ENSURE:
17626 case PM_CONTEXT_CLASS_RESCUE:
17627 case PM_CONTEXT_CLASS_ELSE:
17628 case PM_CONTEXT_MAIN:
17629 case PM_CONTEXT_MODULE:
17630 case PM_CONTEXT_MODULE_ENSURE:
17631 case PM_CONTEXT_MODULE_RESCUE:
17632 case PM_CONTEXT_MODULE_ELSE:
17633 case PM_CONTEXT_SCLASS:
17634 case PM_CONTEXT_SCLASS_RESCUE:
17635 case PM_CONTEXT_SCLASS_ENSURE:
17636 case PM_CONTEXT_SCLASS_ELSE:
17637 // These are the bad cases. We're not allowed to have a retry in
17638 // these contexts.
17639 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17640 return;
17641 case PM_CONTEXT_NONE:
17642 // This case should never happen.
17643 assert(false && "unreachable");
17644 break;
17645 case PM_CONTEXT_BEGIN:
17646 case PM_CONTEXT_BEGIN_ELSE:
17647 case PM_CONTEXT_BEGIN_ENSURE:
17648 case PM_CONTEXT_BEGIN_RESCUE:
17649 case PM_CONTEXT_BLOCK_BRACES:
17650 case PM_CONTEXT_BLOCK_KEYWORDS:
17651 case PM_CONTEXT_BLOCK_ELSE:
17652 case PM_CONTEXT_BLOCK_ENSURE:
17653 case PM_CONTEXT_BLOCK_PARAMETERS:
17654 case PM_CONTEXT_BLOCK_RESCUE:
17655 case PM_CONTEXT_CASE_IN:
17656 case PM_CONTEXT_CASE_WHEN:
17657 case PM_CONTEXT_DEFAULT_PARAMS:
17658 case PM_CONTEXT_ELSE:
17659 case PM_CONTEXT_ELSIF:
17660 case PM_CONTEXT_EMBEXPR:
17661 case PM_CONTEXT_FOR_INDEX:
17662 case PM_CONTEXT_FOR:
17663 case PM_CONTEXT_IF:
17664 case PM_CONTEXT_LAMBDA_BRACES:
17665 case PM_CONTEXT_LAMBDA_DO_END:
17666 case PM_CONTEXT_LAMBDA_ELSE:
17667 case PM_CONTEXT_LAMBDA_ENSURE:
17668 case PM_CONTEXT_LAMBDA_RESCUE:
17669 case PM_CONTEXT_LOOP_PREDICATE:
17670 case PM_CONTEXT_MULTI_TARGET:
17671 case PM_CONTEXT_PARENS:
17672 case PM_CONTEXT_POSTEXE:
17673 case PM_CONTEXT_PREDICATE:
17674 case PM_CONTEXT_PREEXE:
17675 case PM_CONTEXT_RESCUE_MODIFIER:
17676 case PM_CONTEXT_TERNARY:
17677 case PM_CONTEXT_UNLESS:
17678 case PM_CONTEXT_UNTIL:
17679 case PM_CONTEXT_WHILE:
17680 // In these contexts we should continue walking up the list of
17681 // contexts.
17682 break;
17683 }
17684
17685 context_node = context_node->prev;
17686 }
17687}
17688
17693static PRISM_INLINE bool
17694pm_call_node_command_p(const pm_call_node_t *node) {
17695 return (
17696 (node->opening_loc.length == 0) &&
17697 (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
17698 (node->arguments != NULL || node->block != NULL)
17699 );
17700}
17701
17707static bool
17708pm_command_call_value_p(const pm_node_t *node) {
17709 switch (PM_NODE_TYPE(node)) {
17710 case PM_CALL_NODE: {
17711 const pm_call_node_t *call = (const pm_call_node_t *) node;
17712
17713 // Command-style calls (e.g., foo bar, obj.foo bar).
17714 // Attribute writes (e.g., a.b = 1) are not commands.
17715 if (pm_call_node_command_p(call) && !PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE) && (call->receiver == NULL || call->call_operator_loc.length > 0)) {
17716 return true;
17717 }
17718
17719 // A `!` or `not` prefix wrapping a command call (e.g.,
17720 // `!foo bar`, `not foo bar`) is also a command-call value.
17721 if (call->receiver != NULL && call->arguments == NULL && call->opening_loc.length == 0 && call->call_operator_loc.length == 0) {
17722 return pm_command_call_value_p(call->receiver);
17723 }
17724
17725 return false;
17726 }
17727 case PM_SUPER_NODE: {
17728 const pm_super_node_t *cast = (const pm_super_node_t *) node;
17729 return cast->lparen_loc.length == 0 && (cast->arguments != NULL || cast->block != NULL);
17730 }
17731 case PM_YIELD_NODE: {
17732 const pm_yield_node_t *cast = (const pm_yield_node_t *) node;
17733 return cast->lparen_loc.length == 0 && cast->arguments != NULL;
17734 }
17735 case PM_RESCUE_MODIFIER_NODE:
17736 return pm_command_call_value_p(((const pm_rescue_modifier_node_t *) node)->expression);
17737 case PM_DEF_NODE: {
17738 const pm_def_node_t *cast = (const pm_def_node_t *) node;
17739 if (cast->equal_loc.length > 0 && cast->body != NULL) {
17740 const pm_node_t *body = cast->body;
17741 if (PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE)) {
17742 body = ((const pm_statements_node_t *) body)->body.nodes[((const pm_statements_node_t *) body)->body.size - 1];
17743 }
17744 return pm_command_call_value_p(body);
17745 }
17746 return false;
17747 }
17748 default:
17749 return false;
17750 }
17751}
17752
17759static bool
17760pm_block_call_p(const pm_node_t *node) {
17761 while (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
17762 const pm_call_node_t *call = (const pm_call_node_t *) node;
17763 if (call->opening_loc.length > 0) return false;
17764
17765 // Root: command with do-block (e.g., `foo bar do end`).
17766 if (call->arguments != NULL && call->block != NULL && PM_NODE_TYPE_P(call->block, PM_BLOCK_NODE)) {
17767 return true;
17768 }
17769
17770 // Walk up the receiver chain (e.g., `foo bar do end.baz`).
17771 if (call->call_operator_loc.length > 0 && call->receiver != NULL) {
17772 node = call->receiver;
17773 continue;
17774 }
17775
17776 return false;
17777 }
17778
17779 return false;
17780}
17781
17786static pm_node_t *
17787parse_case(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
17788 size_t opening_newline_index = token_newline_index(parser);
17789 parser_lex(parser);
17790
17791 pm_token_t case_keyword = parser->previous;
17792 pm_node_t *predicate = NULL;
17793
17794 pm_node_list_t current_block_exits = { 0 };
17795 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
17796
17797 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17798 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17799 predicate = NULL;
17800 } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
17801 predicate = NULL;
17802 } else if (!token_begins_expression_p(parser->current.type)) {
17803 predicate = NULL;
17804 } else {
17805 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
17806 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17807 }
17808
17809 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
17810 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
17811 parser_lex(parser);
17812 pop_block_exits(parser, previous_block_exits);
17813 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
17814 return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->previous));
17815 }
17816
17817 /* At this point we can create a case node, though we don't yet know if it
17818 * is a case-in or case-when node. */
17819 pm_node_t *node;
17820
17821 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
17822 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL);
17823 pm_static_literals_t literals = { 0 };
17824
17825 /* At this point we've seen a when keyword, so we know this is a
17826 * case-when node. We will continue to parse the when nodes until we hit
17827 * the end of the list. */
17828 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
17829 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
17830 parser_lex(parser);
17831
17832 pm_token_t when_keyword = parser->previous;
17833 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
17834
17835 do {
17836 if (accept1(parser, PM_TOKEN_USTAR)) {
17837 pm_token_t operator = parser->previous;
17838 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
17839
17840 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
17841 pm_when_node_conditions_append(parser->arena, when_node, UP(splat_node));
17842
17843 if (PM_NODE_TYPE_P(expression, PM_ERROR_RECOVERY_NODE)) break;
17844 } else {
17845 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
17846 pm_when_node_conditions_append(parser->arena, when_node, condition);
17847
17848 /* If we found a missing node, then this is a syntax error
17849 * and we should stop looping. */
17850 if (PM_NODE_TYPE_P(condition, PM_ERROR_RECOVERY_NODE)) break;
17851
17852 /* If this is a string node, then we need to mark it as
17853 * frozen because when clause strings are frozen. */
17854 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
17855 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
17856 } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
17857 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
17858 }
17859
17860 pm_when_clause_static_literals_add(parser, &literals, condition);
17861 }
17862 } while (accept1(parser, PM_TOKEN_COMMA));
17863
17864 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17865 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
17866 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
17867 }
17868 } else {
17869 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
17870 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->previous);
17871 }
17872
17873 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
17874 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
17875 if (statements != NULL) {
17876 pm_when_node_statements_set(when_node, statements);
17877 }
17878 }
17879
17880 pm_case_node_condition_append(parser->arena, case_node, UP(when_node));
17881 }
17882
17883 /* If we didn't parse any conditions (in or when) then we need to
17884 * indicate that we have an error. */
17885 if (case_node->conditions.size == 0) {
17886 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
17887 }
17888
17889 pm_static_literals_free(&literals);
17890 node = UP(case_node);
17891 } else {
17892 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate);
17893
17894 /* If this is a case-match node (i.e., it is a pattern matching case
17895 * statement) then we must have a predicate. */
17896 if (predicate == NULL) {
17897 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
17898 }
17899
17900 /* At this point we expect that we're parsing a case-in node. We will
17901 * continue to parse the in nodes until we hit the end of the list. */
17902 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
17903 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
17904
17905 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17906 parser->pattern_matching_newlines = true;
17907
17908 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
17909 parser->command_start = false;
17910 parser_lex(parser);
17911
17912 pm_token_t in_keyword = parser->previous;
17913
17914 pm_constant_id_list_t captures = { 0 };
17915 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
17916
17917 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17918
17919 /* Since we're in the top-level of the case-in node we need to
17920 * check for guard clauses in the form of `if` or `unless`
17921 * statements. */
17922 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
17923 pm_token_t keyword = parser->previous;
17924 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
17925 pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
17926 } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
17927 pm_token_t keyword = parser->previous;
17928 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
17929 pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
17930 }
17931
17932 /* Now we need to check for the terminator of the in node's pattern.
17933 * It can be a newline or semicolon optionally followed by a `then`
17934 * keyword. */
17935 pm_token_t then_keyword = { 0 };
17936 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17937 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
17938 then_keyword = parser->previous;
17939 }
17940 } else {
17941 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
17942 then_keyword = parser->previous;
17943 }
17944
17945 /* Now we can actually parse the statements associated with the in
17946 * node. */
17947 pm_statements_node_t *statements;
17948 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
17949 statements = NULL;
17950 } else {
17951 statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
17952 }
17953
17954 /* Now that we have the full pattern and statements, we can create
17955 * the node and attach it to the case node. */
17956 pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword)));
17957 pm_case_match_node_condition_append(parser->arena, case_node, condition);
17958 }
17959
17960 /* If we didn't parse any conditions (in or when) then we need to
17961 * indicate that we have an error. */
17962 if (case_node->conditions.size == 0) {
17963 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
17964 }
17965
17966 node = UP(case_node);
17967 }
17968
17969 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
17970 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
17971 pm_token_t else_keyword = parser->previous;
17972 pm_else_node_t *else_node;
17973
17974 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
17975 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
17976 } else {
17977 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
17978 }
17979
17980 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
17981 pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
17982 } else {
17983 pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
17984 }
17985 }
17986
17987 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
17988 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
17989
17990 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
17991 pm_case_node_end_keyword_loc_set(parser, (pm_case_node_t *) node, &parser->previous);
17992 } else {
17993 pm_case_match_node_end_keyword_loc_set(parser, (pm_case_match_node_t *) node, &parser->previous);
17994 }
17995
17996 pop_block_exits(parser, previous_block_exits);
17997 return node;
17998}
17999
18004static pm_node_t *
18005parse_class(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
18006 size_t opening_newline_index = token_newline_index(parser);
18007 parser_lex(parser);
18008
18009 pm_token_t class_keyword = parser->previous;
18010 pm_do_loop_stack_push(parser, false);
18011
18012 pm_node_list_t current_block_exits = { 0 };
18013 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18014
18015 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
18016 pm_token_t operator = parser->previous;
18017 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
18018
18019 pm_parser_scope_push(parser, true);
18020 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18021 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_str(parser->current.type));
18022 }
18023
18024 pm_node_t *statements = NULL;
18025 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18026 pm_accepts_block_stack_push(parser, true);
18027 statements = UP(parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
18028 pm_accepts_block_stack_pop(parser);
18029 }
18030
18031 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18032 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18033 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
18034 } else {
18035 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18036 }
18037
18038 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18039
18040 pm_constant_id_list_t locals;
18041 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18042
18043 pm_parser_scope_pop(parser);
18044 pm_do_loop_stack_pop(parser);
18045
18046 flush_block_exits(parser, previous_block_exits);
18047 return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous));
18048 }
18049
18050 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
18051 pm_token_t name = parser->previous;
18052 if (name.type != PM_TOKEN_CONSTANT) {
18053 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
18054 }
18055
18056 pm_token_t inheritance_operator = { 0 };
18057 pm_node_t *superclass;
18058
18059 if (match1(parser, PM_TOKEN_LESS)) {
18060 inheritance_operator = parser->current;
18061 lex_state_set(parser, PM_LEX_STATE_BEG);
18062
18063 parser->command_start = true;
18064 parser_lex(parser);
18065
18066 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
18067 } else {
18068 superclass = NULL;
18069 }
18070
18071 pm_parser_scope_push(parser, true);
18072
18073 if (inheritance_operator.start != NULL) {
18074 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
18075 } else {
18076 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18077 }
18078 pm_node_t *statements = NULL;
18079
18080 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18081 pm_accepts_block_stack_push(parser, true);
18082 statements = UP(parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
18083 pm_accepts_block_stack_pop(parser);
18084 }
18085
18086 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18087 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18088 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
18089 } else {
18090 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
18091 }
18092
18093 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18094
18095 if (context_def_p(parser)) {
18096 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
18097 }
18098
18099 pm_constant_id_list_t locals;
18100 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18101
18102 pm_parser_scope_pop(parser);
18103 pm_do_loop_stack_pop(parser);
18104
18105 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
18106 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
18107 if (!PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
18108 constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
18109 }
18110 }
18111
18112 pop_block_exits(parser, previous_block_exits);
18113 return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->previous));
18114}
18115
/**
 * Parse a method definition introduced by the `def` keyword.
 *
 * On entry the `def` keyword is still parser->current; it is recorded and then
 * consumed here. The function parses, in order: the optional receiver and the
 * method name, the parameter list (parenthesized, bare, or absent), and then
 * either an endless body (`= expression`, optionally followed by a rescue
 * modifier) or a regular body terminated by `end`.
 *
 * @param parser The parser being driven.
 * @param binding_power Only consulted for versions before
 *     PM_OPTIONS_VERSION_CRUBY_4_0, to decide whether an endless body may be a
 *     command call.
 * @param flags Parse flags; the PM_PARSE_ACCEPTS_DO_BLOCK and
 *     PM_PARSE_ACCEPTS_COMMAND_CALL bits are forwarded to nested expression
 *     parses.
 * @param depth The current nesting depth; depth + 1 is passed to nested
 *     parses.
 * @return The node built by pm_def_node_create, upcast with UP.
 */
18119static pm_node_t *
18120parse_def(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
      /* Block exits found while parsing this definition are collected in a
       * fresh list; the previous list is handed to flush_block_exits before
       * returning. */
18121 pm_node_list_t current_block_exits = { 0 };
18122 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18123
      /* The `def` keyword has not been consumed yet, so it is read from
       * parser->current rather than parser->previous. */
18124 pm_token_t def_keyword = parser->current;
18125 size_t opening_newline_index = token_newline_index(parser);
18126
18127 pm_node_t *receiver = NULL;
18128 pm_token_t operator = { 0 };
18129 pm_token_t name;
18130
18131 /* This context is necessary for lexing `...` in a bare params correctly. It
18132 * must be pushed before lexing the first param, so it is here. */
18133 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18134 parser_lex(parser);
18135
18136 /* This will be false if the method name is not a valid identifier but could
18137 * be followed by an operator. */
18138 bool valid_name = true;
18139
      /* Parse the optional receiver and the method name. Every branch pushes
       * exactly one new scope (popped once the body has been parsed) and
       * assigns `name`. */
18140 switch (parser->current.type) {
18141 case PM_CASE_OPERATOR:
18142 pm_parser_scope_push(parser, true);
18143 lex_state_set(parser, PM_LEX_STATE_ENDFN);
18144 parser_lex(parser);
18145
18146 name = parser->previous;
18147 break;
18148 case PM_TOKEN_IDENTIFIER: {
18149 parser_lex(parser);
18150
      /* An identifier followed by `.` or `::` is the receiver, not the
       * method name. */
18151 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18152 receiver = parse_variable_call(parser);
18153
18154 pm_parser_scope_push(parser, true);
18155 lex_state_set(parser, PM_LEX_STATE_FNAME);
18156 parser_lex(parser);
18157
18158 operator = parser->previous;
18159 name = parse_method_definition_name(parser);
18160 } else {
18161 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous));
18162 pm_parser_scope_push(parser, true);
18163
18164 name = parser->previous;
18165 }
18166
18167 break;
18168 }
18169 case PM_TOKEN_INSTANCE_VARIABLE:
18170 case PM_TOKEN_CLASS_VARIABLE:
18171 case PM_TOKEN_GLOBAL_VARIABLE:
18172 valid_name = false;
      /* Intentional fallthrough: these tokens share the handling below, but
       * with valid_name cleared they are reported with PM_ERR_DEF_NAME unless
       * a `.` or `::` follows and makes them a receiver. */
18174 case PM_TOKEN_CONSTANT:
18175 case PM_TOKEN_KEYWORD_NIL:
18176 case PM_TOKEN_KEYWORD_SELF:
18177 case PM_TOKEN_KEYWORD_TRUE:
18178 case PM_TOKEN_KEYWORD_FALSE:
18179 case PM_TOKEN_KEYWORD___FILE__:
18180 case PM_TOKEN_KEYWORD___LINE__:
18181 case PM_TOKEN_KEYWORD___ENCODING__: {
18182 pm_parser_scope_push(parser, true);
18183 parser_lex(parser);
18184
18185 pm_token_t identifier = parser->previous;
18186
18187 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18188 lex_state_set(parser, PM_LEX_STATE_FNAME);
18189 parser_lex(parser);
18190 operator = parser->previous;
18191
      /* Build the receiver node that corresponds to the token that
       * preceded the `.` or `::`. */
18192 switch (identifier.type) {
18193 case PM_TOKEN_CONSTANT:
18194 receiver = UP(pm_constant_read_node_create(parser, &identifier));
18195 break;
18196 case PM_TOKEN_INSTANCE_VARIABLE:
18197 receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
18198 break;
18199 case PM_TOKEN_CLASS_VARIABLE:
18200 receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
18201 break;
18202 case PM_TOKEN_GLOBAL_VARIABLE:
18203 receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
18204 break;
18205 case PM_TOKEN_KEYWORD_NIL:
18206 receiver = UP(pm_nil_node_create(parser, &identifier));
18207 break;
18208 case PM_TOKEN_KEYWORD_SELF:
18209 receiver = UP(pm_self_node_create(parser, &identifier));
18210 break;
18211 case PM_TOKEN_KEYWORD_TRUE:
18212 receiver = UP(pm_true_node_create(parser, &identifier));
18213 break;
18214 case PM_TOKEN_KEYWORD_FALSE:
18215 receiver = UP(pm_false_node_create(parser, &identifier));
18216 break;
18217 case PM_TOKEN_KEYWORD___FILE__:
18218 receiver = UP(pm_source_file_node_create(parser, &identifier));
18219 break;
18220 case PM_TOKEN_KEYWORD___LINE__:
18221 receiver = UP(pm_source_line_node_create(parser, &identifier));
18222 break;
18223 case PM_TOKEN_KEYWORD___ENCODING__:
18224 receiver = UP(pm_source_encoding_node_create(parser, &identifier));
18225 break;
18226 default:
18227 break;
18228 }
18229
18230 name = parse_method_definition_name(parser);
18231 } else {
18232 if (!valid_name) {
18233 PM_PARSER_ERR_TOKEN_FORMAT(parser, &identifier, PM_ERR_DEF_NAME, pm_token_str(identifier.type));
18234 }
18235
18236 name = identifier;
18237 }
18238 break;
18239 }
18240 case PM_TOKEN_PARENTHESIS_LEFT: {
18241 /* The current context is `PM_CONTEXT_DEF_PARAMS`, however the inner
18242 * expression of this parenthesis should not be processed under this
18243 * context. Thus, the context is popped here. */
18244 context_pop(parser);
18245 parser_lex(parser);
18246
18247 pm_token_t lparen = parser->previous;
18248 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
18249
18250 accept1(parser, PM_TOKEN_NEWLINE);
18251 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18252 pm_token_t rparen = parser->previous;
18253
18254 lex_state_set(parser, PM_LEX_STATE_FNAME);
18255 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
18256
18257 operator = parser->previous;
18258 receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
18259
18260 /* PM_CONTEXT_DEF_PARAMS is pushed again for the same reason as
18261 * described above. */
18262 pm_parser_scope_push(parser, true);
18263 context_push(parser, PM_CONTEXT_DEF_PARAMS);
18264 name = parse_method_definition_name(parser);
18265 break;
18266 }
18267 default:
18268 pm_parser_scope_push(parser, true);
18269 name = parse_method_definition_name(parser);
18270 break;
18271 }
18272
      /* Parse the parameter list. Each branch below pops the
       * PM_CONTEXT_DEF_PARAMS context that was pushed above. */
18273 pm_token_t lparen = { 0 };
18274 pm_token_t rparen = { 0 };
18275 pm_parameters_node_t *params;
18276
18277 bool accept_endless_def = true;
18278 switch (parser->current.type) {
18279 case PM_TOKEN_PARENTHESIS_LEFT: {
18280 parser_lex(parser);
18281 lparen = parser->previous;
18282
18283 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18284 params = NULL;
18285 } else {
18286 /* https://bugs.ruby-lang.org/issues/19107 */
18287 bool allow_trailing_comma = parser->version >= PM_OPTIONS_VERSION_CRUBY_4_1;
18288 params = parse_parameters(
18289 parser,
18290 PM_BINDING_POWER_DEFINED,
18291 true,
18292 allow_trailing_comma,
18293 true,
18294 true,
18295 false,
18296 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18297 (uint16_t) (depth + 1)
18298 );
18299 }
18300
18301 lex_state_set(parser, PM_LEX_STATE_BEG);
18302 parser->command_start = true;
18303
18304 context_pop(parser);
18305 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18306 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_str(parser->current.type));
      /* No closing parenthesis was found: parser->previous is turned
       * into a zero-width token of type 0 located at its own end, which
       * is then recorded as rparen below. */
18307 parser->previous.start = parser->previous.end;
18308 parser->previous.type = 0;
18309 }
18310
18311 rparen = parser->previous;
18312 break;
18313 }
18314 case PM_CASE_PARAMETER: {
18315 /* If we're about to lex a label, we need to add the label state to
18316 * make sure the next newline is ignored. */
18317 if (parser->current.type == PM_TOKEN_LABEL) {
18318 lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
18319 }
18320
18321 params = parse_parameters(
18322 parser,
18323 PM_BINDING_POWER_DEFINED,
18324 false,
18325 false,
18326 true,
18327 true,
18328 false,
18329 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18330 (uint16_t) (depth + 1)
18331 );
18332
18333 /* Reject `def * = 1` and similar. We have to specifically check for
18334 * them because they create ambiguity with optional arguments. */
18335 accept_endless_def = false;
18336
18337 context_pop(parser);
18338 break;
18339 }
18340 default: {
18341 params = NULL;
18342 context_pop(parser);
18343 break;
18344 }
18345 }
18346
18347 pm_node_t *statements = NULL;
18348 pm_token_t equal = { 0 };
18349 pm_token_t end_keyword = { 0 };
18350
      /* An `=` after the parameters starts an endless definition; otherwise a
       * regular body terminated by `end` is parsed. */
18351 if (accept1(parser, PM_TOKEN_EQUAL)) {
18352 if (token_is_setter_name(&name)) {
18353 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
18354 }
18355 if (!accept_endless_def) {
18356 pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
18357 }
      /* When the current context is PM_CONTEXT_DEFAULT_PARAMS directly
       * inside PM_CONTEXT_BLOCK_PARAMETERS, the endless definition is
       * reported as an unexpected parameter default value. */
18358 if (
18359 parser->current_context->context == PM_CONTEXT_DEFAULT_PARAMS &&
18360 parser->current_context->prev->context == PM_CONTEXT_BLOCK_PARAMETERS
18361 ) {
18362 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE, "endless method definition");
18363 }
18364 equal = parser->previous;
18365
18366 context_push(parser, PM_CONTEXT_DEF);
18367 pm_do_loop_stack_push(parser, false);
      /* The endless body is held in a statements node to which the single
       * expression parsed below is appended. */
18368 statements = UP(pm_statements_node_create(parser));
18369
18370 uint8_t allow_flags;
18371 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_4_0) {
18372 allow_flags = flags & PM_PARSE_ACCEPTS_COMMAND_CALL;
18373 } else {
18374 /* Allow `def foo = puts "Hello"` but not
18375 * `private def foo = puts "Hello"` */
18376 allow_flags = (binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION) ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0;
18377 }
18378
18379 /* Inside a def body, we push true onto the accepts_block_stack so that
18380 * `do` is lexed as PM_TOKEN_KEYWORD_DO (which can only start a block
18381 * for primary-level constructs, not commands). During command argument
18382 * parsing, the stack is pushed to false, causing `do` to be lexed as
18383 * PM_TOKEN_KEYWORD_DO_BLOCK, which is not consumed inside the endless
18384 * def body and instead left for the outer context. */
18385 pm_accepts_block_stack_push(parser, true);
18386 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_flags | PM_PARSE_IN_ENDLESS_DEF, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
18387 pm_accepts_block_stack_pop(parser);
18388
18389 /* If an unconsumed PM_TOKEN_KEYWORD_DO follows the body, it is an error
18390 * (e.g., `def f = 1 do end`). PM_TOKEN_KEYWORD_DO_BLOCK is
18391 * intentionally not caught here — it should bubble up to the outer
18392 * context (e.g., `private def f = puts "Hello" do end` where the block
18393 * attaches to `private`). */
18394 if (accept1(parser, PM_TOKEN_KEYWORD_DO)) {
18395 pm_block_node_t *block = parse_block(parser, (uint16_t) (depth + 1));
18396 pm_parser_err_node(parser, UP(block), PM_ERR_DEF_ENDLESS_DO_BLOCK);
18397 }
18398
18399 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
18400 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
18401
18402 pm_token_t rescue_keyword = parser->previous;
18403
18404 /* In the Ruby grammar, the rescue value of an endless method
18405 * command excludes and/or and in/=>. */
18406 pm_node_t *value = parse_expression(parser, PM_BINDING_POWER_MATCH + 1, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
18407 context_pop(parser);
18408
18409 statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
18410 }
18411
18412 /* A nested endless def whose body is a command call (e.g.,
18413 * `def f = def g = foo bar`) is a command assignment and cannot appear
18414 * as a def body. */
18415 if (PM_NODE_TYPE_P(statement, PM_DEF_NODE) && pm_command_call_value_p(statement)) {
18416 PM_PARSER_ERR_NODE_FORMAT(parser, statement, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
18417 }
18418
18419 pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
18420 pm_do_loop_stack_pop(parser);
18421 context_pop(parser);
18422 } else {
      /* Without parentheses around the parameters, a newline or semicolon
       * is required before the body; with them it is optional. */
18423 if (lparen.start == NULL) {
18424 lex_state_set(parser, PM_LEX_STATE_BEG);
18425 parser->command_start = true;
18426 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
18427 } else {
18428 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18429 }
18430
18431 pm_accepts_block_stack_push(parser, true);
18432 pm_do_loop_stack_push(parser, false);
18433
18434 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18435 pm_accepts_block_stack_push(parser, true);
18436 statements = UP(parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
18437 pm_accepts_block_stack_pop(parser);
18438 }
18439
18440 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
18441 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18442 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
18443 } else {
18444 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
18445 }
18446
18447 pm_accepts_block_stack_pop(parser);
18448 pm_do_loop_stack_pop(parser);
18449
18450 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
18451 end_keyword = parser->previous;
18452 }
18453
      /* The locals list is computed from the method's scope (via
       * pm_locals_order) before that scope is popped. */
18454 pm_constant_id_list_t locals;
18455 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18456 pm_parser_scope_pop(parser);
18457
18458 /* If the final character is `@` as is the case when defining methods to
18459 * override the unary operators, we should ignore the @ in the same way we
18460 * do for symbols. */
18461 pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.start, parse_operator_symbol_name(&name));
18462
18463 flush_block_exits(parser, previous_block_exits);
18464
      /* Tokens that were never set (still zero-initialized) are passed through
       * NTOK2PTR rather than by address. */
18465 return UP(pm_def_node_create(
18466 parser,
18467 name_id,
18468 &name,
18469 receiver,
18470 params,
18471 statements,
18472 &locals,
18473 &def_keyword,
18474 NTOK2PTR(operator),
18475 NTOK2PTR(lparen),
18476 NTOK2PTR(rparen),
18477 NTOK2PTR(equal),
18478 NTOK2PTR(end_keyword)
18479 ));
18480}
18481
18485static pm_node_t *
18486parse_module(pm_parser_t *parser, uint8_t flags, uint16_t depth) {
18487 pm_node_list_t current_block_exits = { 0 };
18488 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18489
18490 size_t opening_newline_index = token_newline_index(parser);
18491 parser_lex(parser);
18492 pm_token_t module_keyword = parser->previous;
18493
18494 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
18495 pm_token_t name;
18496
18497 /* If we can recover from a syntax error that occurred while parsing the
18498 * name of the module, then we'll handle that here. */
18499 if (PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
18500 pop_block_exits(parser, previous_block_exits);
18501
18502 pm_token_t missing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
18503 return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
18504 }
18505
18506 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
18507 pm_token_t double_colon = parser->previous;
18508
18509 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18510 constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous));
18511 }
18512
18513 /* Here we retrieve the name of the module. If it wasn't a constant, then
18514 * it's possible that `module foo` was passed, which is a syntax error. We
18515 * handle that here as well. */
18516 name = parser->previous;
18517 if (name.type != PM_TOKEN_CONSTANT) {
18518 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
18519 }
18520
18521 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE) && !PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !PM_NODE_TYPE_P(constant_path, PM_ERROR_RECOVERY_NODE)) {
18522 constant_path = UP(pm_error_recovery_node_create_unexpected(parser, constant_path));
18523 }
18524
18525 pm_parser_scope_push(parser, true);
18526 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
18527 pm_node_t *statements = NULL;
18528
18529 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18530 pm_accepts_block_stack_push(parser, true);
18531 statements = UP(parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
18532 pm_accepts_block_stack_pop(parser);
18533 }
18534
18535 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
18536 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18537 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
18538 } else {
18539 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
18540 }
18541
18542 pm_constant_id_list_t locals;
18543 pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
18544
18545 pm_parser_scope_pop(parser);
18546 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
18547
18548 if (context_def_p(parser)) {
18549 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
18550 }
18551
18552 pop_block_exits(parser, previous_block_exits);
18553
18554 return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous));
18555}
18556
18560static pm_node_t *
18561parse_string_array(pm_parser_t *parser, uint16_t depth) {
18562 parser_lex(parser);
18563 pm_token_t opening = parser->previous;
18564 pm_array_node_t *array = pm_array_node_create(parser, &opening);
18565
18566 /* This is the current node that we are parsing that will be added to the
18567 * list of elements. */
18568 pm_node_t *current = NULL;
18569
18570 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
18571 switch (parser->current.type) {
18572 case PM_TOKEN_WORDS_SEP: {
18573 /* Reset the explicit encoding if we hit a separator since each
18574 * element can have its own encoding. */
18575 parser->explicit_encoding = NULL;
18576
18577 if (current == NULL) {
18578 /* If we hit a separator before we have any content, then we
18579 * don't need to do anything. */
18580 } else {
18581 /* If we hit a separator after we've hit content, then we
18582 * need to append that content to the list and reset the
18583 * current node. */
18584 pm_array_node_elements_append(parser->arena, array, current);
18585 current = NULL;
18586 }
18587
18588 parser_lex(parser);
18589 break;
18590 }
18591 case PM_TOKEN_STRING_CONTENT: {
18592 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
18593 pm_node_flag_set(string, parse_unescaped_encoding(parser));
18594 parser_lex(parser);
18595
18596 if (current == NULL) {
18597 /* If we hit content and the current node is NULL, then this
18598 * is the first string content we've seen. In that case
18599 * we're going to create a new string node and set that to
18600 * the current. */
18601 current = string;
18602 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
18603 /* If we hit string content and the current node is an
18604 * interpolated string, then we need to append the string
18605 * content to the list of child nodes. */
18606 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
18607 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
18608 /* If we hit string content and the current node is a string
18609 * node, then we need to convert the current node into an
18610 * interpolated string and add the string content to the
18611 * list of child nodes. */
18612 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
18613 pm_interpolated_string_node_append(parser, interpolated, current);
18614 pm_interpolated_string_node_append(parser, interpolated, string);
18615 current = UP(interpolated);
18616 } else {
18617 assert(false && "unreachable");
18618 }
18619
18620 break;
18621 }
18622 case PM_TOKEN_EMBVAR: {
18623 if (current == NULL) {
18624 /* If we hit an embedded variable and the current node is
18625 * NULL, then this is the start of a new string. We'll set
18626 * the current node to a new interpolated string. */
18627 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
18628 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
18629 /* If we hit an embedded variable and the current node is a
18630 * string node, then we'll convert the current into an
18631 * interpolated string and add the string node to the list
18632 * of parts. */
18633 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
18634 pm_interpolated_string_node_append(parser, interpolated, current);
18635 current = UP(interpolated);
18636 } else {
18637 /* If we hit an embedded variable and the current node is an
18638 * interpolated string, then we'll just add the embedded
18639 * variable. */
18640 }
18641
18642 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
18643 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
18644 break;
18645 }
18646 case PM_TOKEN_EMBEXPR_BEGIN: {
18647 if (current == NULL) {
18648 /* If we hit an embedded expression and the current node is
18649 * NULL, then this is the start of a new string. We'll set
18650 * the current node to a new interpolated string. */
18651 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
18652 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
18653 /* If we hit an embedded expression and the current node is
18654 * a string node, then we'll convert the current into an
18655 * interpolated string and add the string node to the list
18656 * of parts. */
18657 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
18658 pm_interpolated_string_node_append(parser, interpolated, current);
18659 current = UP(interpolated);
18660 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
18661 /* If we hit an embedded expression and the current node is
18662 * an interpolated string, then we'll just continue on. */
18663 } else {
18664 assert(false && "unreachable");
18665 }
18666
18667 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
18668 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, part);
18669 break;
18670 }
18671 default:
18672 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
18673 parser_lex(parser);
18674 break;
18675 }
18676 }
18677
18678 /* If we have a current node, then we need to append it to the list. */
18679 if (current) {
18680 pm_array_node_elements_append(parser->arena, array, current);
18681 }
18682
18683 pm_token_t closing = parser->current;
18684 if (match1(parser, PM_TOKEN_EOF)) {
18685 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
18686 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
18687 } else {
18688 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
18689 }
18690
18691 pm_array_node_close_set(parser, array, &closing);
18692 return UP(array);
18693}
18694
18698static pm_node_t *
18699parse_symbol_array(pm_parser_t *parser, uint16_t depth) {
18700 parser_lex(parser);
18701 pm_token_t opening = parser->previous;
18702 pm_array_node_t *array = pm_array_node_create(parser, &opening);
18703
18704 /* This is the current node that we are parsing that will be added to the
18705 * list of elements. */
18706 pm_node_t *current = NULL;
18707
18708 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
18709 switch (parser->current.type) {
18710 case PM_TOKEN_WORDS_SEP: {
18711 if (current == NULL) {
18712 /* If we hit a separator before we have any content, then we
18713 * don't need to do anything. */
18714 } else {
18715 /* If we hit a separator after we've hit content, then we
18716 * need to append that content to the list and reset the
18717 * current node. */
18718 pm_array_node_elements_append(parser->arena, array, current);
18719 current = NULL;
18720 }
18721
18722 parser_lex(parser);
18723 break;
18724 }
18725 case PM_TOKEN_STRING_CONTENT: {
18726 if (current == NULL) {
18727 /* If we hit content and the current node is NULL, then this
18728 * is the first string content we've seen. In that case
18729 * we're going to create a new string node and set that to
18730 * the current. */
18731 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
18732 parser_lex(parser);
18733 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
18734 /* If we hit string content and the current node is an
18735 * interpolated string, then we need to append the string
18736 * content to the list of child nodes. */
18737 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
18738 parser_lex(parser);
18739
18740 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
18741 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
18742 /* If we hit string content and the current node is a symbol
18743 * node, then we need to convert the current node into an
18744 * interpolated string and add the string content to the
18745 * list of child nodes. */
18746 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
18747 pm_token_t content = {
18748 .type = PM_TOKEN_STRING_CONTENT,
18749 .start = parser->start + cast->value_loc.start,
18750 .end = parser->start + cast->value_loc.start + cast->value_loc.length
18751 };
18752
18753 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
18754 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
18755 parser_lex(parser);
18756
18757 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
18758 pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
18759 pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
18760
18761 current = UP(interpolated);
18762 } else {
18763 assert(false && "unreachable");
18764 }
18765
18766 break;
18767 }
18768 case PM_TOKEN_EMBVAR: {
18769 bool start_location_set = false;
18770 if (current == NULL) {
18771 /* If we hit an embedded variable and the current node is
18772 * NULL, then this is the start of a new string. We'll set
18773 * the current node to a new interpolated string. */
18774 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
18775 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
18776 /* If we hit an embedded variable and the current node is a
18777 * string node, then we'll convert the current into an
18778 * interpolated string and add the string node to the list
18779 * of parts. */
18780 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
18781
18782 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
18783 pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
18784 PM_NODE_START_SET_NODE(interpolated, current);
18785 start_location_set = true;
18786 current = UP(interpolated);
18787 } else {
18788 /* If we hit an embedded variable and the current node is an
18789 * interpolated string, then we'll just add the embedded
18790 * variable. */
18791 }
18792
18793 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
18794 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
18795 if (!start_location_set) {
18796 PM_NODE_START_SET_NODE(current, part);
18797 }
18798 break;
18799 }
18800 case PM_TOKEN_EMBEXPR_BEGIN: {
18801 bool start_location_set = false;
18802 if (current == NULL) {
18803 /* If we hit an embedded expression and the current node is
18804 * NULL, then this is the start of a new string. We'll set
18805 * the current node to a new interpolated string. */
18806 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
18807 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
18808 /* If we hit an embedded expression and the current node is
18809 * a string node, then we'll convert the current into an
18810 * interpolated string and add the string node to the list
18811 * of parts. */
18812 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
18813
18814 current = UP(pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current));
18815 pm_interpolated_symbol_node_append(parser->arena, interpolated, current);
18816 PM_NODE_START_SET_NODE(interpolated, current);
18817 start_location_set = true;
18818 current = UP(interpolated);
18819 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
18820 /* If we hit an embedded expression and the current node is
18821 * an interpolated string, then we'll just continue on. */
18822 } else {
18823 assert(false && "unreachable");
18824 }
18825
18826 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
18827 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, part);
18828 if (!start_location_set) {
18829 PM_NODE_START_SET_NODE(current, part);
18830 }
18831 break;
18832 }
18833 default:
18834 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
18835 parser_lex(parser);
18836 break;
18837 }
18838 }
18839
18840 /* If we have a current node, then we need to append it to the list. */
18841 if (current) {
18842 pm_array_node_elements_append(parser->arena, array, current);
18843 }
18844
18845 pm_token_t closing = parser->current;
18846 if (match1(parser, PM_TOKEN_EOF)) {
18847 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
18848 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
18849 } else {
18850 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
18851 }
18852 pm_array_node_close_set(parser, array, &closing);
18853
18854 return UP(array);
18855}
18856
18861static pm_node_t *
18862parse_parentheses(pm_parser_t *parser, pm_binding_power_t binding_power, uint16_t depth) {
18863 pm_token_t opening = parser->current;
18864 pm_node_flags_t paren_flags = 0;
18865
18866 pm_node_list_t current_block_exits = { 0 };
18867 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18868
18869 parser_lex(parser);
18870 while (true) {
18871 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18872 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18873 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18874 break;
18875 }
18876 }
18877
18878 /* If this is the end of the file or we match a right parenthesis, then we
18879 * have an empty parentheses node, and we can immediately return. */
18880 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18881 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18882 pop_block_exits(parser, previous_block_exits);
18883 return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->previous, paren_flags));
18884 }
18885
18886 /* Otherwise, we're going to parse the first statement in the list of
18887 * statements within the parentheses. */
18888 pm_accepts_block_stack_push(parser, true);
18889 context_push(parser, PM_CONTEXT_PARENS);
18890 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18891 context_pop(parser);
18892
18893 /* Determine if this statement is followed by a terminator. In the case of a
18894 * single statement, this is fine. But in the case of multiple statements
18895 * it's required. */
18896 bool terminator_found = false;
18897
18898 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18899 terminator_found = true;
18900 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18901 } else if (accept1(parser, PM_TOKEN_NEWLINE)) {
18902 terminator_found = true;
18903 }
18904
18905 if (terminator_found) {
18906 while (true) {
18907 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
18908 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18909 } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
18910 break;
18911 }
18912 }
18913 }
18914
18915 /* If we hit a right parenthesis, then we're done parsing the parentheses
18916 * node, and we can check which kind of node we should return. */
18917 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18918 if (opening.type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
18919 lex_state_set(parser, PM_LEX_STATE_ENDARG);
18920 }
18921
18922 parser_lex(parser);
18923 pm_accepts_block_stack_pop(parser);
18924 pop_block_exits(parser, previous_block_exits);
18925
18926 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18927 /* If we have a single statement and are ending on a right
18928 * parenthesis, then we need to check if this is possibly a multiple
18929 * target node. */
18930 pm_multi_target_node_t *multi_target;
18931
18932 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.length == 0) {
18933 multi_target = (pm_multi_target_node_t *) statement;
18934 } else {
18935 multi_target = pm_multi_target_node_create(parser);
18936 pm_multi_target_node_targets_append(parser, multi_target, statement);
18937 }
18938
18939 multi_target->lparen_loc = TOK2LOC(parser, &opening);
18940 multi_target->rparen_loc = TOK2LOC(parser, &parser->previous);
18941 PM_NODE_START_SET_TOKEN(parser, multi_target, &opening);
18942 PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->previous);
18943
18944 pm_node_t *result;
18945 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18946 result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18947 accept1(parser, PM_TOKEN_NEWLINE);
18948 } else {
18949 result = UP(multi_target);
18950 }
18951
18952 if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18953 /* All set, this is explicitly allowed by the parent context. */
18954 } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18955 /* All set, we're inside a for loop and we're parsing multiple
18956 * targets. */
18957 } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18958 /* Multi targets are not allowed when it's not a statement
18959 * level. */
18960 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18961 } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18962 /* Multi targets must be followed by an equal sign in order to
18963 * be valid (or a right parenthesis if they are nested). */
18964 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18965 }
18966
18967 return result;
18968 }
18969
18970 /* If we have a single statement and are ending on a right parenthesis
18971 * and we didn't return a multiple assignment node, then we can return a
18972 * regular parentheses node now. */
18973 pm_statements_node_t *statements = pm_statements_node_create(parser);
18974 pm_statements_node_body_append(parser, statements, statement, true);
18975
18976 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
18977 }
18978
18979 /* If we have more than one statement in the set of parentheses, then we are
18980 * going to parse all of them as a list of statements. We'll do that here.
18981 */
18982 context_push(parser, PM_CONTEXT_PARENS);
18983 paren_flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
18984
18985 pm_statements_node_t *statements = pm_statements_node_create(parser);
18986 pm_statements_node_body_append(parser, statements, statement, true);
18987
18988 /* If we didn't find a terminator and we didn't find a right parenthesis,
18989 * then this is a syntax error. */
18990 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18991 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
18992 }
18993
18994 /* Parse each statement within the parentheses. */
18995 while (true) {
18996 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18997 pm_statements_node_body_append(parser, statements, node, true);
18998
18999 /* If we're recovering from a syntax error, then we need to stop parsing
19000 * the statements now. */
19001 if (parser->recovering) {
19002 /* If this is the level of context where the recovery has happened,
19003 * then we can mark the parser as done recovering. */
19004 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
19005 break;
19006 }
19007
19008 /* If we couldn't parse an expression at all, then we need to bail out
19009 * of the loop. */
19010 if (PM_NODE_TYPE_P(node, PM_ERROR_RECOVERY_NODE)) break;
19011
19012 /* If we successfully parsed a statement, then we are going to need a
19013 * terminator to delimit them. */
19014 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19015 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
19016 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
19017 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19018 break;
19019 } else if (!match1(parser, PM_TOKEN_EOF)) {
19020 /* If we're at the end of the file, then we're going to add an error
19021 * after this for the ) anyway. */
19022 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
19023 }
19024 }
19025
19026 context_pop(parser);
19027 pm_accepts_block_stack_pop(parser);
19028 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19029
19030 /* When we're parsing multi targets, we allow them to be followed by a right
19031 * parenthesis if they are at the statement level. This is only possible if
19032 * they are the final statement in a parentheses. We need to explicitly
19033 * reject that here. */
19034 {
19035 pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
19036
19037 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
19038 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
19039 pm_multi_target_node_targets_append(parser, multi_target, statement);
19040
19041 statement = UP(multi_target);
19042 statements->body.nodes[statements->body.size - 1] = statement;
19043 }
19044
19045 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
19046 const uint8_t *offset = parser->start + PM_NODE_END(statement);
19047 pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
19048 pm_node_t *value = UP(pm_error_recovery_node_create(parser, PM_NODE_END(statement), 0));
19049
19050 statement = UP(pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value));
19051 statements->body.nodes[statements->body.size - 1] = statement;
19052
19053 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
19054 }
19055 }
19056
19057 pop_block_exits(parser, previous_block_exits);
19058 pm_void_statements_check(parser, statements, true);
19059 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->previous, paren_flags));
19060}
19061
19065static PRISM_INLINE pm_node_t *
19066parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
19067 switch (parser->current.type) {
19068 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
19069 parser_lex(parser);
19070
19071 pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
19072 pm_accepts_block_stack_push(parser, true);
19073 bool parsed_bare_hash = false;
19074
19075 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
19076 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
19077
19078 // Handle the case where we don't have a comma and we have a
19079 // newline followed by a right bracket.
19080 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
19081 break;
19082 }
19083
19084 // Ensure that we have a comma between elements in the array.
19085 if (array->elements.size > 0) {
19086 if (accept1(parser, PM_TOKEN_COMMA)) {
19087 // If there was a comma but we also accepts a newline,
19088 // then this is a syntax error.
19089 if (accepted_newline) {
19090 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
19091 }
19092 } else {
19093 // If there was no comma, then we need to add a syntax
19094 // error.
19095 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_END(parser, &parser->previous), 0, PM_ERR_ARRAY_SEPARATOR, pm_token_str(parser->current.type));
19096 parser->previous.start = parser->previous.end;
19097 parser->previous.type = 0;
19098 }
19099 }
19100
19101 // If we have a right bracket immediately following a comma,
19102 // this is allowed since it's a trailing comma. In this case we
19103 // can break out of the loop.
19104 if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
19105
19106 pm_node_t *element;
19107
19108 if (accept1(parser, PM_TOKEN_USTAR)) {
19109 pm_token_t operator = parser->previous;
19110 pm_node_t *expression = NULL;
19111
19112 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
19113 pm_parser_scope_forwarding_positionals_check(parser, &operator);
19114 } else {
19115 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19116 }
19117
19118 element = UP(pm_splat_node_create(parser, &operator, expression));
19119 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
19120 if (parsed_bare_hash) {
19121 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
19122 }
19123
19124 element = UP(pm_keyword_hash_node_create(parser));
19125 pm_static_literals_t hash_keys = { 0 };
19126
19127 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_DO_BLOCK, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
19128 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
19129 }
19130
19131 pm_static_literals_free(&hash_keys);
19132 parsed_bare_hash = true;
19133 } else {
19134 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_LABEL), PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
19135
19136 if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
19137 if (parsed_bare_hash) {
19138 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
19139 }
19140
19141 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
19142 pm_static_literals_t hash_keys = { 0 };
19143 pm_hash_key_static_literals_add(parser, &hash_keys, element);
19144
19145 pm_token_t operator = { 0 };
19146 if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
19147 operator = parser->previous;
19148 }
19149
19150 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
19151 pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(operator), value));
19152 pm_keyword_hash_node_elements_append(parser->arena, hash, assoc);
19153
19154 element = UP(hash);
19155 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
19156 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
19157 }
19158
19159 pm_static_literals_free(&hash_keys);
19160 parsed_bare_hash = true;
19161 }
19162 }
19163
19164 pm_array_node_elements_append(parser->arena, array, element);
19165 if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break;
19166 }
19167
19168 accept1(parser, PM_TOKEN_NEWLINE);
19169
19170 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
19171 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_ARRAY_TERM, pm_token_str(parser->current.type));
19172 parser->previous.start = parser->previous.end;
19173 parser->previous.type = 0;
19174 }
19175
19176 pm_array_node_close_set(parser, array, &parser->previous);
19177 pm_accepts_block_stack_pop(parser);
19178
19179 return UP(array);
19180 }
19181 case PM_TOKEN_PARENTHESIS_LEFT:
19182 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
19183 return parse_parentheses(parser, binding_power, depth);
19184 case PM_TOKEN_BRACE_LEFT: {
19185 // If we were passed a current_hash_keys via the parser, then that
19186 // means we're already parsing a hash and we want to share the set
19187 // of hash keys with this inner hash we're about to parse for the
19188 // sake of warnings. We'll set it to NULL after we grab it to make
19189 // sure subsequent expressions don't use it. Effectively this is a
19190 // way of getting around passing it to every call to
19191 // parse_expression.
19192 pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
19193 parser->current_hash_keys = NULL;
19194
19195 pm_accepts_block_stack_push(parser, true);
19196 parser_lex(parser);
19197
19198 pm_token_t opening = parser->previous;
19199 pm_hash_node_t *node = pm_hash_node_create(parser, &opening);
19200
19201 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
19202 if (current_hash_keys != NULL) {
19203 parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
19204 } else {
19205 pm_static_literals_t hash_keys = { 0 };
19206 parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
19207 pm_static_literals_free(&hash_keys);
19208 }
19209
19210 accept1(parser, PM_TOKEN_NEWLINE);
19211 }
19212
19213 pm_accepts_block_stack_pop(parser);
19214 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
19215 pm_hash_node_closing_loc_set(parser, node, &parser->previous);
19216
19217 return UP(node);
19218 }
19219 case PM_TOKEN_CHARACTER_LITERAL: {
19220 pm_node_t *node = UP(pm_string_node_create_current_string(
19221 parser,
19222 &(pm_token_t) {
19223 .type = PM_TOKEN_STRING_BEGIN,
19224 .start = parser->current.start,
19225 .end = parser->current.start + 1
19226 },
19227 &(pm_token_t) {
19228 .type = PM_TOKEN_STRING_CONTENT,
19229 .start = parser->current.start + 1,
19230 .end = parser->current.end
19231 },
19232 NULL
19233 ));
19234
19235 pm_node_flag_set(node, parse_unescaped_encoding(parser));
19236
19237 // Skip past the character literal here, since now we have handled
19238 // parser->explicit_encoding correctly.
19239 parser_lex(parser);
19240
19241 // Characters can be followed by strings in which case they are
19242 // automatically concatenated.
19243 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
19244 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
19245 }
19246
19247 return node;
19248 }
19249 case PM_TOKEN_CLASS_VARIABLE: {
19250 parser_lex(parser);
19251 pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->previous));
19252
19253 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19254 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19255 }
19256
19257 return node;
19258 }
19259 case PM_TOKEN_CONSTANT: {
19260 parser_lex(parser);
19261 pm_token_t constant = parser->previous;
19262
19263 // If a constant is immediately followed by parentheses, then this is in
19264 // fact a method call, not a constant read.
19265 if (
19266 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
19267 ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
19268 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
19269 match1(parser, PM_TOKEN_BRACE_LEFT)
19270 ) {
19271 pm_arguments_t arguments = { 0 };
19272 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
19273 return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
19274 }
19275
19276 pm_node_t *node = UP(pm_constant_read_node_create(parser, &parser->previous));
19277
19278 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
19279 // If we get here, then we have a comma immediately following a
19280 // constant, so we're going to parse this as a multiple assignment.
19281 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19282 }
19283
19284 return node;
19285 }
19286 case PM_TOKEN_UCOLON_COLON: {
19287 parser_lex(parser);
19288 pm_token_t delimiter = parser->previous;
19289
19290 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19291 pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous));
19292
19293 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
19294 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19295 }
19296
19297 return node;
19298 }
19299 case PM_TOKEN_UDOT_DOT:
19300 case PM_TOKEN_UDOT_DOT_DOT: {
19301 pm_token_t operator = parser->current;
19302 parser_lex(parser);
19303
19304 pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
19305
19306 // Unary .. and ... are special because these are non-associative
19307 // operators that can also be unary operators. In this case we need
19308 // to explicitly reject code that has a .. or ... that follows this
19309 // expression.
19310 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
19311 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
19312 }
19313
19314 return UP(pm_range_node_create(parser, NULL, &operator, right));
19315 }
19316 case PM_TOKEN_FLOAT:
19317 parser_lex(parser);
19318 return UP(pm_float_node_create(parser, &parser->previous));
19319 case PM_TOKEN_FLOAT_IMAGINARY:
19320 parser_lex(parser);
19321 return UP(pm_float_node_imaginary_create(parser, &parser->previous));
19322 case PM_TOKEN_FLOAT_RATIONAL:
19323 parser_lex(parser);
19324 return UP(pm_float_node_rational_create(parser, &parser->previous));
19325 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
19326 parser_lex(parser);
19327 return UP(pm_float_node_rational_imaginary_create(parser, &parser->previous));
19328 case PM_TOKEN_NUMBERED_REFERENCE: {
19329 parser_lex(parser);
19330 pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->previous));
19331
19332 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19333 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19334 }
19335
19336 return node;
19337 }
19338 case PM_TOKEN_GLOBAL_VARIABLE: {
19339 parser_lex(parser);
19340 pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->previous));
19341
19342 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19343 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19344 }
19345
19346 return node;
19347 }
19348 case PM_TOKEN_BACK_REFERENCE: {
19349 parser_lex(parser);
19350 pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->previous));
19351
19352 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19353 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19354 }
19355
19356 return node;
19357 }
19358 case PM_TOKEN_IDENTIFIER:
19359 case PM_TOKEN_METHOD_NAME: {
19360 parser_lex(parser);
19361 pm_token_t identifier = parser->previous;
19362 pm_node_t *node = parse_variable_call(parser);
19363
19364 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
19365 // If parse_variable_call returned with a call node, then we
19366 // know the identifier is not in the local table. In that case
19367 // we need to check if there are arguments following the
19368 // identifier.
19369 pm_call_node_t *call = (pm_call_node_t *) node;
19370 pm_arguments_t arguments = { 0 };
19371
19372 if (parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1))) {
19373 // Since we found arguments, we need to turn off the
19374 // variable call bit in the flags.
19375 pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
19376
19377 call->opening_loc = arguments.opening_loc;
19378 call->arguments = arguments.arguments;
19379 call->closing_loc = arguments.closing_loc;
19380 call->block = arguments.block;
19381
19382 const pm_location_t *end = pm_arguments_end(&arguments);
19383 if (end == NULL) {
19384 PM_NODE_LENGTH_SET_LOCATION(call, &call->message_loc);
19385 } else {
19386 PM_NODE_LENGTH_SET_LOCATION(call, end);
19387 }
19388 }
19389 } else {
19390 // Otherwise, we know the identifier is in the local table. This
19391 // can still be a method call if it is followed by arguments or
19392 // a block, so we need to check for that here.
19393 if (
19394 ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
19395 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
19396 match1(parser, PM_TOKEN_BRACE_LEFT)
19397 ) {
19398 pm_arguments_t arguments = { 0 };
19399 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
19400 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
19401
19402 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
19403 // If we're about to convert an 'it' implicit local
19404 // variable read into a method call, we need to remove
19405 // it from the list of implicit local variables.
19406 pm_node_unreference(parser, node);
19407 } else {
19408 // Otherwise, we're about to convert a regular local
19409 // variable read into a method call, in which case we
19410 // need to indicate that this was not a read for the
19411 // purposes of warnings.
19412 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
19413
19414 if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) {
19415 pm_node_unreference(parser, node);
19416 } else {
19418 pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
19419 }
19420 }
19421
19422 return UP(fcall);
19423 }
19424 }
19425
19426 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
19427 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19428 }
19429
19430 return node;
19431 }
19432 case PM_TOKEN_HEREDOC_START: {
19433 // Here we have found a heredoc. We'll parse it and add it to the
19434 // list of strings.
19435 assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
19436 pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
19437
19438 size_t common_whitespace = (size_t) -1;
19439 parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
19440
19441 parser_lex(parser);
19442 pm_token_t opening = parser->previous;
19443
19444 pm_node_t *node;
19445 pm_node_t *part;
19446
19447 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
19448 // If we get here, then we have an empty heredoc. We'll create
19449 // an empty content token and return an empty string node.
19450 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
19451 pm_token_t content = parse_strings_empty_content(parser->previous.start);
19452
19453 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
19454 node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
19455 } else {
19456 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY));
19457 }
19458
19459 PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening);
19460 } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
19461 // If we get here, then we tried to find something in the
19462 // heredoc but couldn't actually parse anything, so we'll just
19463 // return an error recovery node.
19464 //
19465 // parse_string_part handles its own errors, so there is no need
19466 // for us to add one here.
19467 node = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
19468 } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
19469 // If we get here, then the part that we parsed was plain string
19470 // content and we're at the end of the heredoc, so we can return
19471 // just a string node with the heredoc opening and closing as
19472 // its opening and closing.
19473 pm_node_flag_set(part, parse_unescaped_encoding(parser));
19474 pm_string_node_t *cast = (pm_string_node_t *) part;
19475
19476 cast->opening_loc = TOK2LOC(parser, &opening);
19477 cast->closing_loc = TOK2LOC(parser, &parser->current);
19478 cast->base.location = cast->opening_loc;
19479
19480 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
19481 assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
19482 cast->base.type = PM_X_STRING_NODE;
19483 }
19484
19485 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
19486 parse_heredoc_dedent_string(parser->arena, &cast->unescaped, common_whitespace);
19487 }
19488
19489 node = UP(cast);
19490 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
19491 } else {
19492 // If we get here, then we have multiple parts in the heredoc,
19493 // so we'll need to create an interpolated string node to hold
19494 // them all.
19495 pm_node_list_t parts = { 0 };
19496 pm_node_list_append(parser->arena, &parts, part);
19497
19498 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
19499 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19500 pm_node_list_append(parser->arena, &parts, part);
19501 }
19502 }
19503
19504 // Now that we have all of the parts, create the correct type of
19505 // interpolated node.
19506 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
19507 pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19508 cast->parts = parts;
19509
19510 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
19511 pm_interpolated_xstring_node_closing_set(parser, cast, &parser->previous);
19512
19513 cast->base.location = cast->opening_loc;
19514 node = UP(cast);
19515 } else {
19516 pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
19517
19518 expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
19519 pm_interpolated_string_node_closing_set(parser, cast, &parser->previous);
19520
19521 cast->base.location = cast->opening_loc;
19522 node = UP(cast);
19523 }
19524
19525 // If this is a heredoc that is indented with a ~, then we need
19526 // to dedent each line by the common leading whitespace.
19527 if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
19528 pm_node_list_t *nodes;
19529 if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
19530 nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
19531 } else {
19532 nodes = &((pm_interpolated_string_node_t *) node)->parts;
19533 }
19534
19535 parse_heredoc_dedent(parser, nodes, common_whitespace);
19536 }
19537 }
19538
19539 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
19540 return parse_strings(parser, node, false, (uint16_t) (depth + 1));
19541 }
19542
19543 return node;
19544 }
19545 case PM_TOKEN_INSTANCE_VARIABLE: {
19546 parser_lex(parser);
19547 pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->previous));
19548
19549 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
19550 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19551 }
19552
19553 return node;
19554 }
19555 case PM_TOKEN_INTEGER: {
19556 pm_node_flags_t base = parser->integer.base;
19557 parser_lex(parser);
19558 return UP(pm_integer_node_create(parser, base, &parser->previous));
19559 }
19560 case PM_TOKEN_INTEGER_IMAGINARY: {
19561 pm_node_flags_t base = parser->integer.base;
19562 parser_lex(parser);
19563 return UP(pm_integer_node_imaginary_create(parser, base, &parser->previous));
19564 }
19565 case PM_TOKEN_INTEGER_RATIONAL: {
19566 pm_node_flags_t base = parser->integer.base;
19567 parser_lex(parser);
19568 return UP(pm_integer_node_rational_create(parser, base, &parser->previous));
19569 }
19570 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
19571 pm_node_flags_t base = parser->integer.base;
19572 parser_lex(parser);
19573 return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->previous));
19574 }
19575 case PM_TOKEN_KEYWORD___ENCODING__:
19576 parser_lex(parser);
19577 return UP(pm_source_encoding_node_create(parser, &parser->previous));
19578 case PM_TOKEN_KEYWORD___FILE__:
19579 parser_lex(parser);
19580 return UP(pm_source_file_node_create(parser, &parser->previous));
19581 case PM_TOKEN_KEYWORD___LINE__:
19582 parser_lex(parser);
19583 return UP(pm_source_line_node_create(parser, &parser->previous));
19584 case PM_TOKEN_KEYWORD_ALIAS: {
19585 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19586 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
19587 }
19588
19589 parser_lex(parser);
19590 pm_token_t keyword = parser->previous;
19591
19592 pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
19593 pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
19594
19595 switch (PM_NODE_TYPE(new_name)) {
19596 case PM_BACK_REFERENCE_READ_NODE:
19597 case PM_NUMBERED_REFERENCE_READ_NODE:
19598 case PM_GLOBAL_VARIABLE_READ_NODE: {
19599 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
19600 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
19601 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
19602 }
19603 } else if (!PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) {
19604 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
19605 old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name));
19606 }
19607
19608 return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
19609 }
19610 case PM_SYMBOL_NODE:
19611 case PM_INTERPOLATED_SYMBOL_NODE: {
19612 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_ERROR_RECOVERY_NODE)) {
19613 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
19614 old_name = UP(pm_error_recovery_node_create_unexpected(parser, old_name));
19615 }
19616 }
19618 default:
19619 return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
19620 }
19621 }
19622 case PM_TOKEN_KEYWORD_CASE:
19623 return parse_case(parser, flags, depth);
19624 case PM_TOKEN_KEYWORD_BEGIN: {
19625 size_t opening_newline_index = token_newline_index(parser);
19626 parser_lex(parser);
19627
19628 pm_token_t begin_keyword = parser->previous;
19629 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19630
19631 pm_node_list_t current_block_exits = { 0 };
19632 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19633 pm_statements_node_t *begin_statements = NULL;
19634
19635 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19636 pm_accepts_block_stack_push(parser, true);
19637 begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
19638 pm_accepts_block_stack_pop(parser);
19639 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19640 }
19641
19642 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
19643 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
19644 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
19645
19646 PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->previous);
19647 pm_begin_node_end_keyword_set(parser, begin_node, &parser->previous);
19648 pop_block_exits(parser, previous_block_exits);
19649 return UP(begin_node);
19650 }
19651 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
19652 pm_node_list_t current_block_exits = { 0 };
19653 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19654
19655 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19656 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
19657 }
19658
19659 parser_lex(parser);
19660 pm_token_t keyword = parser->previous;
19661
19662 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
19663 pm_token_t opening = parser->previous;
19664 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
19665
19666 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
19667 pm_context_t context = parser->current_context->context;
19668 if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19669 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19670 }
19671
19672 flush_block_exits(parser, previous_block_exits);
19673 return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
19674 }
19675 case PM_TOKEN_KEYWORD_BREAK:
19676 case PM_TOKEN_KEYWORD_NEXT:
19677 case PM_TOKEN_KEYWORD_RETURN: {
19678 parser_lex(parser);
19679
19680 pm_token_t keyword = parser->previous;
19681 pm_arguments_t arguments = { 0 };
19682
19683 if (
19684 token_begins_expression_p(parser->current.type) ||
19685 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19686 ) {
19687 pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19688
19689 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19690 pm_token_t next = parser->current;
19691 parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, flags, (uint16_t) (depth + 1));
19692
19693 // Reject `foo && return bar`.
19694 if (!(flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && arguments.arguments != NULL) {
19695 PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(next.type));
19696 }
19697 }
19698
19699 // It's possible that we've parsed a block argument through our
19700 // call to parse_arguments. If we found one, we should mark it
19701 // as invalid and destroy it, as we don't have a place for it.
19702 if (arguments.block != NULL) {
19703 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19704 pm_node_unreference(parser, arguments.block);
19705 arguments.block = NULL;
19706 }
19707 }
19708
19709 switch (keyword.type) {
19710 case PM_TOKEN_KEYWORD_BREAK: {
19711 pm_node_t *node = UP(pm_break_node_create(parser, &keyword, arguments.arguments));
19712 if (!parser->partial_script) parse_block_exit(parser, node);
19713 return node;
19714 }
19715 case PM_TOKEN_KEYWORD_NEXT: {
19716 pm_node_t *node = UP(pm_next_node_create(parser, &keyword, arguments.arguments));
19717 if (!parser->partial_script) parse_block_exit(parser, node);
19718 return node;
19719 }
19720 case PM_TOKEN_KEYWORD_RETURN: {
19721 pm_node_t *node = UP(pm_return_node_create(parser, &keyword, arguments.arguments));
19722 parse_return(parser, node);
19723 return node;
19724 }
19725 default:
19726 assert(false && "unreachable");
19727 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
19728 }
19729 }
19730 case PM_TOKEN_KEYWORD_SUPER: {
19731 parser_lex(parser);
19732
19733 pm_token_t keyword = parser->previous;
19734 pm_arguments_t arguments = { 0 };
19735 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
19736
19737 if (
19738 arguments.opening_loc.length == 0 &&
19739 arguments.arguments == NULL &&
19740 ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19741 ) {
19742 return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
19743 }
19744
19745 return UP(pm_super_node_create(parser, &keyword, &arguments));
19746 }
19747 case PM_TOKEN_KEYWORD_YIELD: {
19748 parser_lex(parser);
19749
19750 pm_token_t keyword = parser->previous;
19751 pm_arguments_t arguments = { 0 };
19752 parse_arguments_list(parser, &arguments, false, flags, (uint16_t) (depth + 1));
19753
19754 // It's possible that we've parsed a block argument through our
19755 // call to parse_arguments_list. If we found one, we should mark it
19756 // as invalid and destroy it, as we don't have a place for it on the
19757 // yield node.
19758 if (arguments.block != NULL) {
19759 pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19760 pm_node_unreference(parser, arguments.block);
19761 arguments.block = NULL;
19762 }
19763
19764 pm_node_t *node = UP(pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc));
19765 if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19766
19767 return node;
19768 }
19769 case PM_TOKEN_KEYWORD_CLASS:
19770 return parse_class(parser, flags, depth);
19771 case PM_TOKEN_KEYWORD_DEF:
19772 return parse_def(parser, binding_power, flags, depth);
19773 case PM_TOKEN_KEYWORD_DEFINED: {
19774 parser_lex(parser);
19775
19776 pm_token_t keyword = parser->previous;
19777 pm_token_t lparen = { 0 };
19778 pm_token_t rparen = { 0 };
19779 pm_node_t *expression;
19780
19781 context_push(parser, PM_CONTEXT_DEFINED);
19782 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
19783
19784 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19785 lparen = parser->previous;
19786
19787 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19788 expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19789 lparen = (pm_token_t) { 0 };
19790 } else {
19791 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19792
19793 if (!parser->recovering) {
19794 accept1(parser, PM_TOKEN_NEWLINE);
19795 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19796 rparen = parser->previous;
19797 }
19798 }
19799 } else {
19800 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19801 }
19802
19803 context_pop(parser);
19804 return UP(pm_defined_node_create(
19805 parser,
19806 NTOK2PTR(lparen),
19807 expression,
19808 NTOK2PTR(rparen),
19809 &keyword
19810 ));
19811 }
19812 case PM_TOKEN_KEYWORD_END_UPCASE: {
19813 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19814 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19815 }
19816
19817 parser_lex(parser);
19818 pm_token_t keyword = parser->previous;
19819
19820 if (context_def_p(parser)) {
19821 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19822 }
19823
19824 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19825 pm_token_t opening = parser->previous;
19826 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19827
19828 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
19829 return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous));
19830 }
19831 case PM_TOKEN_KEYWORD_FALSE:
19832 parser_lex(parser);
19833 return UP(pm_false_node_create(parser, &parser->previous));
19834 case PM_TOKEN_KEYWORD_FOR: {
19835 size_t opening_newline_index = token_newline_index(parser);
19836 parser_lex(parser);
19837
19838 pm_token_t for_keyword = parser->previous;
19839 pm_node_t *index;
19840
19841 context_push(parser, PM_CONTEXT_FOR_INDEX);
19842
19843 // First, parse out the first index expression.
19844 if (accept1(parser, PM_TOKEN_USTAR)) {
19845 pm_token_t star_operator = parser->previous;
19846 pm_node_t *name = NULL;
19847
19848 if (token_begins_expression_p(parser->current.type)) {
19849 name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19850 }
19851
19852 index = UP(pm_splat_node_create(parser, &star_operator, name));
19853 } else if (token_begins_expression_p(parser->current.type)) {
19854 index = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19855 } else {
19856 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19857 index = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &for_keyword), PM_TOKEN_LENGTH(&for_keyword)));
19858 }
19859
19860 // Now, if there are multiple index expressions, parse them out.
19861 if (match1(parser, PM_TOKEN_COMMA)) {
19862 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19863 } else {
19864 index = parse_target(parser, index, false, false);
19865 }
19866
19867 context_pop(parser);
19868 pm_do_loop_stack_push(parser, true);
19869
19870 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19871 pm_token_t in_keyword = parser->previous;
19872
19873 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19874 pm_do_loop_stack_pop(parser);
19875
19876 pm_token_t do_keyword = { 0 };
19877 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19878 do_keyword = parser->previous;
19879 } else {
19880 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19881 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_str(parser->current.type));
19882 }
19883 }
19884
19885 pm_statements_node_t *statements = NULL;
19886 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19887 statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19888 }
19889
19890 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19891 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
19892
19893 return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, NTOK2PTR(do_keyword), &parser->previous));
19894 }
19895 case PM_TOKEN_KEYWORD_IF:
19896 if (parser_end_of_line_p(parser)) {
19897 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_KEYWORD_EOL);
19898 }
19899
19900 size_t opening_newline_index = token_newline_index(parser);
19901 bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19902 parser_lex(parser);
19903
19904 return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19905 case PM_TOKEN_KEYWORD_UNDEF: {
19906 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19907 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19908 }
19909
19910 parser_lex(parser);
19911 pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19912 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19913
19914 if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) {
19915 } else {
19916 pm_undef_node_append(parser->arena, undef, name);
19917
19918 while (match1(parser, PM_TOKEN_COMMA)) {
19919 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19920 parser_lex(parser);
19921 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19922
19923 if (PM_NODE_TYPE_P(name, PM_ERROR_RECOVERY_NODE)) {
19924 break;
19925 }
19926
19927 pm_undef_node_append(parser->arena, undef, name);
19928 }
19929 }
19930
19931 return UP(undef);
19932 }
19933 case PM_TOKEN_KEYWORD_NOT: {
19934 parser_lex(parser);
19935
19936 pm_token_t message = parser->previous;
19937 pm_arguments_t arguments = { 0 };
19938 pm_node_t *receiver = NULL;
19939
19940 // The `not` keyword without parentheses is only valid in contexts
19941 // where it would be parsed as an expression (i.e., at or below
19942 // the `not` binding power level). In other contexts (e.g., method
19943 // arguments, array elements, assignment right-hand sides),
19944 // parentheses are required: `not(x)`. An exception is made for
19945 // endless def bodies, where `not` is valid as both `arg` and
19946 // `command` (e.g., `def f = not 1`, `def f = not foo bar`).
19947 if (binding_power > PM_BINDING_POWER_NOT && !(flags & PM_PARSE_IN_ENDLESS_DEF) && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19948 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19949 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->previous), 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19950 } else {
19951 accept1(parser, PM_TOKEN_NEWLINE);
19952 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19953 }
19954
19955 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
19956 }
19957
19958 accept1(parser, PM_TOKEN_NEWLINE);
19959
19960 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19961 pm_token_t lparen = parser->previous;
19962
19963 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19964 receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->previous, 0));
19965 } else {
19966 arguments.opening_loc = TOK2LOC(parser, &lparen);
19967 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, PM_PARSE_ACCEPTS_COMMAND_CALL | PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19968
19969 if (!parser->recovering) {
19970 accept1(parser, PM_TOKEN_NEWLINE);
19971 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19972 arguments.closing_loc = TOK2LOC(parser, &parser->previous);
19973 }
19974 }
19975 } else {
19976 receiver = parse_expression(parser, PM_BINDING_POWER_NOT, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19977 }
19978
19979 return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
19980 }
19981 case PM_TOKEN_KEYWORD_UNLESS: {
19982 size_t opening_newline_index = token_newline_index(parser);
19983 parser_lex(parser);
19984
19985 return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19986 }
19987 case PM_TOKEN_KEYWORD_MODULE:
19988 return parse_module(parser, flags, depth);
19989 case PM_TOKEN_KEYWORD_NIL:
19990 parser_lex(parser);
19991 return UP(pm_nil_node_create(parser, &parser->previous));
19992 case PM_TOKEN_KEYWORD_REDO: {
19993 parser_lex(parser);
19994
19995 pm_node_t *node = UP(pm_redo_node_create(parser, &parser->previous));
19996 if (!parser->partial_script) parse_block_exit(parser, node);
19997
19998 return node;
19999 }
20000 case PM_TOKEN_KEYWORD_RETRY: {
20001 parser_lex(parser);
20002
20003 pm_node_t *node = UP(pm_retry_node_create(parser, &parser->previous));
20004 parse_retry(parser, node);
20005
20006 return node;
20007 }
20008 case PM_TOKEN_KEYWORD_SELF:
20009 parser_lex(parser);
20010 return UP(pm_self_node_create(parser, &parser->previous));
20011 case PM_TOKEN_KEYWORD_TRUE:
20012 parser_lex(parser);
20013 return UP(pm_true_node_create(parser, &parser->previous));
20014 case PM_TOKEN_KEYWORD_UNTIL: {
20015 size_t opening_newline_index = token_newline_index(parser);
20016
20017 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
20018 pm_do_loop_stack_push(parser, true);
20019
20020 parser_lex(parser);
20021 pm_token_t keyword = parser->previous;
20022 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
20023
20024 pm_do_loop_stack_pop(parser);
20025 context_pop(parser);
20026
20027 pm_token_t do_keyword = { 0 };
20028 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
20029 do_keyword = parser->previous;
20030 } else {
20031 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
20032 }
20033
20034 pm_statements_node_t *statements = NULL;
20035 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
20036 pm_accepts_block_stack_push(parser, true);
20037 statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
20038 pm_accepts_block_stack_pop(parser);
20039 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
20040 }
20041
20042 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
20043 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
20044
20045 return UP(pm_until_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
20046 }
20047 case PM_TOKEN_KEYWORD_WHILE: {
20048 size_t opening_newline_index = token_newline_index(parser);
20049
20050 context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
20051 pm_do_loop_stack_push(parser, true);
20052
20053 parser_lex(parser);
20054 pm_token_t keyword = parser->previous;
20055 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
20056
20057 pm_do_loop_stack_pop(parser);
20058 context_pop(parser);
20059
20060 pm_token_t do_keyword = { 0 };
20061 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
20062 do_keyword = parser->previous;
20063 } else {
20064 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
20065 }
20066
20067 pm_statements_node_t *statements = NULL;
20068 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
20069 pm_accepts_block_stack_push(parser, true);
20070 statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
20071 pm_accepts_block_stack_pop(parser);
20072 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
20073 }
20074
20075 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
20076 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
20077
20078 return UP(pm_while_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->previous, predicate, statements, 0));
20079 }
20080 case PM_TOKEN_PERCENT_LOWER_I: {
20081 parser_lex(parser);
20082 pm_token_t opening = parser->previous;
20083 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20084 pm_node_t *current = NULL;
20085
20086 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20087 accept1(parser, PM_TOKEN_WORDS_SEP);
20088 if (match1(parser, PM_TOKEN_STRING_END)) break;
20089
20090 // Interpolation is not possible but nested heredocs can still lead to
20091 // consecutive (disjoint) string tokens when the final newline is escaped.
20092 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20093 // Record the string node, moving to interpolation if needed.
20094 if (current == NULL) {
20095 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->current, NULL));
20096 parser_lex(parser);
20097 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20098 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
20099 parser_lex(parser);
20100 pm_interpolated_symbol_node_append(parser->arena, (pm_interpolated_symbol_node_t *) current, string);
20101 } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20102 pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
20103 pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = parser->start + cast->value_loc.start, .end = parser->start + cast->value_loc.start + cast->value_loc.length };
20104 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->unescaped));
20105 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->previous, NULL));
20106 parser_lex(parser);
20107
20108 pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL);
20109 pm_interpolated_symbol_node_append(parser->arena, interpolated, first_string);
20110 pm_interpolated_symbol_node_append(parser->arena, interpolated, second_string);
20111
20112 // current is arena-allocated so no explicit free is needed.
20113 current = UP(interpolated);
20114 } else {
20115 assert(false && "unreachable");
20116 }
20117 }
20118
20119 if (current) {
20120 pm_array_node_elements_append(parser->arena, array, current);
20121 current = NULL;
20122 } else {
20123 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
20124 }
20125 }
20126
20127 pm_token_t closing = parser->current;
20128 if (match1(parser, PM_TOKEN_EOF)) {
20129 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
20130 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
20131 } else {
20132 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
20133 }
20134 pm_array_node_close_set(parser, array, &closing);
20135
20136 return UP(array);
20137 }
20138 case PM_TOKEN_PERCENT_UPPER_I:
20139 return parse_symbol_array(parser, depth);
20140 case PM_TOKEN_PERCENT_LOWER_W: {
20141 parser_lex(parser);
20142 pm_token_t opening = parser->previous;
20143 pm_array_node_t *array = pm_array_node_create(parser, &opening);
20144 pm_node_t *current = NULL;
20145
20146 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20147 accept1(parser, PM_TOKEN_WORDS_SEP);
20148 if (match1(parser, PM_TOKEN_STRING_END)) break;
20149
20150 // Interpolation is not possible but nested heredocs can still lead to
20151 // consecutive (disjoint) string tokens when the final newline is escaped.
20152 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20153 pm_node_t *string = UP(pm_string_node_create_current_string(parser, NULL, &parser->current, NULL));
20154
20155 // Record the string node, moving to interpolation if needed.
20156 if (current == NULL) {
20157 current = string;
20158 } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20159 pm_interpolated_string_node_append(parser, (pm_interpolated_string_node_t *) current, string);
20160 } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20161 pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, NULL, NULL, NULL);
20162 pm_interpolated_string_node_append(parser, interpolated, current);
20163 pm_interpolated_string_node_append(parser, interpolated, string);
20164 current = UP(interpolated);
20165 } else {
20166 assert(false && "unreachable");
20167 }
20168 parser_lex(parser);
20169 }
20170
20171 if (current) {
20172 pm_array_node_elements_append(parser->arena, array, current);
20173 current = NULL;
20174 } else {
20175 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20176 }
20177 }
20178
20179 pm_token_t closing = parser->current;
20180 if (match1(parser, PM_TOKEN_EOF)) {
20181 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20182 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
20183 } else {
20184 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20185 }
20186
20187 pm_array_node_close_set(parser, array, &closing);
20188 return UP(array);
20189 }
20190 case PM_TOKEN_PERCENT_UPPER_W:
20191 return parse_string_array(parser, depth);
20192 case PM_TOKEN_REGEXP_BEGIN: {
20193 pm_token_t opening = parser->current;
20194 parser_lex(parser);
20195
20196 if (match1(parser, PM_TOKEN_REGEXP_END)) {
20197 // If we get here, then we have an end immediately after a start. In
20198 // that case we'll create an empty content token and return an
20199 // uninterpolated regular expression.
20200 pm_token_t content = (pm_token_t) {
20201 .type = PM_TOKEN_STRING_CONTENT,
20202 .start = parser->previous.end,
20203 .end = parser->previous.end
20204 };
20205
20206 parser_lex(parser);
20207
20208 pm_regular_expression_node_t *node = pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20209 pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
20210 return UP(node);
20211 }
20212
20214
20215 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20216 // In this case we've hit string content so we know the regular
20217 // expression at least has something in it. We'll need to check if the
20218 // following token is the end (in which case we can return a plain
20219 // regular expression) or if it's not then it has interpolation.
20220 pm_string_t unescaped = parser->current_string;
20221 pm_token_t content = parser->current;
20222 parser_lex(parser);
20223
20224 // If we hit an end, then we can create a regular expression
20225 // node without interpolation, which can be represented more
20226 // succinctly and more easily compiled.
20227 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20228 pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20229
20230 // If we're not immediately followed by a =~, then we
20231 // parse and validate now. If it is followed by a =~,
20232 // then it will get parsed in the =~ handler where
20233 // named captures can also be extracted.
20234 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20235 pm_node_flag_set(UP(node), pm_regexp_parse(parser, node, NULL, NULL));
20236 }
20237
20238 return UP(node);
20239 }
20240
20241 // If we get here, then we have interpolation so we'll need to create
20242 // a regular expression node with interpolation.
20243 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20244
20245 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
20246 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20247 // This is extremely strange, but the first string part of a
20248 // regular expression will always be tagged as binary if we
20249 // are in a US-ASCII file, no matter its contents.
20250 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20251 }
20252
20253 pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
20254 } else {
20255 // If the first part of the body of the regular expression is not a
20256 // string content, then we have interpolation and we need to create an
20257 // interpolated regular expression node.
20258 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20259 }
20260
20261 // Now that we're here and we have interpolation, we'll parse all of the
20262 // parts into the list.
20263 pm_node_t *part;
20264 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20265 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20266 pm_interpolated_regular_expression_node_append(parser->arena, interpolated, part);
20267 }
20268 }
20269
20270 pm_token_t closing = parser->current;
20271 if (match1(parser, PM_TOKEN_EOF)) {
20272 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20273 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
20274 } else {
20275 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20276 }
20277
20278 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20279 return UP(interpolated);
20280 }
20281 case PM_TOKEN_BACKTICK:
20282 case PM_TOKEN_PERCENT_LOWER_X: {
20283 parser_lex(parser);
20284 pm_token_t opening = parser->previous;
20285
20286 // When we get here, we don't know if this string is going to have
20287 // interpolation or not, even though it is allowed. Still, we want to be
20288 // able to return a string node without interpolation if we can since
20289 // it'll be faster.
20290 if (match1(parser, PM_TOKEN_STRING_END)) {
20291 // If we get here, then we have an end immediately after a start. In
20292 // that case we'll create an empty content token and return an
20293 // uninterpolated string.
20294 pm_token_t content = (pm_token_t) {
20295 .type = PM_TOKEN_STRING_CONTENT,
20296 .start = parser->previous.end,
20297 .end = parser->previous.end
20298 };
20299
20300 parser_lex(parser);
20301 return UP(pm_xstring_node_create(parser, &opening, &content, &parser->previous));
20302 }
20303
20305
20306 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20307 // In this case we've hit string content so we know the string
20308 // at least has something in it. We'll need to check if the
20309 // following token is the end (in which case we can return a
20310 // plain string) or if it's not then it has interpolation.
20311 pm_string_t unescaped = parser->current_string;
20312 pm_token_t content = parser->current;
20313 parser_lex(parser);
20314
20315 if (match1(parser, PM_TOKEN_STRING_END)) {
20316 pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped));
20317 pm_node_flag_set(node, parse_unescaped_encoding(parser));
20318 parser_lex(parser);
20319 return node;
20320 }
20321
20322 // If we get here, then we have interpolation so we'll need to
20323 // create a string node with interpolation.
20324 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20325
20326 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->previous, NULL, &unescaped));
20327 pm_node_flag_set(part, parse_unescaped_encoding(parser));
20328
20329 pm_interpolated_xstring_node_append(parser->arena, node, part);
20330 } else {
20331 // If the first part of the body of the string is not a string
20332 // content, then we have interpolation and we need to create an
20333 // interpolated string node.
20334 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20335 }
20336
20337 pm_node_t *part;
20338 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20339 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20340 pm_interpolated_xstring_node_append(parser->arena, node, part);
20341 }
20342 }
20343
20344 pm_token_t closing = parser->current;
20345 if (match1(parser, PM_TOKEN_EOF)) {
20346 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20347 closing = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
20348 } else {
20349 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20350 }
20351 pm_interpolated_xstring_node_closing_set(parser, node, &closing);
20352
20353 return UP(node);
20354 }
20355 case PM_TOKEN_USTAR: {
20356 parser_lex(parser);
20357
20358 // * operators at the beginning of expressions are only valid in the
20359 // context of a multiple assignment. We enforce that here. We'll
20360 // still lex past it though and create a missing node place.
20361 if (binding_power != PM_BINDING_POWER_STATEMENT) {
20362 pm_parser_err_prefix(parser, diag_id);
20363 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
20364 }
20365
20366 pm_token_t operator = parser->previous;
20367 pm_node_t *name = NULL;
20368
20369 if (token_begins_expression_p(parser->current.type)) {
20370 name = parse_expression(parser, PM_BINDING_POWER_INDEX, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20371 }
20372
20373 pm_node_t *splat = UP(pm_splat_node_create(parser, &operator, name));
20374
20375 if (match1(parser, PM_TOKEN_COMMA)) {
20376 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20377 } else {
20378 return parse_target_validate(parser, splat, true);
20379 }
20380 }
20381 case PM_TOKEN_BANG: {
20382 if (binding_power > PM_BINDING_POWER_UNARY) {
20383 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20384 }
20385
20386 parser_lex(parser);
20387
20388 pm_token_t operator = parser->previous;
20389 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20390 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20391
20392 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20393 return UP(node);
20394 }
20395 case PM_TOKEN_TILDE: {
20396 if (binding_power > PM_BINDING_POWER_UNARY) {
20397 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20398 }
20399 parser_lex(parser);
20400
20401 pm_token_t operator = parser->previous;
20402 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20403 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20404
20405 return UP(node);
20406 }
20407 case PM_TOKEN_UMINUS: {
20408 if (binding_power > PM_BINDING_POWER_UNARY) {
20409 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20410 }
20411 parser_lex(parser);
20412
20413 pm_token_t operator = parser->previous;
20414 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20415 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20416
20417 return UP(node);
20418 }
20419 case PM_TOKEN_UMINUS_NUM: {
20420 parser_lex(parser);
20421
20422 pm_token_t operator = parser->previous;
20423 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20424
20425 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20426 pm_token_t exponent_operator = parser->previous;
20427 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20428 node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
20429 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20430 } else {
20431 switch (PM_NODE_TYPE(node)) {
20432 case PM_INTEGER_NODE:
20433 case PM_FLOAT_NODE:
20434 case PM_RATIONAL_NODE:
20435 case PM_IMAGINARY_NODE:
20436 parse_negative_numeric(node);
20437 break;
20438 default:
20439 node = UP(pm_call_node_unary_create(parser, &operator, node, "-@"));
20440 break;
20441 }
20442 }
20443
20444 return node;
20445 }
20446 case PM_TOKEN_MINUS_GREATER: {
20447 int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20448 parser->lambda_enclosure_nesting = parser->enclosure_nesting;
20449
20450 size_t opening_newline_index = token_newline_index(parser);
20451 pm_accepts_block_stack_push(parser, true);
20452 parser_lex(parser);
20453
20454 pm_token_t operator = parser->previous;
20455 pm_parser_scope_push(parser, false);
20456
20457 pm_block_parameters_node_t *block_parameters;
20458
20459 switch (parser->current.type) {
20460 case PM_TOKEN_PARENTHESIS_LEFT: {
20461 pm_token_t opening = parser->current;
20462 parser_lex(parser);
20463
20464 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20465 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20466 } else {
20467 block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20468 }
20469
20470 accept1(parser, PM_TOKEN_NEWLINE);
20471 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20472
20473 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->previous);
20474 break;
20475 }
20476 case PM_CASE_PARAMETER: {
20477 pm_accepts_block_stack_push(parser, false);
20478 block_parameters = parse_block_parameters(parser, false, NULL, true, false, (uint16_t) (depth + 1));
20479 pm_accepts_block_stack_pop(parser);
20480 break;
20481 }
20482 default: {
20483 block_parameters = NULL;
20484 break;
20485 }
20486 }
20487
20488 pm_token_t opening;
20489 pm_node_t *body = NULL;
20490 parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20491
20492 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20493 opening = parser->previous;
20494
20495 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20496 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1)));
20497 }
20498
20499 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20500 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
20501 } else {
20502 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20503 opening = parser->previous;
20504
20505 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20506 body = UP(parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1)));
20507 }
20508
20509 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20510 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20511 body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
20512 } else {
20513 parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20514 }
20515
20516 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &operator);
20517 }
20518
20519 pm_constant_id_list_t locals;
20520 pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20521 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &operator, &parser->previous);
20522
20523 pm_parser_scope_pop(parser);
20524 pm_accepts_block_stack_pop(parser);
20525
20526 return UP(pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body));
20527 }
20528 case PM_TOKEN_UPLUS: {
20529 if (binding_power > PM_BINDING_POWER_UNARY) {
20530 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20531 }
20532 parser_lex(parser);
20533
20534 pm_token_t operator = parser->previous;
20535 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20536 pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20537
20538 return UP(node);
20539 }
20540 case PM_TOKEN_STRING_BEGIN:
20541 return parse_strings(parser, NULL, flags & PM_PARSE_ACCEPTS_LABEL, (uint16_t) (depth + 1));
20542 case PM_TOKEN_SYMBOL_BEGIN: {
20543 pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20544 parser_lex(parser);
20545
20546 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20547 }
20548 default: {
20549 pm_context_t recoverable = context_recoverable(parser, &parser->current);
20550
20551 if (recoverable != PM_CONTEXT_NONE) {
20552 parser->recovering = true;
20553
20554 // If the given error is not the generic one, then we'll add it
20555 // here because it will provide more context in addition to the
20556 // recoverable error that we will also add.
20557 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20558 pm_parser_err_prefix(parser, diag_id);
20559 }
20560
20561 // If we get here, then we are assuming this token is closing a
20562 // parent context, so we'll indicate that to the user so that
20563 // they know how we behaved.
20564 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_str(parser->current.type), context_human(recoverable));
20565 } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20566 // We're going to make a special case here, because "cannot
20567 // parse expression" is pretty generic, and we know here that we
20568 // have an unexpected token.
20569 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_str(parser->current.type));
20570 } else {
20571 pm_parser_err_prefix(parser, diag_id);
20572 }
20573
20574 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->previous), PM_TOKEN_LENGTH(&parser->previous)));
20575 }
20576 }
20577}
20578
20588static pm_node_t *
20589parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
20590 pm_node_t *value = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MATCH ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1));
20591
20592 // Assignments whose value is a command call (e.g., a = b c) can only
20593 // be followed by modifiers (if/unless/while/until/rescue) and not by
20594 // operators with higher binding power. If we find one, emit an error
20595 // and skip the operator and its right-hand side.
20596 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
20597 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
20598 parser_lex(parser);
20599 parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20600 }
20601
20602 // Contradicting binding powers, the right-hand-side value of the assignment
20603 // allows the `rescue` modifier.
20604 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20605 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20606
20607 pm_token_t rescue = parser->current;
20608 parser_lex(parser);
20609
20610 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20611 context_pop(parser);
20612
20613 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20614 }
20615
20616 return value;
20617}
20618
20623static void
20624parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20625 switch (PM_NODE_TYPE(node)) {
20626 case PM_BEGIN_NODE: {
20627 const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20628 if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20629 break;
20630 }
20631 case PM_LOCAL_VARIABLE_WRITE_NODE: {
20633 pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20634 break;
20635 }
20636 case PM_PARENTHESES_NODE: {
20637 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20638 if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20639 break;
20640 }
20641 case PM_STATEMENTS_NODE: {
20642 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20643 const pm_node_t *statement;
20644
20645 PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20646 parse_assignment_value_local(parser, statement);
20647 }
20648 break;
20649 }
20650 default:
20651 break;
20652 }
20653}
20654
20667static pm_node_t *
20668parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
20669 bool permitted = true;
20670 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20671
20672 pm_node_t *value = parse_starred_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) : (previous_binding_power < PM_BINDING_POWER_MODIFIER ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0))), diag_id, (uint16_t) (depth + 1));
20673 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20674
20675 parse_assignment_value_local(parser, value);
20676 bool single_value = true;
20677
20678 // Block calls (command call + do block, e.g., `foo bar do end`) cannot
20679 // be followed by a comma to form a multi-value RHS because each element
20680 // of a multi-value assignment must be an `arg`, not a `block_call`.
20681 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && !pm_block_call_p(value) && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20682 single_value = false;
20683
20684 pm_array_node_t *array = pm_array_node_create(parser, NULL);
20685 pm_array_node_elements_append(parser->arena, array, value);
20686 value = UP(array);
20687
20688 while (accept1(parser, PM_TOKEN_COMMA)) {
20689 pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20690
20691 pm_array_node_elements_append(parser->arena, array, element);
20692 if (PM_NODE_TYPE_P(element, PM_ERROR_RECOVERY_NODE)) break;
20693
20694 parse_assignment_value_local(parser, element);
20695 }
20696 }
20697
20698 // Assignments whose value is a command call (e.g., a = b c) can only
20699 // be followed by modifiers (if/unless/while/until/rescue) and not by
20700 // operators with higher binding power. If we find one, emit an error
20701 // and skip the operator and its right-hand side.
20702 if (single_value && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER && (pm_command_call_value_p(value) || pm_block_call_p(value))) {
20703 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(parser->current.type));
20704 parser_lex(parser);
20705 parse_expression(parser, pm_binding_powers[parser->previous.type].right, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20706 }
20707
20708 // Contradicting binding powers, the right-hand-side value of the assignment
20709 // allows the `rescue` modifier.
20710 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20711 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20712
20713 pm_token_t rescue = parser->current;
20714 parser_lex(parser);
20715
20716 bool accepts_command_call_inner = false;
20717
20718 // RHS can accept command call iff the value is a call with arguments
20719 // but without parenthesis.
20720 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20721 pm_call_node_t *call_node = (pm_call_node_t *) value;
20722 if ((call_node->arguments != NULL) && (call_node->opening_loc.length == 0)) {
20723 accepts_command_call_inner = true;
20724 }
20725 }
20726
20727 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (accepts_command_call_inner ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20728 context_pop(parser);
20729
20730 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
20731 }
20732
20733 return value;
20734}
20735
20743static void
20744parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20745 if (call_node->arguments != NULL) {
20746 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20747 pm_node_unreference(parser, UP(call_node->arguments));
20748 call_node->arguments = NULL;
20749 }
20750
20751 if (call_node->block != NULL) {
20752 pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20753 pm_node_unreference(parser, UP(call_node->block));
20754 call_node->block = NULL;
20755 }
20756}
20757
20758static PRISM_INLINE const uint8_t *
20759pm_named_capture_escape_hex(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20760 cursor++;
20761
20762 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20763 uint8_t value = escape_hexadecimal_digit(*cursor);
20764 cursor++;
20765
20766 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20767 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20768 cursor++;
20769 }
20770
20771 pm_buffer_append_byte(unescaped, value);
20772 } else {
20773 pm_buffer_append_string(unescaped, "\\x", 2);
20774 }
20775
20776 return cursor;
20777}
20778
20779static PRISM_INLINE const uint8_t *
20780pm_named_capture_escape_octal(pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end) {
20781 uint8_t value = (uint8_t) (*cursor - '0');
20782 cursor++;
20783
20784 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20785 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20786 cursor++;
20787
20788 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20789 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor - '0'));
20790 cursor++;
20791 }
20792 }
20793
20794 pm_buffer_append_byte(unescaped, value);
20795 return cursor;
20796}
20797
20798static PRISM_INLINE const uint8_t *
20799pm_named_capture_escape_unicode(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *cursor, const uint8_t *end, const pm_location_t *error_location) {
20800 const uint8_t *start = cursor - 1;
20801 cursor++;
20802
20803 if (cursor >= end) {
20804 pm_buffer_append_string(unescaped, "\\u", 2);
20805 return cursor;
20806 }
20807
20808 if (*cursor != '{') {
20809 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20810 uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
20811
20812 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20813 pm_buffer_append_string(unescaped, (const char *) start, (size_t) ((cursor + length) - start));
20814 }
20815
20816 return cursor + length;
20817 }
20818
20819 cursor++;
20820 for (;;) {
20821 while (cursor < end && *cursor == ' ') cursor++;
20822
20823 if (cursor >= end) break;
20824 if (*cursor == '}') {
20825 cursor++;
20826 break;
20827 }
20828
20829 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20830 if (length == 0) {
20831 break;
20832 }
20833 uint32_t value = escape_unicode(parser, cursor, length, error_location, 0);
20834
20835 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
20836 cursor += length;
20837 }
20838
20839 return cursor;
20840}
20841
20842static void
20843pm_named_capture_escape(pm_parser_t *parser, pm_buffer_t *unescaped, const uint8_t *source, const size_t length, const uint8_t *cursor, const pm_location_t *error_location) {
20844 const uint8_t *end = source + length;
20845 pm_buffer_append_string(unescaped, (const char *) source, (size_t) (cursor - source));
20846
20847 for (;;) {
20848 if (++cursor >= end) {
20849 pm_buffer_append_byte(unescaped, '\\');
20850 return;
20851 }
20852
20853 switch (*cursor) {
20854 case 'x':
20855 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
20856 break;
20857 case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7':
20858 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
20859 break;
20860 case 'u':
20861 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
20862 break;
20863 default:
20864 pm_buffer_append_byte(unescaped, '\\');
20865 break;
20866 }
20867
20868 const uint8_t *next_cursor = pm_memchr(cursor, '\\', (size_t) (end - cursor), parser->encoding_changed, parser->encoding);
20869 if (next_cursor == NULL) break;
20870
20871 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (next_cursor - cursor));
20872 cursor = next_cursor;
20873 }
20874
20875 pm_buffer_append_string(unescaped, (const char *) cursor, (size_t) (end - cursor));
20876}
20877
20882static void
20883parse_regular_expression_named_capture(pm_parser_t *parser, const pm_string_t *capture, bool shared, pm_regexp_name_data_t *callback_data) {
20884 pm_call_node_t *call = callback_data->call;
20885 pm_constant_id_list_t *names = &callback_data->names;
20886
20887 const uint8_t *source = pm_string_source(capture);
20888 size_t length = pm_string_length(capture);
20889 pm_buffer_t unescaped = { 0 };
20890
20891 // First, we need to handle escapes within the name of the capture group.
20892 // This is because regular expressions have three different representations
20893 // in prism. The first is the plain source code. The second is the
20894 // representation that will be sent to the regular expression engine, which
20895 // is the value of the "unescaped" field. This is poorly named, because it
20896 // actually still contains escapes, just a subset of them that the regular
20897 // expression engine knows how to handle. The third representation is fully
20898 // unescaped, which is what we need.
20899 const uint8_t *cursor = pm_memchr(source, '\\', length, parser->encoding_changed, parser->encoding);
20900 if (PRISM_UNLIKELY(cursor != NULL)) {
20901 pm_named_capture_escape(parser, &unescaped, source, length, cursor, shared ? NULL : &call->receiver->location);
20902 source = (const uint8_t *) pm_buffer_value(&unescaped);
20903 length = pm_buffer_length(&unescaped);
20904 }
20905
20906 const uint8_t *start;
20907 const uint8_t *end;
20908 pm_constant_id_t name;
20909
20910 // If the name of the capture group isn't a valid identifier, we do
20911 // not add it to the local table.
20912 if (!pm_slice_is_valid_local(parser, source, source + length)) {
20913 pm_buffer_cleanup(&unescaped);
20914 return;
20915 }
20916
20917 if (shared) {
20918 // If the unescaped string is a slice of the source, then we can
20919 // copy the names directly. The pointers will line up.
20920 start = source;
20921 end = source + length;
20922 name = pm_parser_constant_id_raw(parser, start, end);
20923 } else {
20924 // Otherwise, the name is a slice of the malloc-ed owned string,
20925 // in which case we need to copy it out into a new string.
20926 start = parser->start + PM_NODE_START(call->receiver);
20927 end = parser->start + PM_NODE_END(call->receiver);
20928
20929 uint8_t *memory = (uint8_t *) pm_arena_alloc(parser->arena, length, 1);
20930 memcpy(memory, source, length);
20931 name = pm_parser_constant_id_owned(parser, memory, length);
20932 }
20933
20934 // Add this name to the list of constants if it is valid, not duplicated,
20935 // and not a keyword.
20936 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20937 pm_constant_id_list_append(parser->arena, names, name);
20938
20939 int depth;
20940 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20941 // If the local is not already a local but it is a keyword, then we
20942 // do not want to add a capture for this.
20943 if (pm_local_is_keyword((const char *) source, length)) {
20944 pm_buffer_cleanup(&unescaped);
20945 return;
20946 }
20947
20948 // If the identifier is not already a local, then we will add it to
20949 // the local table.
20950 pm_parser_local_add(parser, name, start, end, 0);
20951 }
20952
20953 // Here we lazily create the MatchWriteNode since we know we're
20954 // about to add a target.
20955 if (callback_data->match == NULL) {
20956 callback_data->match = pm_match_write_node_create(parser, call);
20957 }
20958
20959 // Next, create the local variable target and add it to the list of
20960 // targets for the match.
20961 pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &TOK2LOC(parser, &((pm_token_t) { .type = 0, .start = start, .end = end })), name, depth == -1 ? 0 : (uint32_t) depth));
20962 pm_node_list_append(parser->arena, &callback_data->match->targets, target);
20963 }
20964
20965 pm_buffer_cleanup(&unescaped);
20966}
20967
20973static pm_node_t *
20974parse_interpolated_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
20975 pm_regexp_name_data_t callback_data = {
20976 .call = call,
20977 .match = NULL,
20978 .names = { 0 },
20979 };
20980
20981 pm_regexp_parse_named_captures(parser, pm_string_source(content), pm_string_length(content), false, extended_mode, parse_regular_expression_named_capture, &callback_data);
20982
20983 if (callback_data.match != NULL) {
20984 return UP(callback_data.match);
20985 } else {
20986 return UP(call);
20987 }
20988}
20989
20990static PRISM_INLINE pm_node_t *
20991parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, uint8_t flags, uint16_t depth) {
20992 pm_token_t token = parser->current;
20993
20994 switch (token.type) {
20995 case PM_TOKEN_EQUAL: {
20996 switch (PM_NODE_TYPE(node)) {
20997 case PM_CALL_NODE: {
20998 // If we have no arguments to the call node and we need this
20999 // to be a target then this is either a method call or a
21000 // local variable write. This _must_ happen before the value
21001 // is parsed because it could be referenced in the value.
21002 pm_call_node_t *call_node = (pm_call_node_t *) node;
21003 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21004 pm_parser_local_add_location(parser, &call_node->message_loc, 0);
21005 }
21006 }
21008 case PM_CASE_WRITABLE: {
21009 // When we have `it = value`, we need to add `it` as a local
21010 // variable before parsing the value, in case the value
21011 // references the variable.
21012 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
21013 pm_parser_local_add_location(parser, &node->location, 0);
21014 }
21015
21016 parser_lex(parser);
21017 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21018
21019 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
21020 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
21021 }
21022
21023 return parse_write(parser, node, &token, value);
21024 }
21025 case PM_SPLAT_NODE: {
21026 pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
21027 pm_multi_target_node_targets_append(parser, multi_target, node);
21028
21029 parser_lex(parser);
21030 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21031 return parse_write(parser, UP(multi_target), &token, value);
21032 }
21033 case PM_SOURCE_ENCODING_NODE:
21034 case PM_FALSE_NODE:
21035 case PM_SOURCE_FILE_NODE:
21036 case PM_SOURCE_LINE_NODE:
21037 case PM_NIL_NODE:
21038 case PM_SELF_NODE:
21039 case PM_TRUE_NODE: {
21040 // In these special cases, we have specific error messages
21041 // and we will replace them with local variable writes.
21042 parser_lex(parser);
21043 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
21044 return parse_unwriteable_write(parser, node, &token, value);
21045 }
21046 default:
21047 // In this case we have an = sign, but we don't know what
21048 // it's for. We need to treat it as an error. We'll mark it
21049 // as an error and skip past it.
21050 parser_lex(parser);
21051 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
21052 return node;
21053 }
21054 }
21055 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
21056 switch (PM_NODE_TYPE(node)) {
21057 case PM_BACK_REFERENCE_READ_NODE:
21058 case PM_NUMBERED_REFERENCE_READ_NODE:
21059 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21061 case PM_GLOBAL_VARIABLE_READ_NODE: {
21062 parser_lex(parser);
21063
21064 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21065 pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
21066
21067 return result;
21068 }
21069 case PM_CLASS_VARIABLE_READ_NODE: {
21070 parser_lex(parser);
21071
21072 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21073 pm_node_t *result = UP(pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
21074
21075 return result;
21076 }
21077 case PM_CONSTANT_PATH_NODE: {
21078 parser_lex(parser);
21079
21080 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21081 pm_node_t *write = UP(pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
21082
21083 return parse_shareable_constant_write(parser, write);
21084 }
21085 case PM_CONSTANT_READ_NODE: {
21086 parser_lex(parser);
21087
21088 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21089 pm_node_t *write = UP(pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
21090
21091 return parse_shareable_constant_write(parser, write);
21092 }
21093 case PM_INSTANCE_VARIABLE_READ_NODE: {
21094 parser_lex(parser);
21095
21096 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21097 pm_node_t *result = UP(pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
21098
21099 return result;
21100 }
21101 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21102 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21103 parser_lex(parser);
21104
21105 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21106 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
21107
21108 pm_node_unreference(parser, node);
21109 return result;
21110 }
21111 case PM_LOCAL_VARIABLE_READ_NODE: {
21112 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
21113 PM_PARSER_ERR_FORMAT(parser, node->location.start, node->location.length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + node->location.start);
21114 pm_node_unreference(parser, node);
21115 }
21116
21118 parser_lex(parser);
21119
21120 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21121 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth));
21122
21123 return result;
21124 }
21125 case PM_CALL_NODE: {
21126 pm_call_node_t *cast = (pm_call_node_t *) node;
21127
21128 // If we have a vcall (a method with no arguments and no
21129 // receiver that could have been a local variable) then we
21130 // will transform it into a local variable write.
21131 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21132 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
21133 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
21134 parser_lex(parser);
21135
21136 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21137 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
21138
21139 return result;
21140 }
21141
21142 // Move past the token here so that we have already added
21143 // the local variable by this point.
21144 parser_lex(parser);
21145
21146 // If there is no call operator and the message is "[]" then
21147 // this is an aref expression, and we can transform it into
21148 // an aset expression.
21149 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21150 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21151 return UP(pm_index_and_write_node_create(parser, cast, &token, value));
21152 }
21153
21154 // If this node cannot be writable, then we have an error.
21155 if (pm_call_node_writable_p(parser, cast)) {
21156 parse_write_name(parser, &cast->name);
21157 } else {
21158 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21159 }
21160
21161 parse_call_operator_write(parser, cast, &token);
21162 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21163 return UP(pm_call_and_write_node_create(parser, cast, &token, value));
21164 }
21165 case PM_MULTI_WRITE_NODE: {
21166 parser_lex(parser);
21167 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21168 return node;
21169 }
21170 default:
21171 parser_lex(parser);
21172
21173 // In this case we have an &&= sign, but we don't know what it's for.
21174 // We need to treat it as an error. For now, we'll mark it as an error
21175 // and just skip right past it.
21176 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21177 return node;
21178 }
21179 }
21180 case PM_TOKEN_PIPE_PIPE_EQUAL: {
21181 switch (PM_NODE_TYPE(node)) {
21182 case PM_BACK_REFERENCE_READ_NODE:
21183 case PM_NUMBERED_REFERENCE_READ_NODE:
21184 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21186 case PM_GLOBAL_VARIABLE_READ_NODE: {
21187 parser_lex(parser);
21188
21189 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21190 pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
21191
21192 return result;
21193 }
21194 case PM_CLASS_VARIABLE_READ_NODE: {
21195 parser_lex(parser);
21196
21197 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21198 pm_node_t *result = UP(pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
21199
21200 return result;
21201 }
21202 case PM_CONSTANT_PATH_NODE: {
21203 parser_lex(parser);
21204
21205 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21206 pm_node_t *write = UP(pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
21207
21208 return parse_shareable_constant_write(parser, write);
21209 }
21210 case PM_CONSTANT_READ_NODE: {
21211 parser_lex(parser);
21212
21213 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21214 pm_node_t *write = UP(pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
21215
21216 return parse_shareable_constant_write(parser, write);
21217 }
21218 case PM_INSTANCE_VARIABLE_READ_NODE: {
21219 parser_lex(parser);
21220
21221 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21222 pm_node_t *result = UP(pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
21223
21224 return result;
21225 }
21226 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21227 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21228 parser_lex(parser);
21229
21230 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21231 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
21232
21233 pm_node_unreference(parser, node);
21234 return result;
21235 }
21236 case PM_LOCAL_VARIABLE_READ_NODE: {
21237 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
21238 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
21239 pm_node_unreference(parser, node);
21240 }
21241
21243 parser_lex(parser);
21244
21245 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21246 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth));
21247
21248 return result;
21249 }
21250 case PM_CALL_NODE: {
21251 pm_call_node_t *cast = (pm_call_node_t *) node;
21252
21253 // If we have a vcall (a method with no arguments and no
21254 // receiver that could have been a local variable) then we
21255 // will transform it into a local variable write.
21256 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21257 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
21258 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
21259 parser_lex(parser);
21260
21261 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21262 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
21263
21264 return result;
21265 }
21266
21267 // Move past the token here so that we have already added
21268 // the local variable by this point.
21269 parser_lex(parser);
21270
21271 // If there is no call operator and the message is "[]" then
21272 // this is an aref expression, and we can transform it into
21273 // an aset expression.
21274 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21275 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21276 return UP(pm_index_or_write_node_create(parser, cast, &token, value));
21277 }
21278
21279 // If this node cannot be writable, then we have an error.
21280 if (pm_call_node_writable_p(parser, cast)) {
21281 parse_write_name(parser, &cast->name);
21282 } else {
21283 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21284 }
21285
21286 parse_call_operator_write(parser, cast, &token);
21287 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21288 return UP(pm_call_or_write_node_create(parser, cast, &token, value));
21289 }
21290 case PM_MULTI_WRITE_NODE: {
21291 parser_lex(parser);
21292 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21293 return node;
21294 }
21295 default:
21296 parser_lex(parser);
21297
21298 // In this case we have an ||= sign, but we don't know what it's for.
21299 // We need to treat it as an error. For now, we'll mark it as an error
21300 // and just skip right past it.
21301 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21302 return node;
21303 }
21304 }
21305 case PM_TOKEN_AMPERSAND_EQUAL:
21306 case PM_TOKEN_CARET_EQUAL:
21307 case PM_TOKEN_GREATER_GREATER_EQUAL:
21308 case PM_TOKEN_LESS_LESS_EQUAL:
21309 case PM_TOKEN_MINUS_EQUAL:
21310 case PM_TOKEN_PERCENT_EQUAL:
21311 case PM_TOKEN_PIPE_EQUAL:
21312 case PM_TOKEN_PLUS_EQUAL:
21313 case PM_TOKEN_SLASH_EQUAL:
21314 case PM_TOKEN_STAR_EQUAL:
21315 case PM_TOKEN_STAR_STAR_EQUAL: {
21316 switch (PM_NODE_TYPE(node)) {
21317 case PM_BACK_REFERENCE_READ_NODE:
21318 case PM_NUMBERED_REFERENCE_READ_NODE:
21319 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21321 case PM_GLOBAL_VARIABLE_READ_NODE: {
21322 parser_lex(parser);
21323
21324 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21325 pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
21326
21327 return result;
21328 }
21329 case PM_CLASS_VARIABLE_READ_NODE: {
21330 parser_lex(parser);
21331
21332 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21333 pm_node_t *result = UP(pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value));
21334
21335 return result;
21336 }
21337 case PM_CONSTANT_PATH_NODE: {
21338 parser_lex(parser);
21339
21340 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21341 pm_node_t *write = UP(pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value));
21342
21343 return parse_shareable_constant_write(parser, write);
21344 }
21345 case PM_CONSTANT_READ_NODE: {
21346 parser_lex(parser);
21347
21348 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21349 pm_node_t *write = UP(pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value));
21350
21351 return parse_shareable_constant_write(parser, write);
21352 }
21353 case PM_INSTANCE_VARIABLE_READ_NODE: {
21354 parser_lex(parser);
21355
21356 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21357 pm_node_t *result = UP(pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value));
21358
21359 return result;
21360 }
21361 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
21362 pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
21363 parser_lex(parser);
21364
21365 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21366 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
21367
21368 pm_node_unreference(parser, node);
21369 return result;
21370 }
21371 case PM_LOCAL_VARIABLE_READ_NODE: {
21372 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
21373 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + PM_NODE_START(node));
21374 pm_node_unreference(parser, node);
21375 }
21376
21378 parser_lex(parser);
21379
21380 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21381 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth));
21382
21383 return result;
21384 }
21385 case PM_CALL_NODE: {
21386 parser_lex(parser);
21387 pm_call_node_t *cast = (pm_call_node_t *) node;
21388
21389 // If we have a vcall (a method with no arguments and no
21390 // receiver that could have been a local variable) then we
21391 // will transform it into a local variable write.
21392 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
21393 pm_refute_numbered_parameter(parser, cast->message_loc.start, cast->message_loc.length);
21394 pm_constant_id_t constant_id = pm_parser_local_add_location(parser, &cast->message_loc, 1);
21395 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21396 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
21397
21398 return result;
21399 }
21400
21401 // If there is no call operator and the message is "[]" then
21402 // this is an aref expression, and we can transform it into
21403 // an aset expression.
21404 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21405 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21406 return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
21407 }
21408
21409 // If this node cannot be writable, then we have an error.
21410 if (pm_call_node_writable_p(parser, cast)) {
21411 parse_write_name(parser, &cast->name);
21412 } else {
21413 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21414 }
21415
21416 parse_call_operator_write(parser, cast, &token);
21417 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, flags, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21418 return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
21419 }
21420 case PM_MULTI_WRITE_NODE: {
21421 parser_lex(parser);
21422 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21423 return node;
21424 }
21425 default:
21426 parser_lex(parser);
21427
21428 // In this case we have an operator but we don't know what it's for.
21429 // We need to treat it as an error. For now, we'll mark it as an error
21430 // and just skip right past it.
21431 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_str(parser->current.type));
21432 return node;
21433 }
21434 }
21435 case PM_TOKEN_AMPERSAND_AMPERSAND:
21436 case PM_TOKEN_KEYWORD_AND: {
21437 parser_lex(parser);
21438
21439 pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_AND ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21440 return UP(pm_and_node_create(parser, node, &token, right));
21441 }
21442 case PM_TOKEN_KEYWORD_OR:
21443 case PM_TOKEN_PIPE_PIPE: {
21444 parser_lex(parser);
21445
21446 pm_node_t *right = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | (parser->previous.type == PM_TOKEN_KEYWORD_OR ? PM_PARSE_ACCEPTS_COMMAND_CALL : 0)), PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21447 return UP(pm_or_node_create(parser, node, &token, right));
21448 }
21449 case PM_TOKEN_EQUAL_TILDE: {
21450 // Note that we _must_ parse the value before adding the local
21451 // variables in order to properly mirror the behavior of Ruby. For
21452 // example,
21453 //
21454 // /(?<foo>bar)/ =~ foo
21455 //
21456 // In this case, `foo` should be a method call and not a local yet.
21457 parser_lex(parser);
21458 pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21459
21460 // By default, we're going to create a call node and then return it.
21461 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21462 pm_node_t *result = UP(call);
21463
21464 // If the receiver of this =~ is a regular expression node, then we
21465 // need to introduce local variables for it based on its named
21466 // capture groups.
21467 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21468 // It's possible to have an interpolated regular expression node
21469 // that only contains strings. This is because it can be split
21470 // up by a heredoc. In this case we need to concat the unescaped
21471 // strings together and then parse them as a regular expression.
21473
21474 bool interpolated = false;
21475 size_t total_length = 0;
21476
21477 pm_node_t *part;
21478 PM_NODE_LIST_FOREACH(parts, index, part) {
21479 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21480 total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21481 } else {
21482 interpolated = true;
21483 break;
21484 }
21485 }
21486
21487 if (!interpolated && total_length > 0) {
21488 void *memory = xmalloc(total_length);
21489 if (!memory) abort();
21490
21491 uint8_t *cursor = memory;
21492 PM_NODE_LIST_FOREACH(parts, index, part) {
21493 pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21494 size_t length = pm_string_length(unescaped);
21495
21496 memcpy(cursor, pm_string_source(unescaped), length);
21497 cursor += length;
21498 }
21499
21500 pm_string_t owned;
21501 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21502
21503 result = parse_interpolated_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21504 pm_string_cleanup(&owned);
21505 }
21506 } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21507 // If we have a regular expression node, then we can parse
21508 // the named captures and validate encoding in one pass.
21510
21511 pm_regexp_name_data_t name_data = {
21512 .call = call,
21513 .match = NULL,
21514 .names = { 0 },
21515 };
21516
21517 pm_node_flag_set(UP(regexp), pm_regexp_parse(parser, regexp, parse_regular_expression_named_capture, &name_data));
21518
21519 if (name_data.match != NULL) {
21520 result = UP(name_data.match);
21521 }
21522 }
21523
21524 return result;
21525 }
21526 case PM_TOKEN_UAMPERSAND:
21527 case PM_TOKEN_USTAR:
21528 case PM_TOKEN_USTAR_STAR:
21529 // The only times this will occur are when we are in an error state,
21530 // but we'll put them in here so that errors can propagate.
21531 case PM_TOKEN_BANG_EQUAL:
21532 case PM_TOKEN_BANG_TILDE:
21533 case PM_TOKEN_EQUAL_EQUAL:
21534 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21535 case PM_TOKEN_LESS_EQUAL_GREATER:
21536 case PM_TOKEN_CARET:
21537 case PM_TOKEN_PIPE:
21538 case PM_TOKEN_AMPERSAND:
21539 case PM_TOKEN_GREATER_GREATER:
21540 case PM_TOKEN_LESS_LESS:
21541 case PM_TOKEN_MINUS:
21542 case PM_TOKEN_PLUS:
21543 case PM_TOKEN_PERCENT:
21544 case PM_TOKEN_SLASH:
21545 case PM_TOKEN_STAR:
21546 case PM_TOKEN_STAR_STAR: {
21547 parser_lex(parser);
21548 pm_token_t operator = parser->previous;
21549 switch (PM_NODE_TYPE(node)) {
21550 case PM_RESCUE_MODIFIER_NODE: {
21552 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21553 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21554 }
21555 break;
21556 }
21557 case PM_AND_NODE: {
21558 pm_and_node_t *cast = (pm_and_node_t *) node;
21559 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21560 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21561 }
21562 break;
21563 }
21564 case PM_OR_NODE: {
21565 pm_or_node_t *cast = (pm_or_node_t *) node;
21566 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21567 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21568 }
21569 break;
21570 }
21571 default:
21572 break;
21573 }
21574
21575 pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21576 return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
21577 }
21578 case PM_TOKEN_GREATER:
21579 case PM_TOKEN_GREATER_EQUAL:
21580 case PM_TOKEN_LESS:
21581 case PM_TOKEN_LESS_EQUAL: {
21582 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21583 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21584 }
21585
21586 parser_lex(parser);
21587 pm_node_t *argument = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21588 return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
21589 }
21590 case PM_TOKEN_AMPERSAND_DOT:
21591 case PM_TOKEN_DOT: {
21592 parser_lex(parser);
21593 pm_token_t operator = parser->previous;
21594 pm_arguments_t arguments = { 0 };
21595
21596 // This if statement handles the foo.() syntax.
21597 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21598 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21599 return UP(pm_call_node_shorthand_create(parser, node, &operator, &arguments));
21600 }
21601
21602 switch (PM_NODE_TYPE(node)) {
21603 case PM_RESCUE_MODIFIER_NODE: {
21605 if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21606 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21607 }
21608 break;
21609 }
21610 case PM_AND_NODE: {
21611 pm_and_node_t *cast = (pm_and_node_t *) node;
21612 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21613 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21614 }
21615 break;
21616 }
21617 case PM_OR_NODE: {
21618 pm_or_node_t *cast = (pm_or_node_t *) node;
21619 if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21620 PM_PARSER_ERR_TOKEN_FORMAT(parser, &operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_str(operator.type));
21621 }
21622 break;
21623 }
21624 default:
21625 break;
21626 }
21627
21628 pm_token_t message;
21629
21630 switch (parser->current.type) {
21631 case PM_CASE_OPERATOR:
21632 case PM_CASE_KEYWORD:
21633 case PM_TOKEN_CONSTANT:
21634 case PM_TOKEN_IDENTIFIER:
21635 case PM_TOKEN_METHOD_NAME: {
21636 parser_lex(parser);
21637 message = parser->previous;
21638 break;
21639 }
21640 default: {
21641 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_str(parser->current.type));
21642 message = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
21643 }
21644 }
21645
21646 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
21647 pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21648
21649 if (
21650 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21651 arguments.arguments == NULL &&
21652 arguments.opening_loc.length == 0 &&
21653 match1(parser, PM_TOKEN_COMMA)
21654 ) {
21655 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21656 } else {
21657 return UP(call);
21658 }
21659 }
21660 case PM_TOKEN_DOT_DOT:
21661 case PM_TOKEN_DOT_DOT_DOT: {
21662 parser_lex(parser);
21663
21664 pm_node_t *right = NULL;
21665 if (token_begins_expression_p(parser->current.type)) {
21666 right = parse_expression(parser, binding_power, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21667 }
21668
21669 return UP(pm_range_node_create(parser, node, &token, right));
21670 }
21671 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21672 pm_token_t keyword = parser->current;
21673 parser_lex(parser);
21674
21675 pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21676 return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
21677 }
21678 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21679 pm_token_t keyword = parser->current;
21680 parser_lex(parser);
21681
21682 pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21683 return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
21684 }
21685 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21686 parser_lex(parser);
21687 pm_statements_node_t *statements = pm_statements_node_create(parser);
21688 pm_statements_node_body_append(parser, statements, node, true);
21689
21690 pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21691 return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21692 }
21693 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21694 parser_lex(parser);
21695 pm_statements_node_t *statements = pm_statements_node_create(parser);
21696 pm_statements_node_body_append(parser, statements, node, true);
21697
21698 pm_node_t *predicate = parse_value_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21699 return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
21700 }
21701 case PM_TOKEN_QUESTION_MARK: {
21702 context_push(parser, PM_CONTEXT_TERNARY);
21703 pm_node_list_t current_block_exits = { 0 };
21704 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21705
21706 pm_token_t qmark = parser->current;
21707 parser_lex(parser);
21708
21709 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21710
21711 if (parser->recovering) {
21712 // If parsing the true expression of this ternary resulted in a syntax
21713 // error that we can recover from, then we're going to put missing nodes
21714 // and tokens into the remaining places. We want to be sure to do this
21715 // before the `expect` function call to make sure it doesn't
21716 // accidentally move past a ':' token that occurs after the syntax
21717 // error.
21718 pm_token_t colon = (pm_token_t) { .type = 0, .start = parser->previous.end, .end = parser->previous.end };
21719 pm_node_t *false_expression = UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &colon), PM_TOKEN_LENGTH(&colon)));
21720
21721 context_pop(parser);
21722 pop_block_exits(parser, previous_block_exits);
21723 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21724 }
21725
21726 accept1(parser, PM_TOKEN_NEWLINE);
21727 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21728
21729 pm_token_t colon = parser->previous;
21730 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, flags & PM_PARSE_ACCEPTS_DO_BLOCK, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21731
21732 context_pop(parser);
21733 pop_block_exits(parser, previous_block_exits);
21734 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
21735 }
21736 case PM_TOKEN_COLON_COLON: {
21737 parser_lex(parser);
21738 pm_token_t delimiter = parser->previous;
21739
21740 switch (parser->current.type) {
21741 case PM_TOKEN_CONSTANT: {
21742 parser_lex(parser);
21743 pm_node_t *path;
21744
21745 if (
21746 (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21747 ((flags & PM_PARSE_ACCEPTS_COMMAND_CALL) && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21748 ) {
21749 // If we have a constant immediately following a '::' operator, then
21750 // this can either be a constant path or a method call, depending on
21751 // what follows the constant.
21752 //
21753 // If we have parentheses, then this is a method call. That would
21754 // look like Foo::Bar().
21755 pm_token_t message = parser->previous;
21756 pm_arguments_t arguments = { 0 };
21757
21758 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
21759 path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
21760 } else {
21761 // Otherwise, this is a constant path. That would look like Foo::Bar.
21762 path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21763 }
21764
21765 // If this is followed by a comma then it is a multiple assignment.
21766 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21767 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21768 }
21769
21770 return path;
21771 }
21772 case PM_CASE_OPERATOR:
21773 case PM_CASE_KEYWORD:
21774 case PM_TOKEN_IDENTIFIER:
21775 case PM_TOKEN_METHOD_NAME: {
21776 parser_lex(parser);
21777 pm_token_t message = parser->previous;
21778
21779 // If we have an identifier following a '::' operator, then it is for
21780 // sure a method call.
21781 pm_arguments_t arguments = { 0 };
21782 parse_arguments_list(parser, &arguments, true, flags, (uint16_t) (depth + 1));
21783 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21784
21785 // If this is followed by a comma then it is a multiple assignment.
21786 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21787 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21788 }
21789
21790 return UP(call);
21791 }
21792 case PM_TOKEN_PARENTHESIS_LEFT: {
21793 // If we have a parenthesis following a '::' operator, then it is the
21794 // method call shorthand. That would look like Foo::(bar).
21795 pm_arguments_t arguments = { 0 };
21796 parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21797
21798 return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
21799 }
21800 default: {
21801 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21802 return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->previous));
21803 }
21804 }
21805 }
21806 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
21807 context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21808 parser_lex(parser);
21809 accept1(parser, PM_TOKEN_NEWLINE);
21810
21811 pm_node_t *value = parse_expression(parser, binding_power, (uint8_t) ((flags & PM_PARSE_ACCEPTS_DO_BLOCK) | PM_PARSE_ACCEPTS_COMMAND_CALL), PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21812 context_pop(parser);
21813
21814 return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
21815 }
21816 case PM_TOKEN_BRACKET_LEFT: {
21817 parser_lex(parser);
21818
21819 pm_arguments_t arguments = { 0 };
21820 arguments.opening_loc = TOK2LOC(parser, &parser->previous);
21821
21822 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21823 pm_accepts_block_stack_push(parser, true);
21824 parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint8_t) (flags & ~PM_PARSE_ACCEPTS_DO_BLOCK), (uint16_t) (depth + 1));
21825 pm_accepts_block_stack_pop(parser);
21826 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21827 }
21828
21829 arguments.closing_loc = TOK2LOC(parser, &parser->previous);
21830
21831 // If we have a comma after the closing bracket then this is a multiple
21832 // assignment and we should parse the targets.
21833 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21834 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21835 return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21836 }
21837
21838 // If we're at the end of the arguments, we can now check if there is a
21839 // block node that starts with a {. If there is, then we can parse it and
21840 // add it to the arguments.
21841 pm_block_node_t *block = NULL;
21842 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21843 block = parse_block(parser, (uint16_t) (depth + 1));
21844 pm_arguments_validate_block(parser, &arguments, block);
21845 } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21846 block = parse_block(parser, (uint16_t) (depth + 1));
21847 }
21848
21849 if (block != NULL) {
21850 if (arguments.block != NULL) {
21851 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
21852 if (arguments.arguments == NULL) {
21853 arguments.arguments = pm_arguments_node_create(parser);
21854 }
21855 pm_arguments_node_arguments_append(parser->arena, arguments.arguments, arguments.block);
21856 }
21857
21858 arguments.block = UP(block);
21859 }
21860
21861 return UP(pm_call_node_aref_create(parser, node, &arguments));
21862 }
21863 case PM_TOKEN_KEYWORD_IN: {
21864 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21865 parser->pattern_matching_newlines = true;
21866
21867 pm_token_t operator = parser->current;
21868 parser->command_start = false;
21869 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21870 parser_lex(parser);
21871
21872 pm_constant_id_list_t captures = { 0 };
21873 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21874
21875 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21876
21877 return UP(pm_match_predicate_node_create(parser, node, pattern, &operator));
21878 }
21879 case PM_TOKEN_EQUAL_GREATER: {
21880 bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21881 parser->pattern_matching_newlines = true;
21882
21883 pm_token_t operator = parser->current;
21884 parser->command_start = false;
21885 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21886 parser_lex(parser);
21887
21888 pm_constant_id_list_t captures = { 0 };
21889 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21890
21891 parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21892
21893 return UP(pm_match_required_node_create(parser, node, pattern, &operator));
21894 }
21895 default:
21896 assert(false && "unreachable");
21897 return NULL;
21898 }
21899}
21900
21901#undef PM_PARSE_PATTERN_SINGLE
21902#undef PM_PARSE_PATTERN_TOP
21903#undef PM_PARSE_PATTERN_MULTI
21904
21917static bool
21918parse_expression_terminator(pm_parser_t *parser, pm_node_t *node) {
21919 pm_binding_power_t left = pm_binding_powers[parser->current.type].left;
21920
21921 switch (PM_NODE_TYPE(node)) {
21922 case PM_MULTI_WRITE_NODE:
21923 case PM_RETURN_NODE:
21924 case PM_BREAK_NODE:
21925 case PM_NEXT_NODE:
21926 return left > PM_BINDING_POWER_MODIFIER;
21927 case PM_CLASS_VARIABLE_WRITE_NODE:
21928 case PM_CONSTANT_PATH_WRITE_NODE:
21929 case PM_CONSTANT_WRITE_NODE:
21930 case PM_GLOBAL_VARIABLE_WRITE_NODE:
21931 case PM_INSTANCE_VARIABLE_WRITE_NODE:
21932 case PM_LOCAL_VARIABLE_WRITE_NODE:
21933 return PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && left > PM_BINDING_POWER_MODIFIER;
21934 case PM_CALL_NODE: {
21935 // Calls with an implicit array on the right-hand side are
21936 // statements and can only be followed by modifiers.
21937 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY)) {
21938 return left > PM_BINDING_POWER_MODIFIER;
21939 }
21940
21941 // Command-style calls (including block commands like
21942 // `foo bar do end`) can only be followed by composition
21943 // (and/or) and modifier (if/unless/etc.) operators.
21944 if (pm_command_call_value_p(node)) {
21945 return left > PM_BINDING_POWER_COMPOSITION;
21946 }
21947
21948 // A block call (command with do-block, or any call chained
21949 // from one) can only be followed by call chaining (., ::,
21950 // &.), composition (and/or), and modifier operators.
21951 if (pm_block_call_p(node)) {
21952 return left > PM_BINDING_POWER_COMPOSITION && left < PM_BINDING_POWER_CALL;
21953 }
21954
21955 return false;
21956 }
21957 case PM_SUPER_NODE:
21958 case PM_YIELD_NODE:
21959 // Command-style super/yield (without parens) can only be followed
21960 // by composition and modifier operators.
21961 if (pm_command_call_value_p(node)) {
21962 return left > PM_BINDING_POWER_COMPOSITION;
21963 }
21964 return false;
21965 case PM_DEF_NODE:
21966 // An endless method whose body is a command-style call (e.g.,
21967 // `def f = foo bar`) is a command assignment and can only be
21968 // followed by modifiers.
21969 return left > PM_BINDING_POWER_MODIFIER && pm_command_call_value_p(node);
21970 case PM_RESCUE_MODIFIER_NODE:
21971 // A rescue modifier whose handler is a pattern match (=> or in)
21972 // produces a statement and cannot be followed by operators above
21973 // the modifier level.
21974 if (left > PM_BINDING_POWER_MODIFIER) {
21976 pm_node_t *rescue_expression = cast->rescue_expression;
21977 return PM_NODE_TYPE_P(rescue_expression, PM_MATCH_REQUIRED_NODE) || PM_NODE_TYPE_P(rescue_expression, PM_MATCH_PREDICATE_NODE);
21978 }
21979 return false;
21980 default:
21981 return false;
21982 }
21983}
21984
21993static pm_node_t *
21994parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
21995 if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21996 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21997 return UP(pm_error_recovery_node_create(parser, PM_TOKEN_START(parser, &parser->current), PM_TOKEN_LENGTH(&parser->current)));
21998 }
21999
22000 pm_node_t *node = parse_expression_prefix(parser, binding_power, flags, diag_id, depth);
22001
22002 // Some prefix nodes are statements and can only be followed by modifiers
22003 // (if/unless/while/until/rescue) or nothing at all. We check these cheaply
22004 // here before entering the infix loop.
22005 switch (PM_NODE_TYPE(node)) {
22006 case PM_ERROR_RECOVERY_NODE:
22007 return node;
22008 case PM_PRE_EXECUTION_NODE:
22009 return node;
22010 case PM_POST_EXECUTION_NODE:
22011 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
22012 case PM_ALIAS_METHOD_NODE:
22013 case PM_UNDEF_NODE:
22014 if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
22015 return node;
22016 }
22017 break;
22018 case PM_CALL_NODE:
22019 case PM_SUPER_NODE:
22020 case PM_YIELD_NODE:
22021 case PM_DEF_NODE:
22022 if (parse_expression_terminator(parser, node)) {
22023 return node;
22024 }
22025 break;
22026 case PM_SYMBOL_NODE:
22027 if (pm_symbol_node_label_p(parser, node)) {
22028 return node;
22029 }
22030 break;
22031 default:
22032 break;
22033 }
22034
22035 // Look and see if the next token can be parsed as an infix operator. If it
22036 // can, then we'll parse it using parse_expression_infix.
22037 pm_binding_powers_t current_binding_powers;
22038 pm_token_type_t current_token_type;
22039
22040 while (
22041 current_token_type = parser->current.type,
22042 current_binding_powers = pm_binding_powers[current_token_type],
22043 binding_power <= current_binding_powers.left &&
22044 current_binding_powers.binary
22045 ) {
22046 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, flags, (uint16_t) (depth + 1));
22047 if (parse_expression_terminator(parser, node)) return node;
22048
22049 // If the operator is nonassoc and we should not be able to parse the
22050 // upcoming infix operator, break.
22051 if (current_binding_powers.nonassoc) {
22052 // If this is a non-assoc operator and we are about to parse the
22053 // exact same operator, then we need to add an error.
22054 if (match1(parser, current_token_type)) {
22055 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type));
22056 break;
22057 }
22058
22059 // If this is an endless range, then we need to reject a couple of
22060 // additional operators because it violates the normal operator
22061 // precedence rules. Those patterns are:
22062 //
22063 // 1.. & 2
22064 // 1.. * 2
22065 //
22066 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
22067 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
22068 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_str(parser->current.type), pm_token_str(current_token_type));
22069 break;
22070 }
22071
22072 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
22073 break;
22074 }
22075 } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
22076 break;
22077 }
22078 }
22079
22080 if (flags & PM_PARSE_ACCEPTS_COMMAND_CALL) {
22081 // A command-style method call is only accepted on method chains.
22082 // Thus, we check whether the parsed node can continue method chains.
22083 // The method chain can continue if the parsed node is one of the following five kinds:
22084 // (1) index access: foo[1]
22085 // (2) attribute access: foo.bar
22086 // (3) method call with parenthesis: foo.bar(1)
22087 // (4) method call with a block: foo.bar do end
22088 // (5) constant path: foo::Bar
22089 switch (node->type) {
22090 case PM_CALL_NODE: {
22091 pm_call_node_t *cast = (pm_call_node_t *)node;
22092 if (
22093 // (1) foo[1]
22094 !(
22095 cast->call_operator_loc.length == 0 &&
22096 cast->message_loc.length > 0 &&
22097 parser->start[cast->message_loc.start] == '[' &&
22098 parser->start[cast->message_loc.start + cast->message_loc.length - 1] == ']'
22099 ) &&
22100 // (2) foo.bar
22101 !(
22102 cast->call_operator_loc.length > 0 &&
22103 cast->arguments == NULL &&
22104 cast->block == NULL &&
22105 cast->opening_loc.length == 0
22106 ) &&
22107 // (3) foo.bar(1)
22108 !(
22109 cast->call_operator_loc.length > 0 &&
22110 cast->opening_loc.length > 0
22111 ) &&
22112 // (4) foo.bar do end
22113 !(
22114 cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
22115 )
22116 ) {
22117 flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
22118 }
22119 break;
22120 }
22121 // (5) foo::Bar
22122 case PM_CONSTANT_PATH_NODE:
22123 break;
22124 default:
22125 flags &= (uint8_t) ~PM_PARSE_ACCEPTS_COMMAND_CALL;
22126 break;
22127 }
22128 }
22129
22130 if (context_terminator(parser->current_context->context, &parser->current)) {
22131 pm_binding_powers_t next_binding_powers = pm_binding_powers[parser->current.type];
22132 if (
22133 !next_binding_powers.binary ||
22134 binding_power > next_binding_powers.left ||
22135 (PM_NODE_TYPE_P(node, PM_CALL_NODE) && pm_call_node_command_p((pm_call_node_t *) node))
22136 ) {
22137 return node;
22138 }
22139 }
22140 }
22141
22142 return node;
22143}
22144
22149static pm_statements_node_t *
22150wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
22151 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22152 if (statements == NULL) {
22153 statements = pm_statements_node_create(parser);
22154 }
22155
22156 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22157 pm_arguments_node_arguments_append(
22158 parser->arena,
22159 arguments,
22160 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2)))
22161 );
22162
22163 pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
22164 parser,
22165 arguments,
22166 pm_parser_constant_id_constant(parser, "print", 5)
22167 )), true);
22168 }
22169
22170 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
22171 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22172 if (statements == NULL) {
22173 statements = pm_statements_node_create(parser);
22174 }
22175
22176 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22177 pm_arguments_node_arguments_append(
22178 parser->arena,
22179 arguments,
22180 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2)))
22181 );
22182
22183 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
22184 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver), "split", arguments);
22185
22186 pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
22187 parser,
22188 pm_parser_constant_id_constant(parser, "$F", 2),
22189 UP(call)
22190 );
22191
22192 pm_statements_node_body_prepend(parser->arena, statements, UP(write));
22193 }
22194
22195 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22196 pm_arguments_node_arguments_append(
22197 parser->arena,
22198 arguments,
22199 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2)))
22200 );
22201
22202 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
22203 pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
22204 pm_keyword_hash_node_elements_append(parser->arena, keywords, UP(pm_assoc_node_create(
22205 parser,
22206 UP(pm_symbol_node_synthesized_create(parser, "chomp")),
22207 NULL,
22208 UP(pm_true_node_synthesized_create(parser))
22209 )));
22210
22211 pm_arguments_node_arguments_append(parser->arena, arguments, UP(keywords));
22212 pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
22213 }
22214
22215 pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
22216 pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
22217 parser,
22218 UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4))),
22219 statements
22220 )), true);
22221
22222 statements = wrapped_statements;
22223 }
22224
22225 return statements;
22226}
22227
22231static pm_node_t *
22232parse_program(pm_parser_t *parser) {
22233 // If the current scope is NULL, then we want to push a new top level scope.
22234 // The current scope could exist in the event that we are parsing an eval
22235 // and the user has passed into scopes that already exist.
22236 if (parser->current_scope == NULL) {
22237 pm_parser_scope_push(parser, true);
22238 }
22239
22240 pm_node_list_t current_block_exits = { 0 };
22241 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22242
22243 parser_lex(parser);
22244 pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22245
22246 if (statements != NULL && !parser->parsing_eval) {
22247 // If we have statements, then the top-level statement should be
22248 // explicitly checked as well. We have to do this here because
22249 // everywhere else we check all but the last statement.
22250 assert(statements->body.size > 0);
22251 pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22252 }
22253
22254 pm_constant_id_list_t locals;
22255 pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22256 pm_parser_scope_pop(parser);
22257
22258 // At the top level, see if we need to wrap the statements in a program
22259 // node with a while loop based on the options.
22260 if (parser->command_line & (PM_OPTIONS_COMMAND_LINE_P | PM_OPTIONS_COMMAND_LINE_N)) {
22261 statements = wrap_statements(parser, statements);
22262 } else {
22263 flush_block_exits(parser, previous_block_exits);
22264 }
22265
22266 // If this is an empty file, then we're still going to parse all of the
22267 // statements in order to gather up all of the comments and such. Here we'll
22268 // correct the location information.
22269 if (statements == NULL) {
22270 statements = pm_statements_node_create(parser);
22271 statements->base.location = (pm_location_t) { 0 };
22272 }
22273
22274 return UP(pm_program_node_create(parser, &locals, statements));
22275}
22276
22277/******************************************************************************/
22278/* External functions */
22279/******************************************************************************/
22280
/**
 * Find the first occurrence of the NUL-terminated string `little` within the
 * first `big_length` bytes of `big`. Unlike strstr, `big` does not need to be
 * NUL-terminated and may contain NUL bytes; only `big_length` bounds the
 * search.
 *
 * @param big The buffer to search in.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of bytes of `big` that may be read.
 * @return A pointer to the first match within `big`, `big` itself if `little`
 *     is empty, or NULL if there is no match.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle matches at the start, without reading from big at all.
    if (little_length == 0) return big;

    // A needle longer than the haystack can never match. Bailing out here
    // also avoids forming a pointer that lies before the start of big.
    if (little_length > big_length) return NULL;

    // The last position at which a full copy of little still fits.
    const char *last = big + (big_length - little_length);

    for (const char *cursor = big; cursor <= last; cursor++) {
        if (*cursor == *little && memcmp(cursor, little, little_length) == 0) return cursor;
    }

    return NULL;
}
22300
22301#ifdef _WIN32
22302#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22303#else
22309static void
22310pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22311 if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22312 pm_parser_warn(parser, U32(start - parser->start), U32(length), PM_WARN_SHEBANG_CARRIAGE_RETURN);
22313 }
22314}
22315#endif
22316
22321static void
22322pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22323 const char *switches = pm_strnstr(engine, " -", length);
22324 if (switches == NULL) return;
22325
22326 pm_options_t next_options = *options;
22327 options->shebang_callback(
22328 &next_options,
22329 (const uint8_t *) (switches + 1),
22330 length - ((size_t) (switches - engine)) - 1,
22331 options->shebang_callback_data
22332 );
22333
22334 size_t encoding_length;
22335 if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22336 const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22337 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22338 }
22339
22340 parser->command_line = next_options.command_line;
22341 parser->frozen_string_literal = next_options.frozen_string_literal;
22342}
22343
22347void
22348pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22349 assert(arena != NULL);
22350 assert(source != NULL);
22351
22352 *parser = (pm_parser_t) {
22353 .arena = arena,
22354 .metadata_arena = { 0 },
22355 .node_id = 0,
22356 .lex_state = PM_LEX_STATE_BEG,
22357 .enclosure_nesting = 0,
22358 .lambda_enclosure_nesting = -1,
22359 .brace_nesting = 0,
22360 .do_loop_stack = 0,
22361 .accepts_block_stack = 0,
22362 .lex_modes = {
22363 .index = 0,
22364 .stack = {{ .mode = PM_LEX_DEFAULT }},
22365 .current = &parser->lex_modes.stack[0],
22366 },
22367 .start = source,
22368 .end = source + size,
22369 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22370 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22371 .next_start = NULL,
22372 .heredoc_end = NULL,
22373 .data_loc = { 0 },
22374 .comment_list = { 0 },
22375 .magic_comment_list = { 0 },
22376 .warning_list = { 0 },
22377 .error_list = { 0 },
22378 .current_scope = NULL,
22379 .current_context = NULL,
22380 .encoding = PM_ENCODING_UTF_8_ENTRY,
22381 .encoding_changed_callback = NULL,
22382 .encoding_comment_start = source,
22383 .lex_callback = { 0 },
22384 .filepath = { 0 },
22385 .constant_pool = { 0 },
22386 .line_offsets = { 0 },
22387 .integer = { 0 },
22388 .current_string = PM_STRING_EMPTY,
22389 .start_line = 1,
22390 .explicit_encoding = NULL,
22391 .command_line = 0,
22392 .parsing_eval = false,
22393 .partial_script = false,
22394 .command_start = true,
22395 .recovering = false,
22396 .continuable = true,
22397 .encoding_locked = false,
22398 .encoding_changed = false,
22399 .pattern_matching_newlines = false,
22400 .in_keyword_arg = false,
22401 .current_block_exits = NULL,
22402 .semantic_token_seen = false,
22403 .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22404 .warn_mismatched_indentation = true
22405 };
22406
22407 /* Pre-size the arenas based on input size to reduce the number of block
22408 * allocations (and the kernel page zeroing they trigger). The ratios were
22409 * measured empirically: AST arena ~3.3x input, metadata arena ~1.1x input.
22410 * The reserve call is a no-op when the capacity is at or below the default
22411 * arena block size, so small inputs don't waste an extra allocation. */
22412 if (size <= SIZE_MAX / 4) pm_arena_reserve(arena, size * 4);
22413 if (size <= SIZE_MAX / 5 * 4) pm_arena_reserve(&parser->metadata_arena, size + size / 4);
22414
22415 /* Initialize the constant pool. Measured across 1532 Ruby stdlib files, the
22416 * bytes/constant ratio has a median of ~56 and a 90th percentile of ~135.
22417 * We use 120 as a balance between over-allocation waste and resize
22418 * frequency. Resizes are cheap with arena allocation, so we lean toward
22419 * under-estimating. */
22420 uint32_t constant_size = ((uint32_t) size) / 120;
22421 pm_constant_pool_init(&parser->metadata_arena, &parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22422
22423 /* Initialize the line offset list. Similar to the constant pool, we are
22424 * going to estimate the number of newlines that we will need based on the
22425 * size of the input. */
22426 size_t newline_size = size / 22;
22427 pm_line_offset_list_init(&parser->metadata_arena, &parser->line_offsets, newline_size < 4 ? 4 : newline_size);
22428
22429 // If options were provided to this parse, establish them here.
22430 if (options != NULL) {
22431 // filepath option
22432 parser->filepath = options->filepath;
22433
22434 // line option
22435 parser->start_line = options->line;
22436
22437 // encoding option
22438 size_t encoding_length = pm_string_length(&options->encoding);
22439 if (encoding_length > 0) {
22440 const uint8_t *encoding_source = pm_string_source(&options->encoding);
22441 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22442 }
22443
22444 // encoding_locked option
22445 parser->encoding_locked = options->encoding_locked;
22446
22447 // frozen_string_literal option
22448 parser->frozen_string_literal = options->frozen_string_literal;
22449
22450 // command_line option
22451 parser->command_line = options->command_line;
22452
22453 // version option
22454 parser->version = options->version;
22455
22456 // partial_script
22457 parser->partial_script = options->partial_script;
22458
22459 // scopes option
22460 parser->parsing_eval = options->scopes_count > 0;
22461 if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22462
22463 for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22464 const pm_options_scope_t *scope = pm_options_scope(options, scope_index);
22465 pm_parser_scope_push(parser, scope_index == 0);
22466
22467 // Scopes given from the outside are not allowed to have numbered
22468 // parameters.
22469 parser->current_scope->parameters = ((pm_scope_parameters_t) scope->forwarding) | PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22470
22471 for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22472 const pm_string_t *local = pm_options_scope_local(scope, local_index);
22473
22474 const uint8_t *source = pm_string_source(local);
22475 size_t length = pm_string_length(local);
22476
22477 uint8_t *allocated = (uint8_t *) pm_arena_alloc(&parser->metadata_arena, length, 1);
22478 memcpy(allocated, source, length);
22479 pm_parser_local_add_owned(parser, allocated, length);
22480 }
22481 }
22482 }
22483
22484 // Now that we have established the user-provided options, check if
22485 // a version was given and parse as the latest version otherwise.
22486 if (parser->version == PM_OPTIONS_VERSION_UNSET) {
22487 parser->version = PM_OPTIONS_VERSION_LATEST;
22488 }
22489
22490 pm_accepts_block_stack_push(parser, true);
22491
22492 // Skip past the UTF-8 BOM if it exists.
22493 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22494 parser->current.end += 3;
22495 parser->encoding_comment_start += 3;
22496
22497 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22498 parser->encoding = PM_ENCODING_UTF_8_ENTRY;
22499 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22500 }
22501 }
22502
22503 // If the -x command line flag is set, or the first shebang of the file does
22504 // not include "ruby", then we'll search for a shebang that does include
22505 // "ruby" and start parsing from there.
22506 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22507
22508 // If the first two bytes of the source are a shebang, then we will do a bit
22509 // of extra processing.
22510 //
22511 // First, we'll indicate that the encoding comment is at the end of the
22512 // shebang. This means that when a shebang is present the encoding comment
22513 // can begin on the second line.
22514 //
22515 // Second, we will check if the shebang includes "ruby". If it does, then we
22516 // will start parsing from there. We will also potentially warn the
22517 // user if there is a carriage return at the end of the shebang. We will
22518 // also potentially call the shebang callback if this is the main script to
22519 // allow the caller to parse the shebang and find any command-line options.
22520 // If the shebang does not include "ruby" and this is the main script being
22521 // parsed, then we will start searching the file for a shebang that does
22522 // contain "ruby" as if -x were passed on the command line.
22523 const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
22524 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->current.end);
22525
22526 if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22527 const char *engine;
22528
22529 if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22530 if (newline != NULL) {
22531 parser->encoding_comment_start = newline + 1;
22532
22533 if (options == NULL || options->main_script) {
22534 pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22535 }
22536 }
22537
22538 if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22539 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22540 }
22541
22542 search_shebang = false;
22543 } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22544 search_shebang = true;
22545 }
22546 }
22547
22548 // Here we're going to find the first shebang that includes "ruby" and start
22549 // parsing from there.
22550 if (search_shebang) {
22551 // If a shebang that includes "ruby" is not found, then we're going to add
22552 // a load error to the list of errors on the parser.
22553 bool found_shebang = false;
22554
22555 // This is going to point to the start of each line as we check it.
22556 // We'll maintain a moving window looking at each line as they come.
22557 const uint8_t *cursor = parser->start;
22558
22559 // The newline pointer points to the end of the current line that we're
22560 // considering. If it is NULL, then we're at the end of the file.
22561 const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22562
22563 while (newline != NULL) {
22564 pm_line_offset_list_append(&parser->metadata_arena, &parser->line_offsets, U32(newline - parser->start + 1));
22565
22566 cursor = newline + 1;
22567 newline = next_newline(cursor, parser->end - cursor);
22568
22569 size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22570 if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22571 const char *engine;
22572 if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22573 found_shebang = true;
22574
22575 if (newline != NULL) {
22576 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22577 parser->encoding_comment_start = newline + 1;
22578 }
22579
22580 if (options != NULL && options->shebang_callback != NULL) {
22581 pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22582 }
22583
22584 break;
22585 }
22586 }
22587 }
22588
22589 if (found_shebang) {
22590 parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22591 parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22592 } else {
22593 pm_parser_err(parser, 0, 0, PM_ERR_SCRIPT_NOT_FOUND);
22594 pm_line_offset_list_clear(&parser->line_offsets);
22595 }
22596 }
22597
22598 // The encoding comment can start after any amount of inline whitespace, so
22599 // here we'll advance it to the first non-inline-whitespace character so
22600 // that it is ready for future comparisons.
22601 parser->encoding_comment_start += pm_strspn_inline_whitespace(parser->encoding_comment_start, parser->end - parser->encoding_comment_start);
22602}
22603
22612pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) {
22613 pm_parser_t *parser = (pm_parser_t *) xmalloc(sizeof(pm_parser_t));
22614 if (parser == NULL) abort();
22615
22616 pm_parser_init(arena, parser, source, size, options);
22617 return parser;
22618}
22619
22623void
22624pm_parser_cleanup(pm_parser_t *parser) {
22625 pm_string_cleanup(&parser->filepath);
22626 pm_arena_cleanup(&parser->metadata_arena);
22627
22628 while (parser->current_scope != NULL) {
22629 // Normally, popping the scope doesn't free the locals since it is
22630 // assumed that ownership has transferred to the AST. However if we have
22631 // scopes while we're freeing the parser, it's likely they came from
22632 // eval scopes and we need to free them explicitly here.
22633 pm_parser_scope_pop(parser);
22634 }
22635
22636 while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22637 lex_mode_pop(parser);
22638 }
22639}
22640
22644void
22646 pm_parser_cleanup(parser);
22647 xfree_sized(parser, sizeof(pm_parser_t));
22648}
22649
22655static bool
22656pm_parse_err_is_fatal(pm_diagnostic_id_t diag_id) {
22657 switch (diag_id) {
22658 case PM_ERR_ARRAY_EXPRESSION_AFTER_STAR:
22659 case PM_ERR_BEGIN_UPCASE_BRACE:
22660 case PM_ERR_CLASS_VARIABLE_BARE:
22661 case PM_ERR_END_UPCASE_BRACE:
22662 case PM_ERR_ESCAPE_INVALID_HEXADECIMAL:
22663 case PM_ERR_ESCAPE_INVALID_UNICODE_LIST:
22664 case PM_ERR_ESCAPE_INVALID_UNICODE_SHORT:
22665 case PM_ERR_EXPRESSION_NOT_WRITABLE:
22666 case PM_ERR_EXPRESSION_NOT_WRITABLE_SELF:
22667 case PM_ERR_FLOAT_PARSE:
22668 case PM_ERR_GLOBAL_VARIABLE_BARE:
22669 case PM_ERR_HASH_KEY:
22670 case PM_ERR_HEREDOC_IDENTIFIER:
22671 case PM_ERR_INSTANCE_VARIABLE_BARE:
22672 case PM_ERR_INVALID_BLOCK_EXIT:
22673 case PM_ERR_INVALID_ENCODING_MAGIC_COMMENT:
22674 case PM_ERR_INVALID_FLOAT_EXPONENT:
22675 case PM_ERR_INVALID_NUMBER_BINARY:
22676 case PM_ERR_INVALID_NUMBER_DECIMAL:
22677 case PM_ERR_INVALID_NUMBER_HEXADECIMAL:
22678 case PM_ERR_INVALID_NUMBER_OCTAL:
22679 case PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING:
22680 case PM_ERR_NO_LOCAL_VARIABLE:
22681 case PM_ERR_PARAMETER_ORDER:
22682 case PM_ERR_STATEMENT_UNDEF:
22683 case PM_ERR_VOID_EXPRESSION:
22684 return true;
22685 default:
22686 return false;
22687 }
22688}
22689
22723static void
22724pm_parse_continuable(pm_parser_t *parser) {
22725 // If there are no errors then there is nothing to continue.
22726 if (parser->error_list.size == 0) {
22727 parser->continuable = false;
22728 return;
22729 }
22730
22731 if (!parser->continuable) return;
22732
22733 size_t source_length = (size_t) (parser->end - parser->start);
22734
22735 // First pass: check if there are any non-stray, non-fatal errors.
22736 bool has_non_stray_error = false;
22737 for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
22738 if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE && error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT && !pm_parse_err_is_fatal(error->diag_id)) {
22739 has_non_stray_error = true;
22740 break;
22741 }
22742 }
22743
22744 // Second pass: check each error. We track the minimum source position
22745 // among non-stray, non-fatal errors seen so far in list order, which
22746 // lets us detect cascade stray tokens.
22747 size_t non_stray_min_start = SIZE_MAX;
22748
22749 for (pm_diagnostic_t *error = (pm_diagnostic_t *) parser->error_list.head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
22750 size_t error_start = (size_t) error->location.start;
22751 size_t error_end = error_start + (size_t) error->location.length;
22752 bool at_eof = error_end >= source_length;
22753
22754 // Fatal errors are non-continuable unless they occur at EOF.
22755 if (pm_parse_err_is_fatal(error->diag_id) && !at_eof) {
22756 parser->continuable = false;
22757 return;
22758 }
22759
22760 // Track non-stray, non-fatal error positions in list order.
22761 if (error->diag_id != PM_ERR_UNEXPECTED_TOKEN_IGNORE &&
22762 error->diag_id != PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT) {
22763 if (error_start < non_stray_min_start) non_stray_min_start = error_start;
22764 continue;
22765 }
22766
22767 // This is a stray token. Determine if it is a cascade effect
22768 // of a preceding error or genuinely stray.
22769
22770 // Rule (a): a non-stray error was seen earlier in the list at a
22771 // strictly earlier position — this stray is a cascade effect.
22772 if (non_stray_min_start < error_start) continue;
22773
22774 // Rule (b): this stray is at EOF with valid code before it.
22775 // Single-byte stray tokens at EOF (like `\` for line continuation)
22776 // are likely truncated tokens. Multi-byte stray tokens (like the
22777 // keyword `end`) need additional evidence that they are cascade
22778 // effects (i.e. non-stray errors exist elsewhere).
22779 if (at_eof && error_start > 0) {
22780 // Exception: closing delimiters at EOF are genuinely stray.
22781 if (error->location.length == 1) {
22782 const uint8_t *byte = parser->start + error_start;
22783 if (*byte == ')' || *byte == ']' || *byte == '}') {
22784 parser->continuable = false;
22785 return;
22786 }
22787
22788 // Single-byte non-delimiter stray at EOF: cascade.
22789 continue;
22790 }
22791
22792 // Multi-byte stray at EOF: cascade only if there are
22793 // non-stray errors (evidence of a preceding parse failure).
22794 if (has_non_stray_error) continue;
22795 }
22796
22797 // Rule (c): a stray `=` at the start of a line could be the
22798 // beginning of an embedded document (`=begin`). The remaining
22799 // bytes after `=` parse as an identifier, so the error is not
22800 // at EOF, but the construct is genuinely incomplete.
22801 if (error->location.length == 1) {
22802 const uint8_t *byte = parser->start + error_start;
22803 if (*byte == '=' && (error_start == 0 || *(byte - 1) == '\n')) continue;
22804 }
22805
22806 // This stray token is genuinely non-continuable.
22807 parser->continuable = false;
22808 return;
22809 }
22810}
22811
22815pm_node_t *
22817 pm_node_t *node = parse_program(parser);
22818 pm_parse_continuable(parser);
22819 return node;
22820}
22821
22828pm_node_t *
22829pm_parse_stream(pm_parser_t **parser, pm_arena_t *arena, pm_source_t *source, const pm_options_t *options) {
22830 bool eof = pm_source_stream_read(source);
22831
22832 pm_parser_t *tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options);
22833 pm_node_t *node = pm_parse(tmp);
22834
22835 while (!eof && tmp->error_list.size > 0) {
22836 eof = pm_source_stream_read(source);
22837
22838 pm_parser_free(tmp);
22839 pm_arena_cleanup(arena);
22840
22841 tmp = pm_parser_new(arena, pm_source_source(source), pm_source_length(source), options);
22842 node = pm_parse(tmp);
22843 }
22844
22845 *parser = tmp;
22846 return node;
22847}
22848
22849#undef PM_CASE_KEYWORD
22850#undef PM_CASE_OPERATOR
22851#undef PM_CASE_WRITABLE
22852#undef PM_STRING_EMPTY
22853
22854// We optionally support serializing to a binary string. For systems that don't
22855// want or need this functionality, it can be turned off with the
22856// PRISM_EXCLUDE_SERIALIZATION define.
22857#ifndef PRISM_EXCLUDE_SERIALIZATION
22858
22859static PRISM_INLINE void
22860pm_serialize_header(pm_buffer_t *buffer) {
22861 pm_buffer_append_string(buffer, "PRISM", 5);
22862 pm_buffer_append_byte(buffer, PRISM_VERSION_MAJOR);
22863 pm_buffer_append_byte(buffer, PRISM_VERSION_MINOR);
22864 pm_buffer_append_byte(buffer, PRISM_VERSION_PATCH);
22865 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22866}
22867
22871void
22872pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22873 pm_serialize_header(buffer);
22874 pm_serialize_content(parser, node, buffer);
22875 pm_buffer_append_byte(buffer, '\0');
22876}
22877
22882void
22883pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22884 pm_options_t options = { 0 };
22885 pm_options_read(&options, data);
22886
22887 pm_arena_t arena = { 0 };
22888 pm_parser_t parser;
22889 pm_parser_init(&arena, &parser, source, size, &options);
22890
22891 pm_node_t *node = pm_parse(&parser);
22892
22893 pm_serialize_header(buffer);
22894 pm_serialize_content(&parser, node, buffer);
22895 pm_buffer_append_byte(buffer, '\0');
22896
22897 pm_parser_cleanup(&parser);
22898 pm_arena_cleanup(&arena);
22899 pm_options_cleanup(&options);
22900}
22901
22906void
22907pm_serialize_parse_stream(pm_buffer_t *buffer, pm_source_t *source, const char *data) {
22908 pm_arena_t arena = { 0 };
22909 pm_parser_t *parser;
22910 pm_options_t options = { 0 };
22911 pm_options_read(&options, data);
22912
22913 pm_node_t *node = pm_parse_stream(&parser, &arena, source, &options);
22914 pm_serialize_header(buffer);
22915 pm_serialize_content(parser, node, buffer);
22916 pm_buffer_append_byte(buffer, '\0');
22917
22918 pm_parser_free(parser);
22919 pm_arena_cleanup(&arena);
22920 pm_options_cleanup(&options);
22921}
22922
22926void
22927pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22928 pm_options_t options = { 0 };
22929 pm_options_read(&options, data);
22930
22931 pm_arena_t arena = { 0 };
22932 pm_parser_t parser;
22933 pm_parser_init(&arena, &parser, source, size, &options);
22934
22935 pm_parse(&parser);
22936 pm_serialize_header(buffer);
22937 pm_serialize_encoding(parser.encoding, buffer);
22938 pm_buffer_append_varsint(buffer, parser.start_line);
22939 pm_serialize_comment_list(&parser.comment_list, buffer);
22940
22941 pm_parser_cleanup(&parser);
22942 pm_arena_cleanup(&arena);
22943 pm_options_cleanup(&options);
22944}
22945
22946#endif
#define PRISM_ALIGNOF
Get the alignment requirement of a type.
Definition align.h:15
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition comments.h:18
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
A header file that defines macros to exclude certain features of the prism library.
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
Definition fallthrough.h:15
#define xmalloc
Old name of ruby_xmalloc.
Definition xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition xmalloc.h:55
int len
Length of the buffer.
Definition io.h:8
#define PRISM_INLINE
Old Visual Studio versions do not support the inline keyword, so we need to define it to be __inline.
Definition inline.h:12
VALUE type(ANYARGS)
ANYARGS-ed function type.
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition options.h:96
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should not be frozen.
Definition options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition options.h:42
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition options.h:37
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition options.h:102
PRISM_EXPORTED_FUNCTION PRISM_NODISCARD pm_parser_t * pm_parser_new(pm_arena_t *arena, const uint8_t *source, size_t size, const pm_options_t *options) PRISM_NONNULL(1)
Allocate and initialize a parser with the given start and end pointers.
Definition prism.c:22612
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser) PRISM_NONNULL(1)
Free both the memory held by the given parser and the parser itself.
Definition prism.c:22645
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser) PRISM_NONNULL(1)
Initiate the parser with the given parser.
Definition prism.c:22816
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition node.h:18
The version of the Prism library.
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition version.h:29
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition version.h:24
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition version.h:19
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition version.h:14
The functions related to serializing the AST to a binary format.
Functions for parsing streams.
AndNode.
Definition ast.h:1291
PM_NODE_ALIGNAS struct pm_node * left
AndNode::left.
Definition ast.h:1306
PM_NODE_ALIGNAS struct pm_node * right
AndNode::right.
Definition ast.h:1319
ArgumentsNode.
Definition ast.h:1351
pm_node_t base
The embedded base node.
Definition ast.h:1353
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition ast.h:1363
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition prism.c:1763
pm_node_t * block
The optional block attached to the call.
Definition prism.c:1774
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
Definition prism.c:1777
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition prism.c:1765
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition prism.c:1768
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition prism.c:1771
ArrayNode.
Definition ast.h:1381
struct pm_node_list elements
ArrayNode::elements.
Definition ast.h:1390
ArrayPatternNode.
Definition ast.h:1441
PM_NODE_ALIGNAS struct pm_node * constant
ArrayPatternNode::constant.
Definition ast.h:1459
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition ast.h:1499
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition ast.h:1509
AssocNode.
Definition ast.h:1524
PM_NODE_ALIGNAS struct pm_node * value
AssocNode::value.
Definition ast.h:1555
PM_NODE_ALIGNAS struct pm_node * key
AssocNode::key.
Definition ast.h:1542
BeginNode.
Definition ast.h:1647
PM_NODE_ALIGNAS struct pm_else_node * else_clause
BeginNode::else_clause.
Definition ast.h:1689
PM_NODE_ALIGNAS struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition ast.h:1699
PM_NODE_ALIGNAS struct pm_statements_node * statements
BeginNode::statements.
Definition ast.h:1669
PM_NODE_ALIGNAS struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition ast.h:1679
pm_node_t base
The embedded base node.
Definition ast.h:1649
This struct represents a set of binding powers used for a given token.
Definition prism.c:12445
bool binary
Whether or not this token can be used as a binary operator.
Definition prism.c:12453
pm_binding_power_t left
The left binding power.
Definition prism.c:12447
bool nonassoc
Whether or not this token can be used as non-associative binary operator.
Definition prism.c:12459
pm_binding_power_t right
The right binding power.
Definition prism.c:12450
BlockLocalVariableNode.
Definition ast.h:1764
BlockNode.
Definition ast.h:1791
BlockParametersNode.
Definition ast.h:1919
CallNode.
Definition ast.h:2143
pm_location_t opening_loc
CallNode::opening_loc.
Definition ast.h:2204
pm_location_t closing_loc
CallNode::closing_loc.
Definition ast.h:2224
pm_constant_id_t name
CallNode::name.
Definition ast.h:2184
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
CallNode::arguments.
Definition ast.h:2214
pm_location_t equal_loc
CallNode::equal_loc.
Definition ast.h:2237
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition ast.h:2174
pm_location_t message_loc
CallNode::message_loc.
Definition ast.h:2194
PM_NODE_ALIGNAS struct pm_node * block
CallNode::block.
Definition ast.h:2247
PM_NODE_ALIGNAS struct pm_node * receiver
CallNode::receiver.
Definition ast.h:2161
CaseMatchNode.
Definition ast.h:2578
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition ast.h:2600
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseMatchNode::else_clause.
Definition ast.h:2610
CaseNode.
Definition ast.h:2647
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseNode::else_clause.
Definition ast.h:2679
struct pm_node_list conditions
CaseNode::conditions.
Definition ast.h:2669
ClassVariableReadNode.
Definition ast.h:2936
ClassVariableTargetNode.
Definition ast.h:2964
ClassVariableWriteNode.
Definition ast.h:2986
A list of constant IDs.
size_t size
The number of constant ids in the list.
ConstantPathNode.
Definition ast.h:3195
ConstantPathTargetNode.
Definition ast.h:3330
ConstantReadNode.
Definition ast.h:3423
ConstantTargetNode.
Definition ast.h:3451
ConstantWriteNode.
Definition ast.h:3473
DefNode.
Definition ast.h:3535
pm_location_t equal_loc
DefNode::equal_loc.
Definition ast.h:3592
PM_NODE_ALIGNAS struct pm_node * body
DefNode::body.
Definition ast.h:3562
ElseNode.
Definition ast.h:3649
PM_NODE_ALIGNAS struct pm_statements_node * statements
ElseNode::statements.
Definition ast.h:3661
EnsureNode.
Definition ast.h:3744
PM_NODE_ALIGNAS struct pm_statements_node * statements
EnsureNode::statements.
Definition ast.h:3756
FindPatternNode.
Definition ast.h:3823
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition ast.h:3887
PM_NODE_ALIGNAS struct pm_node * constant
FindPatternNode::constant.
Definition ast.h:3835
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition ast.h:3900
FlipFlopNode.
Definition ast.h:3918
FloatNode.
Definition ast.h:3950
double value
FloatNode::value.
Definition ast.h:3959
pm_node_t base
The embedded base node.
Definition ast.h:3952
ForwardingParameterNode.
Definition ast.h:4083
GlobalVariableReadNode.
Definition ast.h:4256
GlobalVariableTargetNode.
Definition ast.h:4284
GlobalVariableWriteNode.
Definition ast.h:4306
HashNode.
Definition ast.h:4367
struct pm_node_list elements
HashNode::elements.
Definition ast.h:4392
HashPatternNode.
Definition ast.h:4426
PM_NODE_ALIGNAS struct pm_node * constant
HashPatternNode::constant.
Definition ast.h:4441
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition ast.h:4480
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition ast.h:4493
IfNode.
Definition ast.h:4514
PM_NODE_ALIGNAS struct pm_statements_node * statements
IfNode::statements.
Definition ast.h:4573
PM_NODE_ALIGNAS struct pm_node * subsequent
IfNode::subsequent.
Definition ast.h:4592
ImaginaryNode.
Definition ast.h:4619
InNode.
Definition ast.h:4695
PM_NODE_ALIGNAS struct pm_statements_node * statements
InNode::statements.
Definition ast.h:4707
InstanceVariableReadNode.
Definition ast.h:5098
InstanceVariableTargetNode.
Definition ast.h:5126
InstanceVariableWriteNode.
Definition ast.h:5148
IntegerNode.
Definition ast.h:5215
pm_integer_t value
IntegerNode::value.
Definition ast.h:5224
pm_node_t base
The embedded base node.
Definition ast.h:5217
bool negative
Whether or not the integer is negative.
Definition integer.h:38
InterpolatedMatchLastLineNode.
Definition ast.h:5252
InterpolatedRegularExpressionNode.
Definition ast.h:5297
InterpolatedStringNode.
Definition ast.h:5333
pm_node_t base
The embedded base node.
Definition ast.h:5335
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition ast.h:5340
InterpolatedSymbolNode.
Definition ast.h:5365
InterpolatedXStringNode.
Definition ast.h:5397
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition ast.h:5404
pm_node_t base
The embedded base node.
Definition ast.h:5399
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition ast.h:5409
KeywordHashNode.
Definition ast.h:5466
int32_t line
The line number.
uint32_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
LocalVariableReadNode.
Definition ast.h:5702
uint32_t depth
LocalVariableReadNode::depth.
Definition ast.h:5732
pm_constant_id_t name
LocalVariableReadNode::name.
Definition ast.h:5719
LocalVariableTargetNode.
Definition ast.h:5750
LocalVariableWriteNode.
Definition ast.h:5777
uint32_t depth
LocalVariableWriteNode::depth.
Definition ast.h:5803
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition ast.h:5790
This struct represents a slice in the source code, defined by an offset and a length.
Definition ast.h:554
uint32_t start
The offset of the location from the start of the source.
Definition ast.h:556
uint32_t length
The length of the location.
Definition ast.h:559
MatchLastLineNode.
Definition ast.h:5868
struct pm_node_list targets
MatchWriteNode::targets.
Definition ast.h:6035
MultiTargetNode.
Definition ast.h:6102
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition ast.h:6159
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition ast.h:6119
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition ast.h:6169
MultiWriteNode.
Definition ast.h:6184
A list of nodes in the source, most often used for lists of children.
Definition ast.h:567
size_t size
The number of nodes in the list.
Definition ast.h:569
struct pm_node ** nodes
The nodes in the list.
Definition ast.h:575
This is the base structure that represents a node in the syntax tree.
Definition ast.h:1065
pm_node_type_t type
This represents the type of the node.
Definition ast.h:1070
pm_location_t location
This is the location of the node in the source.
Definition ast.h:1088
OptionalParameterNode.
Definition ast.h:6478
OrNode.
Definition ast.h:6515
PM_NODE_ALIGNAS struct pm_node * right
OrNode::right.
Definition ast.h:6543
PM_NODE_ALIGNAS struct pm_node * left
OrNode::left.
Definition ast.h:6530
ParametersNode.
Definition ast.h:6569
PM_NODE_ALIGNAS struct pm_node * block
ParametersNode::block.
Definition ast.h:6606
PM_NODE_ALIGNAS struct pm_node * rest
ParametersNode::rest.
Definition ast.h:6586
PM_NODE_ALIGNAS struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition ast.h:6601
ParenthesesNode.
Definition ast.h:6624
PM_NODE_ALIGNAS struct pm_node * body
ParenthesesNode::body.
Definition ast.h:6631
RangeNode.
Definition ast.h:6854
PM_NODE_ALIGNAS struct pm_node * right
RangeNode::right.
Definition ast.h:6883
PM_NODE_ALIGNAS struct pm_node * left
RangeNode::left.
Definition ast.h:6869
RationalNode.
Definition ast.h:6911
pm_node_t base
The embedded base node.
Definition ast.h:6913
pm_integer_t numerator
RationalNode::numerator.
Definition ast.h:6922
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition prism.c:9735
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition prism.c:9740
pm_token_buffer_t base
The embedded base buffer.
Definition prism.c:9737
RegularExpressionNode.
Definition ast.h:6976
RequiredParameterNode.
Definition ast.h:7048
RescueModifierNode.
Definition ast.h:7070
PM_NODE_ALIGNAS struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition ast.h:7087
RescueNode.
Definition ast.h:7107
PM_NODE_ALIGNAS struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition ast.h:7144
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
Definition ast.h:7134
SplatNode.
Definition ast.h:7397
PM_NODE_ALIGNAS struct pm_node * expression
SplatNode::expression.
Definition ast.h:7409
StatementsNode.
Definition ast.h:7424
struct pm_node_list body
StatementsNode::body.
Definition ast.h:7431
pm_node_t base
The embedded base node.
Definition ast.h:7426
StringNode.
Definition ast.h:7458
pm_node_t base
The embedded base node.
Definition ast.h:7460
pm_string_t unescaped
StringNode::unescaped.
Definition ast.h:7480
pm_location_t content_loc
StringNode::content_loc.
Definition ast.h:7470
pm_location_t closing_loc
StringNode::closing_loc.
Definition ast.h:7475
pm_location_t opening_loc
StringNode::opening_loc.
Definition ast.h:7465
A generic string type that can have various ownership semantics.
Definition stringy.h:18
const uint8_t * source
A pointer to the start of the string.
Definition stringy.h:20
size_t length
The length of the string in bytes of memory.
Definition stringy.h:23
enum pm_string_t::@110 type
The type of the string.
SuperNode.
Definition ast.h:7500
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
SuperNode::arguments.
Definition ast.h:7519
pm_location_t lparen_loc
SuperNode::lparen_loc.
Definition ast.h:7512
PM_NODE_ALIGNAS struct pm_node * block
SuperNode::block.
Definition ast.h:7529
SymbolNode.
Definition ast.h:7552
pm_location_t value_loc
SymbolNode::value_loc.
Definition ast.h:7564
pm_string_t unescaped
SymbolNode::unescaped.
Definition ast.h:7574
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated with the tokens.
Definition prism.c:9709
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition prism.c:9714
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition prism.c:9720
This struct represents a token in the Ruby source.
Definition ast.h:526
const uint8_t * end
A pointer to the end location of the token in the source.
Definition ast.h:534
const uint8_t * start
A pointer to the start location of the token in the source.
Definition ast.h:531
pm_token_type_t type
The type of the token.
Definition ast.h:528
UndefNode.
Definition ast.h:7606
UnlessNode.
Definition ast.h:7636
PM_NODE_ALIGNAS struct pm_statements_node * statements
UnlessNode::statements.
Definition ast.h:7685
PM_NODE_ALIGNAS struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition ast.h:7695
WhenNode.
Definition ast.h:7770
PM_NODE_ALIGNAS struct pm_statements_node * statements
WhenNode::statements.
Definition ast.h:7792
XStringNode.
Definition ast.h:7859
YieldNode.
Definition ast.h:7896
pm_location_t lparen_loc
YieldNode::lparen_loc.
Definition ast.h:7908
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
YieldNode::arguments.
Definition ast.h:7913
#define PRISM_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition unused.h:13