Ruby  3.4.0dev (2024-12-06 revision 892c46283a5ea4179500d951c9d4866c0051f27b)
prism.c
1 #include "prism.h"
2 
6 const char *
7 pm_version(void) {
8  return PRISM_VERSION;
9 }
10 
// NOTE(review): presumably the number of columns a tab character counts for
// when whitespace is measured; the uses are outside this view -- confirm.
15 #define PM_TAB_WHITESPACE_SIZE 8
16 
17 // Macros for min/max.
// Both are plain expression macros: an argument may be evaluated more than
// once, so do not pass expressions that have side effects.
18 #define MIN(a,b) (((a)<(b))?(a):(b))
19 #define MAX(a,b) (((a)>(b))?(a):(b))
20 
21 /******************************************************************************/
22 /* Lex mode manipulations */
23 /******************************************************************************/
24 
/**
 * Return the byte that should increase the nesting level of a literal that
 * was opened with `start`. Only the bracket-like openers ( [ { < nest; any
 * other opening byte yields '\0', meaning "no incrementor".
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    if (start == '(' || start == '[' || start == '{' || start == '<') {
        return start;
    }

    return '\0';
}
41 
/**
 * Return the byte that closes a literal opened with `start`. The bracket-like
 * openers ( [ { < map to their matching closer; every other byte closes
 * itself.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';

    return start;
}
61 
67 static bool
68 lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
69  lex_mode.prev = parser->lex_modes.current;
70  parser->lex_modes.index++;
71 
72  if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
73  parser->lex_modes.current = (pm_lex_mode_t *) xmalloc(sizeof(pm_lex_mode_t));
74  if (parser->lex_modes.current == NULL) return false;
75 
76  *parser->lex_modes.current = lex_mode;
77  } else {
78  parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
79  parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
80  }
81 
82  return true;
83 }
84 
88 static inline bool
89 lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
90  uint8_t incrementor = lex_mode_incrementor(delimiter);
91  uint8_t terminator = lex_mode_terminator(delimiter);
92 
93  pm_lex_mode_t lex_mode = {
94  .mode = PM_LEX_LIST,
95  .as.list = {
96  .nesting = 0,
97  .interpolation = interpolation,
98  .incrementor = incrementor,
99  .terminator = terminator
100  }
101  };
102 
103  // These are the places where we need to split up the content of the list.
104  // We'll use strpbrk to find the first of these characters.
105  uint8_t *breakpoints = lex_mode.as.list.breakpoints;
106  memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
107  size_t index = 7;
108 
109  // Now we'll add the terminator to the list of breakpoints. If the
110  // terminator is not already a NULL byte, add it to the list.
111  if (terminator != '\0') {
112  breakpoints[index++] = terminator;
113  }
114 
115  // If interpolation is allowed, then we're going to check for the #
116  // character. Otherwise we'll only look for escapes and the terminator.
117  if (interpolation) {
118  breakpoints[index++] = '#';
119  }
120 
121  // If there is an incrementor, then we'll check for that as well.
122  if (incrementor != '\0') {
123  breakpoints[index++] = incrementor;
124  }
125 
126  parser->explicit_encoding = NULL;
127  return lex_mode_push(parser, lex_mode);
128 }
129 
135 static inline bool
136 lex_mode_push_list_eof(pm_parser_t *parser) {
137  return lex_mode_push_list(parser, false, '\0');
138 }
139 
143 static inline bool
144 lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
145  pm_lex_mode_t lex_mode = {
146  .mode = PM_LEX_REGEXP,
147  .as.regexp = {
148  .nesting = 0,
149  .incrementor = incrementor,
150  .terminator = terminator
151  }
152  };
153 
154  // These are the places where we need to split up the content of the
155  // regular expression. We'll use strpbrk to find the first of these
156  // characters.
157  uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
158  memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
159  size_t index = 4;
160 
161  // First we'll add the terminator.
162  if (terminator != '\0') {
163  breakpoints[index++] = terminator;
164  }
165 
166  // Next, if there is an incrementor, then we'll check for that as well.
167  if (incrementor != '\0') {
168  breakpoints[index++] = incrementor;
169  }
170 
171  parser->explicit_encoding = NULL;
172  return lex_mode_push(parser, lex_mode);
173 }
174 
178 static inline bool
179 lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
180  pm_lex_mode_t lex_mode = {
181  .mode = PM_LEX_STRING,
182  .as.string = {
183  .nesting = 0,
184  .interpolation = interpolation,
185  .label_allowed = label_allowed,
186  .incrementor = incrementor,
187  .terminator = terminator
188  }
189  };
190 
191  // These are the places where we need to split up the content of the
192  // string. We'll use strpbrk to find the first of these characters.
193  uint8_t *breakpoints = lex_mode.as.string.breakpoints;
194  memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
195  size_t index = 3;
196 
197  // Now add in the terminator. If the terminator is not already a NULL byte,
198  // then we'll add it.
199  if (terminator != '\0') {
200  breakpoints[index++] = terminator;
201  }
202 
203  // If interpolation is allowed, then we're going to check for the #
204  // character. Otherwise we'll only look for escapes and the terminator.
205  if (interpolation) {
206  breakpoints[index++] = '#';
207  }
208 
209  // If we have an incrementor, then we'll add that in as a breakpoint as
210  // well.
211  if (incrementor != '\0') {
212  breakpoints[index++] = incrementor;
213  }
214 
215  parser->explicit_encoding = NULL;
216  return lex_mode_push(parser, lex_mode);
217 }
218 
224 static inline bool
225 lex_mode_push_string_eof(pm_parser_t *parser) {
226  return lex_mode_push_string(parser, false, false, '\0', '\0');
227 }
228 
234 static void
235 lex_mode_pop(pm_parser_t *parser) {
236  if (parser->lex_modes.index == 0) {
237  parser->lex_modes.current->mode = PM_LEX_DEFAULT;
238  } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
239  parser->lex_modes.index--;
240  parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
241  } else {
242  parser->lex_modes.index--;
243  pm_lex_mode_t *prev = parser->lex_modes.current->prev;
244  xfree(parser->lex_modes.current);
245  parser->lex_modes.current = prev;
246  }
247 }
248 
252 static inline bool
253 lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
254  return parser->lex_state & state;
255 }
256 
// The classification returned by lex_state_ignored_p for the parser's current
// lex state.
257 typedef enum {
// Neither of the two conditions below holds.
258  PM_IGNORED_NEWLINE_NONE = 0,
// The state has one of BEG, CLASS, FNAME or DOT set and LABELED is not set.
259  PM_IGNORED_NEWLINE_ALL,
// The state, ignoring the LABEL bit, is exactly ARG | LABELED.
260  PM_IGNORED_NEWLINE_PATTERN
261 } pm_ignored_newline_type_t;
262 
263 static inline pm_ignored_newline_type_t
264 lex_state_ignored_p(pm_parser_t *parser) {
265  bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
266 
267  if (ignored) {
268  return PM_IGNORED_NEWLINE_ALL;
269  } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
270  return PM_IGNORED_NEWLINE_PATTERN;
271  } else {
272  return PM_IGNORED_NEWLINE_NONE;
273  }
274 }
275 
276 static inline bool
277 lex_state_beg_p(pm_parser_t *parser) {
278  return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
279 }
280 
281 static inline bool
282 lex_state_arg_p(pm_parser_t *parser) {
283  return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
284 }
285 
286 static inline bool
287 lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
288  if (parser->current.end >= parser->end) {
289  return false;
290  }
291  return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
292 }
293 
294 static inline bool
295 lex_state_end_p(pm_parser_t *parser) {
296  return lex_state_p(parser, PM_LEX_STATE_END_ANY);
297 }
298 
302 static inline bool
303 lex_state_operator_p(pm_parser_t *parser) {
304  return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
305 }
306 
311 static inline void
312 lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
313  parser->lex_state = state;
314 }
315 
// Debug logging of lex state changes is compiled in only when
// PM_DEBUG_LOGGING is non-zero. It defaults to 0 unless the build has already
// defined it.
316 #ifndef PM_DEBUG_LOGGING
321 #define PM_DEBUG_LOGGING 0
322 #endif
323 
324 #if PM_DEBUG_LOGGING
325 PRISM_ATTRIBUTE_UNUSED static void
326 debug_state(pm_parser_t *parser) {
327  fprintf(stderr, "STATE: ");
328  bool first = true;
329 
330  if (parser->lex_state == PM_LEX_STATE_NONE) {
331  fprintf(stderr, "NONE\n");
332  return;
333  }
334 
335 #define CHECK_STATE(state) \
336  if (parser->lex_state & state) { \
337  if (!first) fprintf(stderr, "|"); \
338  fprintf(stderr, "%s", #state); \
339  first = false; \
340  }
341 
342  CHECK_STATE(PM_LEX_STATE_BEG)
343  CHECK_STATE(PM_LEX_STATE_END)
344  CHECK_STATE(PM_LEX_STATE_ENDARG)
345  CHECK_STATE(PM_LEX_STATE_ENDFN)
346  CHECK_STATE(PM_LEX_STATE_ARG)
347  CHECK_STATE(PM_LEX_STATE_CMDARG)
348  CHECK_STATE(PM_LEX_STATE_MID)
349  CHECK_STATE(PM_LEX_STATE_FNAME)
350  CHECK_STATE(PM_LEX_STATE_DOT)
351  CHECK_STATE(PM_LEX_STATE_CLASS)
352  CHECK_STATE(PM_LEX_STATE_LABEL)
353  CHECK_STATE(PM_LEX_STATE_LABELED)
354  CHECK_STATE(PM_LEX_STATE_FITEM)
355 
356 #undef CHECK_STATE
357 
358  fprintf(stderr, "\n");
359 }
360 
361 static void
362 debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
363  fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
364  debug_state(parser);
365  lex_state_set(parser, state);
366  fprintf(stderr, "Now: ");
367  debug_state(parser);
368  fprintf(stderr, "\n");
369 }
370 
// From this point on in the file, lex_state_set(...) expands to a call to
// debug_lex_state_set that also passes the calling function's name and the
// line number of the call.
371 #define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
372 #endif
373 
374 /******************************************************************************/
375 /* Command-line macro helpers */
376 /******************************************************************************/
377 
// Evaluates to a non-zero value when the bit(s) in `option` are set in the
// parser's `command_line` field.
379 #define PM_PARSER_COMMAND_LINE_OPTION(parser, option) ((parser)->command_line & (option))
380 
// Non-zero when PM_OPTIONS_COMMAND_LINE_A is set on the parser.
382 #define PM_PARSER_COMMAND_LINE_OPTION_A(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)
383 
// Non-zero when PM_OPTIONS_COMMAND_LINE_E is set on the parser.
385 #define PM_PARSER_COMMAND_LINE_OPTION_E(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)
386 
// Non-zero when PM_OPTIONS_COMMAND_LINE_L is set on the parser.
388 #define PM_PARSER_COMMAND_LINE_OPTION_L(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)
389 
// Non-zero when PM_OPTIONS_COMMAND_LINE_N is set on the parser.
391 #define PM_PARSER_COMMAND_LINE_OPTION_N(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)
392 
// Non-zero when PM_OPTIONS_COMMAND_LINE_P is set on the parser.
394 #define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)
395 
// Non-zero when PM_OPTIONS_COMMAND_LINE_X is set on the parser.
397 #define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
398 
399 /******************************************************************************/
400 /* Diagnostic-related functions */
401 /******************************************************************************/
402 
406 static inline void
407 pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
408  pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
409 }
410 
/**
 * Append a formatted error covering the range from `start` to `end` to the
 * parser's error list. The variadic arguments are forwarded as the format
 * arguments of the diagnostic.
 *
 * The `parser` argument is parenthesized so that any pointer-valued
 * expression (not just a plain identifier) binds correctly to `->`.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
416 
421 static inline void
422 pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
423  pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
424 }
425 
// Append a formatted error covering the range described by `location`, which
// must be a pointer to something with `start` and `end` members.
430 #define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
431  PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
432 
437 static inline void
438 pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
439  pm_parser_err(parser, node->location.start, node->location.end, diag_id);
440 }
441 
// Append a formatted error covering the location of `node` (a pointer to a
// node with a `location` member).
446 #define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
447  PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
448 
// Like PM_PARSER_ERR_NODE_FORMAT, but supplies the format arguments itself:
// the byte length of the node's source range as an int, followed by a pointer
// to its first byte. The format string for `diag_id` is defined elsewhere and
// must expect those two arguments.
453 #define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
454  PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
455 
460 static inline void
461 pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
462  pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
463 }
464 
469 static inline void
470 pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
471  pm_parser_err(parser, token->start, token->end, diag_id);
472 }
473 
// Append a formatted error covering `token`. Note that `token` is accessed
// with `.`, so it must be a token value (not a pointer to one).
478 #define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
479  PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
480 
// Like PM_PARSER_ERR_TOKEN_FORMAT, but supplies the format arguments itself:
// the byte length of the token as an int, followed by a pointer to its first
// byte. The format string for `diag_id` is defined elsewhere and must expect
// those two arguments.
485 #define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
486  PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
487 
491 static inline void
492 pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
493  pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
494 }
495 
500 static inline void
501 pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
502  pm_parser_warn(parser, token->start, token->end, diag_id);
503 }
504 
509 static inline void
510 pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
511  pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
512 }
513 
/**
 * Append a formatted warning covering the range from `start` to `end` to the
 * parser's warning list. The variadic arguments are forwarded as the format
 * arguments of the diagnostic.
 *
 * The `parser` argument is parenthesized so that any pointer-valued
 * expression (not just a plain identifier) binds correctly to `->`.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)
519 
// Append a formatted warning covering `token`. Note that `token` is accessed
// with `.`, so it must be a token value (not a pointer to one).
524 #define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
525  PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
526 
// Like PM_PARSER_WARN_TOKEN_FORMAT, but supplies the format arguments itself:
// the byte length of the token as an int, followed by a pointer to its first
// byte. The format string for `diag_id` is defined elsewhere and must expect
// those two arguments.
531 #define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
532  PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
533 
// Append a formatted warning covering the location of `node` (a pointer to a
// node with a `location` member).
538 #define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
539  PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
540 
546 static void
547 pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
548  PM_PARSER_ERR_FORMAT(
549  parser,
550  ident_start,
551  ident_start + ident_length,
552  PM_ERR_HEREDOC_TERM,
553  (int) ident_length,
554  (const char *) ident_start
555  );
556 }
557 
558 /******************************************************************************/
559 /* Scope-related functions */
560 /******************************************************************************/
561 
565 static bool
566 pm_parser_scope_push(pm_parser_t *parser, bool closed) {
567  pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
568  if (scope == NULL) return false;
569 
570  *scope = (pm_scope_t) {
571  .previous = parser->current_scope,
572  .locals = { 0 },
573  .parameters = PM_SCOPE_PARAMETERS_NONE,
574  .implicit_parameters = { 0 },
575  .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
576  .closed = closed
577  };
578 
579  parser->current_scope = scope;
580  return true;
581 }
582 
587 static bool
588 pm_parser_scope_toplevel_p(pm_parser_t *parser) {
589  pm_scope_t *scope = parser->current_scope;
590 
591  do {
592  if (scope->previous == NULL) return true;
593  if (scope->closed) return false;
594  } while ((scope = scope->previous) != NULL);
595 
596  assert(false && "unreachable");
597  return true;
598 }
599 
603 static pm_scope_t *
604 pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
605  pm_scope_t *scope = parser->current_scope;
606 
607  while (depth-- > 0) {
608  assert(scope != NULL);
609  scope = scope->previous;
610  }
611 
612  return scope;
613 }
614 
// The possible results of pm_parser_scope_forwarding_param_check.
615 typedef enum {
// The nearest closed scope has the parameter, and no non-closed scope walked
// before it has it.
616  PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
// The nearest closed scope has the parameter, but so does a non-closed scope
// walked before it.
617  PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
// The nearest closed scope does not have the parameter, or the scope chain
// ended before a closed scope was found.
618  PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
619 } pm_scope_forwarding_param_check_result_t;
620 
621 static pm_scope_forwarding_param_check_result_t
622 pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
623  pm_scope_t *scope = parser->current_scope;
624  bool conflict = false;
625 
626  while (scope != NULL) {
627  if (scope->parameters & mask) {
628  if (scope->closed) {
629  if (conflict) {
630  return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
631  } else {
632  return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
633  }
634  }
635 
636  conflict = true;
637  }
638 
639  if (scope->closed) break;
640  scope = scope->previous;
641  }
642 
643  return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
644 }
645 
646 static void
647 pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
648  switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
649  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
650  // Pass.
651  break;
652  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
653  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
654  break;
655  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
656  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
657  break;
658  }
659 }
660 
661 static void
662 pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
663  switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
664  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
665  // Pass.
666  break;
667  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
668  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
669  break;
670  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
671  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
672  break;
673  }
674 }
675 
676 static void
677 pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
678  switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
679  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
680  // Pass.
681  break;
682  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
683  // This shouldn't happen, because ... is not allowed in the
684  // declaration of blocks. If we get here, we assume we already have
685  // an error for this.
686  break;
687  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
688  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
689  break;
690  }
691 }
692 
693 static void
694 pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
695  switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
696  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
697  // Pass.
698  break;
699  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
700  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
701  break;
702  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
703  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
704  break;
705  }
706 }
707 
711 static inline pm_shareable_constant_value_t
712 pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
713  return parser->current_scope->shareable_constant;
714 }
715 
720 static void
721 pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
722  pm_scope_t *scope = parser->current_scope;
723 
724  do {
725  scope->shareable_constant = shareable_constant;
726  } while (!scope->closed && (scope = scope->previous) != NULL);
727 }
728 
729 /******************************************************************************/
730 /* Local variable-related functions */
731 /******************************************************************************/
732 
// A locals table whose capacity is below this value is treated as a flat
// list that is scanned linearly; at or above it, the table is treated as an
// open-addressed hash keyed by pm_locals_hash.
736 #define PM_LOCALS_HASH_THRESHOLD 9
737 
738 static void
739 pm_locals_free(pm_locals_t *locals) {
740  if (locals->capacity > 0) {
741  xfree(locals->locals);
742  }
743 }
744 
749 static uint32_t
750 pm_locals_hash(pm_constant_id_t name) {
751  name = ((name >> 16) ^ name) * 0x45d9f3b;
752  name = ((name >> 16) ^ name) * 0x45d9f3b;
753  name = (name >> 16) ^ name;
754  return name;
755 }
756 
761 static void
762 pm_locals_resize(pm_locals_t *locals) {
763  uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
764  assert(next_capacity > locals->capacity);
765 
766  pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
767  if (next_locals == NULL) abort();
768 
769  if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
770  if (locals->size > 0) {
771  memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
772  }
773  } else {
774  // If we just switched from a list to a hash, then we need to fill in
775  // the hash values of all of the locals.
776  bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
777  uint32_t mask = next_capacity - 1;
778 
779  for (uint32_t index = 0; index < locals->capacity; index++) {
780  pm_local_t *local = &locals->locals[index];
781 
782  if (local->name != PM_CONSTANT_ID_UNSET) {
783  if (hash_needed) local->hash = pm_locals_hash(local->name);
784 
785  uint32_t hash = local->hash;
786  while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
787  next_locals[hash & mask] = *local;
788  }
789  }
790  }
791 
792  pm_locals_free(locals);
793  locals->locals = next_locals;
794  locals->capacity = next_capacity;
795 }
796 
812 static bool
813 pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
814  if (locals->size >= (locals->capacity / 4 * 3)) {
815  pm_locals_resize(locals);
816  }
817 
818  if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
819  for (uint32_t index = 0; index < locals->capacity; index++) {
820  pm_local_t *local = &locals->locals[index];
821 
822  if (local->name == PM_CONSTANT_ID_UNSET) {
823  *local = (pm_local_t) {
824  .name = name,
825  .location = { .start = start, .end = end },
826  .index = locals->size++,
827  .reads = reads,
828  .hash = 0
829  };
830  return true;
831  } else if (local->name == name) {
832  return false;
833  }
834  }
835  } else {
836  uint32_t mask = locals->capacity - 1;
837  uint32_t hash = pm_locals_hash(name);
838  uint32_t initial_hash = hash;
839 
840  do {
841  pm_local_t *local = &locals->locals[hash & mask];
842 
843  if (local->name == PM_CONSTANT_ID_UNSET) {
844  *local = (pm_local_t) {
845  .name = name,
846  .location = { .start = start, .end = end },
847  .index = locals->size++,
848  .reads = reads,
849  .hash = initial_hash
850  };
851  return true;
852  } else if (local->name == name) {
853  return false;
854  } else {
855  hash++;
856  }
857  } while ((hash & mask) != initial_hash);
858  }
859 
860  assert(false && "unreachable");
861  return true;
862 }
863 
868 static uint32_t
869 pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
870  if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
871  for (uint32_t index = 0; index < locals->size; index++) {
872  pm_local_t *local = &locals->locals[index];
873  if (local->name == name) return index;
874  }
875  } else {
876  uint32_t mask = locals->capacity - 1;
877  uint32_t hash = pm_locals_hash(name);
878  uint32_t initial_hash = hash & mask;
879 
880  do {
881  pm_local_t *local = &locals->locals[hash & mask];
882 
883  if (local->name == PM_CONSTANT_ID_UNSET) {
884  return UINT32_MAX;
885  } else if (local->name == name) {
886  return hash & mask;
887  } else {
888  hash++;
889  }
890  } while ((hash & mask) != initial_hash);
891  }
892 
893  return UINT32_MAX;
894 }
895 
900 static void
901 pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
902  uint32_t index = pm_locals_find(locals, name);
903  assert(index != UINT32_MAX);
904 
905  pm_local_t *local = &locals->locals[index];
906  assert(local->reads < UINT32_MAX);
907 
908  local->reads++;
909 }
910 
915 static void
916 pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
917  uint32_t index = pm_locals_find(locals, name);
918  assert(index != UINT32_MAX);
919 
920  pm_local_t *local = &locals->locals[index];
921  assert(local->reads > 0);
922 
923  local->reads--;
924 }
925 
929 static uint32_t
930 pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
931  uint32_t index = pm_locals_find(locals, name);
932  assert(index != UINT32_MAX);
933 
934  return locals->locals[index].reads;
935 }
936 
/**
 * Copy the names of all set entries in `locals` into `list`, inserting each
 * name at the entry's recorded `index`, and emit
 * PM_WARN_UNUSED_LOCAL_VARIABLE for entries that were never read.
 *
 * A warning is only produced when warnings are enabled for this call (see
 * `warn_unused` below), the entry has zero reads, the line condition below
 * holds, and the name is non-empty and does not start with an underscore.
 *
 * Note that `parser` is tagged PRISM_ATTRIBUTE_UNUSED even though the body
 * visible here does use it.
 */
945 static void
946 pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
// NOTE(review): nothing visible in this body prepares `list` before the
// insertions below, and this listing skips a line number right here --
// confirm against the real file whether an initialization call belongs here.
948 
949  // If we're still below the threshold for switching to a hash, then we only
950  // need to loop over the locals until we hit the size because the locals are
951  // stored in a list.
952  uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
953 
954  // We will only warn for unused variables if we're not at the top level, or
955  // if we're parsing a file outside of eval or -e.
956  bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
957 
958  for (uint32_t index = 0; index < capacity; index++) {
959  pm_local_t *local = &locals->locals[index];
960 
961  if (local->name != PM_CONSTANT_ID_UNSET) {
962  pm_constant_id_list_insert(list, (size_t) local->index, local->name);
963 
// The line condition: either the parser's start line is non-negative, or the
// line computed for the local's start position is non-negative.
964  if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
965  pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
966 
// Names that start with '_' are exempt from the warning.
967  if (constant->length >= 1 && *constant->start != '_') {
968  PM_PARSER_WARN_FORMAT(
969  parser,
970  local->location.start,
971  local->location.end,
972  PM_WARN_UNUSED_LOCAL_VARIABLE,
973  (int) constant->length,
974  (const char *) constant->start
975  );
976  }
977  }
978  }
979  }
980 }
981 
982 /******************************************************************************/
983 /* Node-related functions */
984 /******************************************************************************/
985 
989 static inline pm_constant_id_t
990 pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
991  return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
992 }
993 
997 static inline pm_constant_id_t
998 pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
999  return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1000 }
1001 
1005 static inline pm_constant_id_t
1006 pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1007  return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1008 }
1009 
1013 static inline pm_constant_id_t
1014 pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1015  return pm_parser_constant_id_location(parser, token->start, token->end);
1016 }
1017 
1022 static inline pm_constant_id_t
1023 pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1024  return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1025 }
1026 
/**
 * Walk down from `node` through the sub-node(s) selected by the cases below
 * and look for a node that pm_assert_value_expression reports as
 * PM_ERR_VOID_EXPRESSION.
 *
 * @return The offending node, or NULL when none is found.
 *
 * NOTE(review): this listing appears to have dropped a few lines inside this
 * function (case labels after PM_RETRY_NODE, the `cast` declaration in the
 * PM_PARENTHESES_NODE arm, and the case label and `cast` declaration that
 * open the last braced arm). Check the real file before editing this code.
 */
1032 static pm_node_t *
1033 pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
// The first void node found in an earlier branch, remembered while the
// remaining branches are still being checked.
1034  pm_node_t *void_node = NULL;
1035 
1036  while (node != NULL) {
1037  switch (PM_NODE_TYPE(node)) {
1038  case PM_RETURN_NODE:
1039  case PM_BREAK_NODE:
1040  case PM_NEXT_NODE:
1041  case PM_REDO_NODE:
1042  case PM_RETRY_NODE:
// Prefer a void node remembered from an earlier branch over this one.
1044  return void_node != NULL ? void_node : node;
1046  return NULL;
1047  case PM_BEGIN_NODE: {
1048  pm_begin_node_t *cast = (pm_begin_node_t *) node;
1049 
1050  if (cast->ensure_clause != NULL) {
// With an ensure clause: a void node found in the rescue clause or in the
// statements is returned right away; otherwise continue into the ensure
// clause.
1051  if (cast->rescue_clause != NULL) {
1052  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
1053  if (vn != NULL) return vn;
1054  }
1055 
1056  if (cast->statements != NULL) {
1057  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1058  if (vn != NULL) return vn;
1059  }
1060 
1061  node = (pm_node_t *) cast->ensure_clause;
1062  } else if (cast->rescue_clause != NULL) {
// With only rescue clauses: the statements must yield a void node, and so
// must every rescue clause, otherwise the remembered void node is cleared.
1063  if (cast->statements == NULL) return NULL;
1064 
1065  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1066  if (vn == NULL) return NULL;
1067  if (void_node == NULL) void_node = vn;
1068 
1069  for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1070  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
1071  if (vn == NULL) {
1072  void_node = NULL;
1073  break;
1074  }
1075  if (void_node == NULL) {
1076  void_node = vn;
1077  }
1078  }
1079 
1080  if (cast->else_clause != NULL) {
1081  node = (pm_node_t *) cast->else_clause;
1082  } else {
1083  return void_node;
1084  }
1085  } else {
1086  node = (pm_node_t *) cast->statements;
1087  }
1088 
1089  break;
1090  }
1091  case PM_ENSURE_NODE: {
1092  pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1093  node = (pm_node_t *) cast->statements;
1094  break;
1095  }
1096  case PM_PARENTHESES_NODE: {
1098  node = (pm_node_t *) cast->body;
1099  break;
1100  }
1101  case PM_STATEMENTS_NODE: {
1102  pm_statements_node_t *cast = (pm_statements_node_t *) node;
// Continue with the last statement in the list.
// NOTE(review): this indexes body.size - 1 without a visible emptiness check;
// presumably statements nodes are never empty here -- confirm.
1103  node = cast->body.nodes[cast->body.size - 1];
1104  break;
1105  }
1106  case PM_IF_NODE: {
1107  pm_if_node_t *cast = (pm_if_node_t *) node;
// Both branches must exist and the first must yield a void node; then the
// check continues into the subsequent branch.
1108  if (cast->statements == NULL || cast->subsequent == NULL) {
1109  return NULL;
1110  }
1111  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1112  if (vn == NULL) {
1113  return NULL;
1114  }
1115  if (void_node == NULL) {
1116  void_node = vn;
1117  }
1118  node = cast->subsequent;
1119  break;
1120  }
1121  case PM_UNLESS_NODE: {
1122  pm_unless_node_t *cast = (pm_unless_node_t *) node;
// Same shape as the PM_IF_NODE arm, with the else clause as the other branch.
1123  if (cast->statements == NULL || cast->else_clause == NULL) {
1124  return NULL;
1125  }
1126  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1127  if (vn == NULL) {
1128  return NULL;
1129  }
1130  if (void_node == NULL) {
1131  void_node = vn;
1132  }
1133  node = (pm_node_t *) cast->else_clause;
1134  break;
1135  }
1136  case PM_ELSE_NODE: {
1137  pm_else_node_t *cast = (pm_else_node_t *) node;
1138  node = (pm_node_t *) cast->statements;
1139  break;
1140  }
1141  case PM_AND_NODE: {
1142  pm_and_node_t *cast = (pm_and_node_t *) node;
// Only the left operand is followed.
1143  node = cast->left;
1144  break;
1145  }
1146  case PM_OR_NODE: {
1147  pm_or_node_t *cast = (pm_or_node_t *) node;
// Only the left operand is followed.
1148  node = cast->left;
1149  break;
1150  }
1153 
// Find the scope `cast->depth` levels above the current one and count a read
// of the local named `cast->name` in it.
1154  pm_scope_t *scope = parser->current_scope;
1155  for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1156 
1157  pm_locals_read(&scope->locals, cast->name);
1158  return NULL;
1159  }
1160  default:
1161  return NULL;
1162  }
1163  }
1164 
1165  return NULL;
1166 }
1167 
1168 static inline void
1169 pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1170  pm_node_t *void_node = pm_check_value_expression(parser, node);
1171  if (void_node != NULL) {
1172  pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1173  }
1174 }
1175 
/**
 * Emit a PM_WARN_VOID_STATEMENT warning for the given node when its kind is one
 * of those recognised by the switch below. `type` is the text handed to the
 * warning and `length` is the number of bytes of it to use; when no case
 * assigns `type`, no warning is produced.
 *
 * NOTE(review): this excerpt is a numbered listing with lines missing (its own
 * numbering skips 1185-1190, 1268-1269, 1271-1272 and 1287). The code below is
 * therefore not complete as shown; restore the missing lines from the real
 * source before relying on it.
 */
1179 static void
1180 pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1181  const char *type = NULL;
1182  int length = 0;
1183 
1184  switch (PM_NODE_TYPE(node)) {
      // NOTE(review): the case labels that select "a variable" are not visible
      // in this excerpt (listing lines 1185-1190 are missing).
1191  type = "a variable";
1192  length = 10;
1193  break;
1194  case PM_CALL_NODE: {
1195  const pm_call_node_t *cast = (const pm_call_node_t *) node;
      // Only calls without a call operator and with a message location are
      // considered; everything else leaves `type` as NULL.
1196  if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1197 
      // Look the method name up in the constant pool and match it against
      // the operator spellings below, dispatching on the name's length.
1198  const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1199  switch (message->length) {
1200  case 1:
1201  switch (message->start[0]) {
1202  case '+':
1203  case '-':
1204  case '*':
1205  case '/':
1206  case '%':
1207  case '|':
1208  case '^':
1209  case '&':
1210  case '>':
1211  case '<':
      // `type` points at the pooled name itself, so the explicit length
      // is what bounds the text used by the warning.
1212  type = (const char *) message->start;
1213  length = 1;
1214  break;
1215  }
1216  break;
1217  case 2:
      // Two-byte names: dispatch on the second byte, then check the first.
1218  switch (message->start[1]) {
1219  case '=':
      // <=, >=, != and ==
1220  if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1221  type = (const char *) message->start;
1222  length = 2;
1223  }
1224  break;
1225  case '@':
      // +@ and -@
1226  if (message->start[0] == '+' || message->start[0] == '-') {
1227  type = (const char *) message->start;
1228  length = 2;
1229  }
1230  break;
1231  case '*':
      // **
1232  if (message->start[0] == '*') {
1233  type = (const char *) message->start;
1234  length = 2;
1235  }
1236  break;
1237  }
1238  break;
1239  case 3:
1240  if (memcmp(message->start, "<=>", 3) == 0) {
1241  type = "<=>";
1242  length = 3;
1243  }
1244  break;
1245  }
1246 
1247  break;
1248  }
1249  case PM_CONSTANT_PATH_NODE:
1250  type = "::";
1251  length = 2;
1252  break;
1253  case PM_CONSTANT_READ_NODE:
1254  type = "a constant";
1255  length = 10;
1256  break;
1257  case PM_DEFINED_NODE:
1258  type = "defined?";
1259  length = 8;
1260  break;
1261  case PM_FALSE_NODE:
1262  type = "false";
1263  length = 5;
1264  break;
      // NOTE(review): the listing numbering skips 1268-1269 and 1271-1272
      // inside this group, so further case labels may be missing here.
1265  case PM_FLOAT_NODE:
1266  case PM_IMAGINARY_NODE:
1267  case PM_INTEGER_NODE:
1270  case PM_RATIONAL_NODE:
1273  case PM_SOURCE_FILE_NODE:
1274  case PM_SOURCE_LINE_NODE:
1275  case PM_STRING_NODE:
1276  case PM_SYMBOL_NODE:
1277  type = "a literal";
1278  length = 9;
1279  break;
1280  case PM_NIL_NODE:
1281  type = "nil";
1282  length = 3;
1283  break;
1284  case PM_RANGE_NODE: {
1285  const pm_range_node_t *cast = (const pm_range_node_t *) node;
1286 
      // NOTE(review): the `if (...) {` that pairs with the `} else {` below
      // is not visible in this excerpt (listing line 1287 is missing). It
      // presumably tests `cast` to choose between "..." and ".." -- confirm.
1288  type = "...";
1289  length = 3;
1290  } else {
1291  type = "..";
1292  length = 2;
1293  }
1294 
1295  break;
1296  }
1297  case PM_SELF_NODE:
1298  type = "self";
1299  length = 4;
1300  break;
1301  case PM_TRUE_NODE:
1302  type = "true";
1303  length = 4;
1304  break;
1305  default:
1306  break;
1307  }
1308 
      // A non-NULL `type` means one of the cases above matched.
1309  if (type != NULL) {
1310  PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1311  }
1312 }
1313 
1318 static void
1319 pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1320  assert(node->body.size > 0);
1321  const size_t size = node->body.size - (last_value ? 1 : 0);
1322  for (size_t index = 0; index < size; index++) {
1323  pm_void_statement_check(parser, node->body.nodes[index]);
1324  }
1325 }
1326 
/**
 * The context in which a predicate is being checked. It is threaded through
 * pm_conditional_predicate and decides the wording used by
 * pm_parser_warn_conditional_predicate_literal.
 */
1332 typedef enum {
      // Literal warnings are worded with "condition".
1333  PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,
      // Literal warnings are worded with "flip-flop"; used for range endpoints.
1334  PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,
      // No literal warning is emitted for this context.
1335  PM_CONDITIONAL_PREDICATE_TYPE_NOT
1336 } pm_conditional_predicate_type_t;
1337 
1341 static void
1342 pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1343  switch (type) {
1344  case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1345  PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1346  break;
1347  case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1348  PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1349  break;
1350  case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1351  break;
1352  }
1353 }
1354 
/**
 * Return true when the given node counts as a literal for the purpose of
 * warning about an assignment inside a predicate. Arrays and hashes qualify
 * when they carry the static literal flag or when every element (for hashes:
 * every key and value of every PM_ASSOC_NODE element) qualifies recursively.
 * The scalar node types listed at the bottom always qualify.
 *
 * NOTE(review): the listing numbering skips 1392-1393 inside the scalar case
 * group, so one or more case labels may be missing from this excerpt.
 */
1359 static bool
1360 pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1361  switch (PM_NODE_TYPE(node)) {
1362  case PM_ARRAY_NODE: {
      // Fast path: the flag already says the whole array is a literal.
1363  if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1364 
1365  const pm_array_node_t *cast = (const pm_array_node_t *) node;
1366  for (size_t index = 0; index < cast->elements.size; index++) {
1367  if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1368  }
1369 
1370  return true;
1371  }
1372  case PM_HASH_NODE: {
1373  if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1374 
1375  const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1376  for (size_t index = 0; index < cast->elements.size; index++) {
1377  const pm_node_t *element = cast->elements.nodes[index];
      // Any element that is not a plain key/value pair disqualifies the hash.
1378  if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1379 
1380  const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1381  if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1382  }
1383 
1384  return true;
1385  }
1386  case PM_FALSE_NODE:
1387  case PM_FLOAT_NODE:
1388  case PM_IMAGINARY_NODE:
1389  case PM_INTEGER_NODE:
1390  case PM_NIL_NODE:
1391  case PM_RATIONAL_NODE:
1394  case PM_SOURCE_FILE_NODE:
1395  case PM_SOURCE_LINE_NODE:
1396  case PM_STRING_NODE:
1397  case PM_SYMBOL_NODE:
1398  case PM_TRUE_NODE:
1399  return true;
1400  default:
1401  return false;
1402  }
1403 }
1404 
1409 static inline void
1410 pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1411  if (pm_conditional_predicate_warn_write_literal_p(node)) {
1412  pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1413  }
1414 }
1415 
/**
 * Inspect a node that is being used as a predicate. Depending on the node's
 * kind this:
 *
 * - recurses into both operands of `and`/`or` nodes (as conditional context);
 * - recurses into the single statement of a parenthesised or `begin` body,
 *   keeping the caller's context;
 * - recurses into range endpoints (as flip-flop context) and then retags the
 *   range node in place as PM_FLIP_FLOP_NODE;
 * - retags regular expression nodes in place as PM_MATCH_LAST_LINE_NODE;
 * - emits warnings for literals appearing in the predicate and for literal
 *   values being assigned in the predicate.
 *
 * NOTE(review): this excerpt is a numbered listing with lines missing. Several
 * `case` labels (and the `cast` declaration of the parentheses case) are not
 * visible; each gap is marked below. Restore them from the real source before
 * relying on this code.
 */
1428 static void
1429 pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1430  switch (PM_NODE_TYPE(node)) {
1431  case PM_AND_NODE: {
1432  pm_and_node_t *cast = (pm_and_node_t *) node;
1433  pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1434  pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1435  break;
1436  }
1437  case PM_OR_NODE: {
1438  pm_or_node_t *cast = (pm_or_node_t *) node;
1439  pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1440  pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1441  break;
1442  }
1443  case PM_PARENTHESES_NODE: {
      // NOTE(review): the declaration of `cast` is not visible in this
      // excerpt (listing line 1444 is missing).
1445 
      // Only a body holding exactly one statement is looked into.
1446  if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1447  pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1448  if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1449  }
1450 
1451  break;
1452  }
1453  case PM_BEGIN_NODE: {
1454  pm_begin_node_t *cast = (pm_begin_node_t *) node;
1455  if (cast->statements != NULL) {
1456  pm_statements_node_t *statements = cast->statements;
1457  if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1458  }
1459  break;
1460  }
1461  case PM_RANGE_NODE: {
1462  pm_range_node_t *cast = (pm_range_node_t *) node;
1463 
      // Either endpoint may be absent; present endpoints are checked in
      // flip-flop context.
1464  if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1465  if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1466 
1467  // Here we change the range node into a flip flop node. We can do
1468  // this since the nodes are exactly the same except for the type.
1469  // We're only asserting against the size when we should probably
1470  // assert against the entire layout, but we'll assume tests will
1471  // catch this.
1472  assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1473  node->type = PM_FLIP_FLOP_NODE;
1474 
1475  break;
1476  }
      // NOTE(review): the case label for this branch is not visible in this
      // excerpt (listing line 1477 is missing).
1478  // Here we change the regular expression node into a match last line
1479  // node. We can do this since the nodes are exactly the same except
1480  // for the type.
1481  assert(sizeof(pm_regular_expression_node_t) == sizeof(pm_match_last_line_node_t));
1482  node->type = PM_MATCH_LAST_LINE_NODE;
1483 
1484  if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1485  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1486  }
1487 
1488  break;
      // NOTE(review): the case label for this branch and the statements that
      // perform the retagging described in the comment below are not visible
      // in this excerpt (listing lines 1489 and 1493-1494 are missing).
1490  // Here we change the interpolated regular expression node into an
1491  // interpolated match last line node. We can do this since the nodes
1492  // are exactly the same except for the type.
1495 
1496  if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1497  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1498  }
1499 
1500  break;
1501  case PM_INTEGER_NODE:
      // Integers get a dedicated warning in flip-flop context and the
      // generic literal warning otherwise.
1502  if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1503  if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1504  pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1505  }
1506  } else {
1507  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1508  }
1509  break;
      // NOTE(review): the listing numbering skips 1512, 1516 and 1520 in the
      // three case groups below, so further case labels may be missing.
1510  case PM_STRING_NODE:
1511  case PM_SOURCE_FILE_NODE:
1513  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1514  break;
1515  case PM_SYMBOL_NODE:
1517  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1518  break;
1519  case PM_SOURCE_LINE_NODE:
1521  case PM_FLOAT_NODE:
1522  case PM_RATIONAL_NODE:
1523  case PM_IMAGINARY_NODE:
1524  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1525  break;
      // NOTE(review): the case labels for the five write-node branches below
      // are not visible in this excerpt (listing lines 1526, 1529, 1532, 1535
      // and 1538 are missing). Each branch checks the value being assigned.
1527  pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1528  break;
1530  pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1531  break;
1533  pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1534  break;
1536  pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1537  break;
1539  pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1540  break;
1541  case PM_MULTI_WRITE_NODE:
1542  pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1543  break;
1544  default:
1545  break;
1546  }
1547 }
1548 
1557 static inline pm_token_t
1558 not_provided(pm_parser_t *parser) {
1559  return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1560 }
1561 
// Helpers that build pm_location_t values as compound literals.

// An empty location: start and end both point at the parser's start pointer.
1562 #define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
// The location spanned by a token (given as a pointer).
1563 #define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
// A copy of the location of a node accessed through a pointer whose `location`
// member is reachable directly.
1564 #define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
// A copy of the location of a node accessed through a pointer whose location
// lives under its `base` member.
1565 #define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
// The "absent" optional location: both pointers are NULL.
1566 #define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((pm_location_t) { .start = NULL, .end = NULL })
// The token's location, or the absent location when the token's type is
// PM_TOKEN_NOT_PROVIDED. Note that `token` is evaluated more than once.
1567 #define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
1568 
/**
 * Bookkeeping for the arguments of a call while it is being parsed.
 *
 * NOTE(review): the member declarations are not visible in this excerpt (the
 * listing's own numbering jumps from 1575 to 1590 with only blank lines kept).
 * Code elsewhere in this file reads at least `block`, `closing_loc` and
 * `arguments` from this struct -- restore the full definition from the real
 * source.
 */
1575 typedef struct {
1578 
1581 
1584 
1587 
1590 } pm_arguments_t;
1591 
1595 static inline const uint8_t *
1596 pm_arguments_end(pm_arguments_t *arguments) {
1597  if (arguments->block != NULL) {
1598  const uint8_t *end = arguments->block->location.end;
1599  if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1600  end = arguments->closing_loc.end;
1601  }
1602  return end;
1603  }
1604  if (arguments->closing_loc.start != NULL) {
1605  return arguments->closing_loc.end;
1606  }
1607  if (arguments->arguments != NULL) {
1608  return arguments->arguments->base.location.end;
1609  }
1610  return arguments->closing_loc.end;
1611 }
1612 
1617 static void
1618 pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1619  // First, check that we have arguments and that we don't have a closing
1620  // location for them.
1621  if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1622  return;
1623  }
1624 
1625  // Next, check that we don't have a single parentheses argument. This would
1626  // look like:
1627  //
1628  // foo (1) {}
1629  //
1630  // In this case, it's actually okay for the block to be attached to the
1631  // call, even though it looks like it's attached to the argument.
1632  if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1633  return;
1634  }
1635 
1636  // If we didn't hit a case before this check, then at this point we need to
1637  // add a syntax error.
1638  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1639 }
1640 
1641 /******************************************************************************/
1642 /* Basic character checks */
1643 /******************************************************************************/
1644 
1651 static inline size_t
1652 char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
1653  if (parser->encoding_changed) {
1654  size_t width;
1655  if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
1656  return width;
1657  } else if (*b == '_') {
1658  return 1;
1659  } else if (*b >= 0x80) {
1660  return parser->encoding->char_width(b, parser->end - b);
1661  } else {
1662  return 0;
1663  }
1664  } else if (*b < 0x80) {
1665  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1666  } else {
1667  return pm_encoding_utf_8_char_width(b, parser->end - b);
1668  }
1669 }
1670 
1675 static inline size_t
1676 char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
1677  if (*b < 0x80) {
1678  return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1679  } else {
1680  return pm_encoding_utf_8_char_width(b, end - b);
1681  }
1682 }
1683 
1689 static inline size_t
1690 char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
1691  if (parser->encoding_changed) {
1692  size_t width;
1693  if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
1694  return width;
1695  } else if (*b == '_') {
1696  return 1;
1697  } else if (*b >= 0x80) {
1698  return parser->encoding->char_width(b, parser->end - b);
1699  } else {
1700  return 0;
1701  }
1702  }
1703  return char_is_identifier_utf8(b, parser->end);
1704 }
1705 
// A small bitset over the printable ASCII range recording which punctuation
// characters may follow a `$` in a global name. BIT(c, idx) contributes the
// bit for character c to word idx of the table (word 0 covers 0x20-0x3f,
// word 1 covers 0x40-0x5f, word 2 covers 0x60-0x7f) and nothing to any other
// word, so PUNCT(idx) is the complete word for that index.
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
    BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
    BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
    BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
    BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
    BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };

#undef BIT
#undef PUNCT

/**
 * Return true when the given byte is one of the punctuation characters
 * recorded in pm_global_name_punctuation_hash. Bytes at or below 0x20 and
 * bytes above 0x7e are never accepted.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    const unsigned int code = (unsigned int) b;

    const bool in_range = (code > 0x20) && (code <= 0x7e);
    if (!in_range) return false;

    // Pick the 32-bit word for this character, then test its bit.
    const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];
    return ((word >> (code % 32)) & 1) != 0;
}
1730 
1731 static inline bool
1732 token_is_setter_name(pm_token_t *token) {
1733  return (
1734  (token->type == PM_TOKEN_IDENTIFIER) &&
1735  (token->end - token->start >= 2) &&
1736  (token->end[-1] == '=')
1737  );
1738 }
1739 
/**
 * Return true when the first `length` bytes of `source` spell exactly one of
 * the keywords in the table below. `source` does not need to be
 * NUL-terminated; only `length` bytes are ever compared, and only against
 * keywords of that same length.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
#define KEYWORD(name) { name, sizeof(name) - 1 }

    static const struct {
        const char *name;
        size_t length;
    } keywords[] = {
        // 2 bytes
        KEYWORD("do"), KEYWORD("if"), KEYWORD("in"), KEYWORD("or"),
        // 3 bytes
        KEYWORD("and"), KEYWORD("def"), KEYWORD("end"), KEYWORD("for"),
        KEYWORD("nil"), KEYWORD("not"),
        // 4 bytes
        KEYWORD("case"), KEYWORD("else"), KEYWORD("next"), KEYWORD("redo"),
        KEYWORD("self"), KEYWORD("then"), KEYWORD("true"), KEYWORD("when"),
        // 5 bytes
        KEYWORD("alias"), KEYWORD("begin"), KEYWORD("break"), KEYWORD("class"),
        KEYWORD("elsif"), KEYWORD("false"), KEYWORD("retry"), KEYWORD("super"),
        KEYWORD("undef"), KEYWORD("until"), KEYWORD("while"), KEYWORD("yield"),
        // 6 bytes
        KEYWORD("ensure"), KEYWORD("module"), KEYWORD("rescue"),
        KEYWORD("return"), KEYWORD("unless"),
        // 8 bytes
        KEYWORD("__LINE__"), KEYWORD("__FILE__"),
        // 12 bytes
        KEYWORD("__ENCODING__")
    };

#undef KEYWORD

    const size_t count = sizeof(keywords) / sizeof(keywords[0]);

    for (size_t index = 0; index < count; index++) {
        if (keywords[index].length == length && memcmp(source, keywords[index].name, length) == 0) {
            return true;
        }
    }

    return false;
}
1810 
1811 /******************************************************************************/
1812 /* Node flag handling functions */
1813 /******************************************************************************/
1814 
1818 static inline void
1819 pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1820  node->flags |= flag;
1821 }
1822 
1826 static inline void
1827 pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1828  node->flags &= (pm_node_flags_t) ~flag;
1829 }
1830 
1834 static inline void
1835 pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1836  assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1837  PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1838  PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1839  PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1840  PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1841  PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1842  PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1843  PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1844 
1845  pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1846 }
1847 
1848 /******************************************************************************/
1849 /* Node creation functions */
1850 /******************************************************************************/
1851 
1857 #define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1858 
1862 static inline pm_node_flags_t
1863 pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1864  pm_node_flags_t flags = 0;
1865 
1866  if (closing->type == PM_TOKEN_REGEXP_END) {
1867  pm_buffer_t unknown_flags = { 0 };
1868 
1869  for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1870  switch (*flag) {
1871  case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1872  case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1873  case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1874  case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1875 
1876  case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1877  case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1878  case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1879  case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1880 
1881  default: pm_buffer_append_byte(&unknown_flags, *flag);
1882  }
1883  }
1884 
1885  size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1886  if (unknown_flags_length != 0) {
1887  const char *word = unknown_flags_length >= 2 ? "options" : "option";
1888  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1889  }
1890  pm_buffer_free(&unknown_flags);
1891  }
1892 
1893  return flags;
1894 }
1895 
1896 #undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1897 
// Forward declarations for statements-node helpers whose definitions are not
// in this part of the file.

// Creates a statements node.
1898 static pm_statements_node_t *
1899 pm_statements_node_create(pm_parser_t *parser);
1900 
// Appends a statement to a statements node.
// NOTE(review): the meaning of `newline` is not visible from here -- see the
// definition.
1901 static void
1902 pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1903 
// Returns a length for the body of a statements node.
1904 static size_t
1905 pm_statements_node_body_length(pm_statements_node_t *node);
1906 
1911 static inline void *
1912 pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1913  void *memory = xcalloc(1, size);
1914  if (memory == NULL) {
1915  fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1916  abort();
1917  }
1918  return memory;
1919 }
1920 
// Allocate a zeroed node of the given struct type through pm_node_alloc and
// cast the result to a pointer to that type.
1921 #define PM_NODE_ALLOC(parser, type) (type *) pm_node_alloc(parser, sizeof(type))
// Produce the next node identifier by pre-incrementing the parser's node_id
// counter. This has a side effect on the parser each time it is expanded.
1922 #define PM_NODE_IDENTIFY(parser) (++parser->node_id)
1923 
1927 static pm_missing_node_t *
1928 pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1929  pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1930 
1931  *node = (pm_missing_node_t) {{
1932  .type = PM_MISSING_NODE,
1933  .node_id = PM_NODE_IDENTIFY(parser),
1934  .location = { .start = start, .end = end }
1935  }};
1936 
1937  return node;
1938 }
1939 
1943 static pm_alias_global_variable_node_t *
1944 pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1945  assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1946  pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1947 
1948  *node = (pm_alias_global_variable_node_t) {
1949  {
1950  .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
1951  .node_id = PM_NODE_IDENTIFY(parser),
1952  .location = {
1953  .start = keyword->start,
1954  .end = old_name->location.end
1955  },
1956  },
1957  .new_name = new_name,
1958  .old_name = old_name,
1959  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1960  };
1961 
1962  return node;
1963 }
1964 
1968 static pm_alias_method_node_t *
1969 pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1970  assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1971  pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1972 
1973  *node = (pm_alias_method_node_t) {
1974  {
1975  .type = PM_ALIAS_METHOD_NODE,
1976  .node_id = PM_NODE_IDENTIFY(parser),
1977  .location = {
1978  .start = keyword->start,
1979  .end = old_name->location.end
1980  },
1981  },
1982  .new_name = new_name,
1983  .old_name = old_name,
1984  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1985  };
1986 
1987  return node;
1988 }
1989 
1993 static pm_alternation_pattern_node_t *
1994 pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
1995  pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
1996 
1997  *node = (pm_alternation_pattern_node_t) {
1998  {
1999  .type = PM_ALTERNATION_PATTERN_NODE,
2000  .node_id = PM_NODE_IDENTIFY(parser),
2001  .location = {
2002  .start = left->location.start,
2003  .end = right->location.end
2004  },
2005  },
2006  .left = left,
2007  .right = right,
2008  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2009  };
2010 
2011  return node;
2012 }
2013 
2017 static pm_and_node_t *
2018 pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2019  pm_assert_value_expression(parser, left);
2020 
2021  pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2022 
2023  *node = (pm_and_node_t) {
2024  {
2025  .type = PM_AND_NODE,
2026  .node_id = PM_NODE_IDENTIFY(parser),
2027  .location = {
2028  .start = left->location.start,
2029  .end = right->location.end
2030  },
2031  },
2032  .left = left,
2033  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2034  .right = right
2035  };
2036 
2037  return node;
2038 }
2039 
2043 static pm_arguments_node_t *
2044 pm_arguments_node_create(pm_parser_t *parser) {
2045  pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2046 
2047  *node = (pm_arguments_node_t) {
2048  {
2049  .type = PM_ARGUMENTS_NODE,
2050  .node_id = PM_NODE_IDENTIFY(parser),
2051  .location = PM_LOCATION_NULL_VALUE(parser)
2052  },
2053  .arguments = { 0 }
2054  };
2055 
2056  return node;
2057 }
2058 
2062 static size_t
2063 pm_arguments_node_size(pm_arguments_node_t *node) {
2064  return node->arguments.size;
2065 }
2066 
2070 static void
2071 pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2072  if (pm_arguments_node_size(node) == 0) {
2073  node->base.location.start = argument->location.start;
2074  }
2075 
2076  node->base.location.end = argument->location.end;
2077  pm_node_list_append(&node->arguments, argument);
2078 
2079  if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2080  if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2081  pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2082  } else {
2083  pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2084  }
2085  }
2086 }
2087 
2091 static pm_array_node_t *
2092 pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2093  pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2094 
2095  *node = (pm_array_node_t) {
2096  {
2097  .type = PM_ARRAY_NODE,
2098  .flags = PM_NODE_FLAG_STATIC_LITERAL,
2099  .node_id = PM_NODE_IDENTIFY(parser),
2100  .location = PM_LOCATION_TOKEN_VALUE(opening)
2101  },
2102  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2103  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2104  .elements = { 0 }
2105  };
2106 
2107  return node;
2108 }
2109 
2113 static inline void
2114 pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2115  if (!node->elements.size && !node->opening_loc.start) {
2116  node->base.location.start = element->location.start;
2117  }
2118 
2119  pm_node_list_append(&node->elements, element);
2120  node->base.location.end = element->location.end;
2121 
2122  // If the element is not a static literal, then the array is not a static
2123  // literal. Turn that flag off.
2124  if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2125  pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
2126  }
2127 
2128  if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2129  pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2130  }
2131 }
2132 
2136 static void
2137 pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2138  assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2139  node->base.location.end = closing->end;
2140  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2141 }
2142 
2147 static pm_array_pattern_node_t *
2148 pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2149  pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2150 
2151  *node = (pm_array_pattern_node_t) {
2152  {
2153  .type = PM_ARRAY_PATTERN_NODE,
2154  .node_id = PM_NODE_IDENTIFY(parser),
2155  .location = {
2156  .start = nodes->nodes[0]->location.start,
2157  .end = nodes->nodes[nodes->size - 1]->location.end
2158  },
2159  },
2160  .constant = NULL,
2161  .rest = NULL,
2162  .requireds = { 0 },
2163  .posts = { 0 },
2164  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2165  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2166  };
2167 
2168  // For now we're going to just copy over each pointer manually. This could be
2169  // much more efficient, as we could instead resize the node list.
2170  bool found_rest = false;
2171  pm_node_t *child;
2172 
2173  PM_NODE_LIST_FOREACH(nodes, index, child) {
2174  if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2175  node->rest = child;
2176  found_rest = true;
2177  } else if (found_rest) {
2178  pm_node_list_append(&node->posts, child);
2179  } else {
2180  pm_node_list_append(&node->requireds, child);
2181  }
2182  }
2183 
2184  return node;
2185 }
2186 
2190 static pm_array_pattern_node_t *
2191 pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2192  pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2193 
2194  *node = (pm_array_pattern_node_t) {
2195  {
2196  .type = PM_ARRAY_PATTERN_NODE,
2197  .node_id = PM_NODE_IDENTIFY(parser),
2198  .location = rest->location,
2199  },
2200  .constant = NULL,
2201  .rest = rest,
2202  .requireds = { 0 },
2203  .posts = { 0 },
2204  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2205  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2206  };
2207 
2208  return node;
2209 }
2210 
2215 static pm_array_pattern_node_t *
2216 pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2217  pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2218 
2219  *node = (pm_array_pattern_node_t) {
2220  {
2221  .type = PM_ARRAY_PATTERN_NODE,
2222  .node_id = PM_NODE_IDENTIFY(parser),
2223  .location = {
2224  .start = constant->location.start,
2225  .end = closing->end
2226  },
2227  },
2228  .constant = constant,
2229  .rest = NULL,
2230  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2231  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2232  .requireds = { 0 },
2233  .posts = { 0 }
2234  };
2235 
2236  return node;
2237 }
2238 
2243 static pm_array_pattern_node_t *
2244 pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2245  pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2246 
2247  *node = (pm_array_pattern_node_t) {
2248  {
2249  .type = PM_ARRAY_PATTERN_NODE,
2250  .node_id = PM_NODE_IDENTIFY(parser),
2251  .location = {
2252  .start = opening->start,
2253  .end = closing->end
2254  },
2255  },
2256  .constant = NULL,
2257  .rest = NULL,
2258  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2259  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2260  .requireds = { 0 },
2261  .posts = { 0 }
2262  };
2263 
2264  return node;
2265 }
2266 
2267 static inline void
2268 pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2269  pm_node_list_append(&node->requireds, inner);
2270 }
2271 
2275 static pm_assoc_node_t *
2276 pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2277  pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2278  const uint8_t *end;
2279 
2280  if (value != NULL && value->location.end > key->location.end) {
2281  end = value->location.end;
2282  } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2283  end = operator->end;
2284  } else {
2285  end = key->location.end;
2286  }
2287 
2288  // Hash string keys will be frozen, so we can mark them as frozen here so
2289  // that the compiler picks them up and also when we check for static literal
2290  // on the keys it gets factored in.
2291  if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2292  key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2293  }
2294 
2295  // If the key and value of this assoc node are both static literals, then
2296  // we can mark this node as a static literal.
2297  pm_node_flags_t flags = 0;
2298  if (
2299  !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2300  value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2301  ) {
2302  flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2303  }
2304 
2305  *node = (pm_assoc_node_t) {
2306  {
2307  .type = PM_ASSOC_NODE,
2308  .flags = flags,
2309  .node_id = PM_NODE_IDENTIFY(parser),
2310  .location = {
2311  .start = key->location.start,
2312  .end = end
2313  },
2314  },
2315  .key = key,
2316  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2317  .value = value
2318  };
2319 
2320  return node;
2321 }
2322 
2326 static pm_assoc_splat_node_t *
2327 pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2328  assert(operator->type == PM_TOKEN_USTAR_STAR);
2329  pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2330 
2331  *node = (pm_assoc_splat_node_t) {
2332  {
2333  .type = PM_ASSOC_SPLAT_NODE,
2334  .node_id = PM_NODE_IDENTIFY(parser),
2335  .location = {
2336  .start = operator->start,
2337  .end = value == NULL ? operator->end : value->location.end
2338  },
2339  },
2340  .value = value,
2341  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2342  };
2343 
2344  return node;
2345 }
2346 
2350 static pm_back_reference_read_node_t *
2351 pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2352  assert(name->type == PM_TOKEN_BACK_REFERENCE);
2353  pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2354 
2355  *node = (pm_back_reference_read_node_t) {
2356  {
2357  .type = PM_BACK_REFERENCE_READ_NODE,
2358  .node_id = PM_NODE_IDENTIFY(parser),
2359  .location = PM_LOCATION_TOKEN_VALUE(name),
2360  },
2361  .name = pm_parser_constant_id_token(parser, name)
2362  };
2363 
2364  return node;
2365 }
2366 
2370 static pm_begin_node_t *
2371 pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2372  pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2373 
2374  *node = (pm_begin_node_t) {
2375  {
2376  .type = PM_BEGIN_NODE,
2377  .node_id = PM_NODE_IDENTIFY(parser),
2378  .location = {
2379  .start = begin_keyword->start,
2380  .end = statements == NULL ? begin_keyword->end : statements->base.location.end
2381  },
2382  },
2383  .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2384  .statements = statements,
2385  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2386  };
2387 
2388  return node;
2389 }
2390 
2394 static void
2395 pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2396  // If the begin keyword doesn't exist, we set the start on the begin_node
2397  if (!node->begin_keyword_loc.start) {
2398  node->base.location.start = rescue_clause->base.location.start;
2399  }
2400  node->base.location.end = rescue_clause->base.location.end;
2401  node->rescue_clause = rescue_clause;
2402 }
2403 
2407 static void
2408 pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2409  node->base.location.end = else_clause->base.location.end;
2410  node->else_clause = else_clause;
2411 }
2412 
2416 static void
2417 pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2418  node->base.location.end = ensure_clause->base.location.end;
2419  node->ensure_clause = ensure_clause;
2420 }
2421 
2425 static void
2426 pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2427  assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2428 
2429  node->base.location.end = end_keyword->end;
2430  node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2431 }
2432 
2436 static pm_block_argument_node_t *
2437 pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2438  pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2439 
2440  *node = (pm_block_argument_node_t) {
2441  {
2442  .type = PM_BLOCK_ARGUMENT_NODE,
2443  .node_id = PM_NODE_IDENTIFY(parser),
2444  .location = {
2445  .start = operator->start,
2446  .end = expression == NULL ? operator->end : expression->location.end
2447  },
2448  },
2449  .expression = expression,
2450  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2451  };
2452 
2453  return node;
2454 }
2455 
2459 static pm_block_node_t *
2460 pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2461  pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2462 
2463  *node = (pm_block_node_t) {
2464  {
2465  .type = PM_BLOCK_NODE,
2466  .node_id = PM_NODE_IDENTIFY(parser),
2467  .location = { .start = opening->start, .end = closing->end },
2468  },
2469  .locals = *locals,
2470  .parameters = parameters,
2471  .body = body,
2472  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2473  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2474  };
2475 
2476  return node;
2477 }
2478 
2482 static pm_block_parameter_node_t *
2483 pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2484  assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2485  pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2486 
2487  *node = (pm_block_parameter_node_t) {
2488  {
2489  .type = PM_BLOCK_PARAMETER_NODE,
2490  .node_id = PM_NODE_IDENTIFY(parser),
2491  .location = {
2492  .start = operator->start,
2493  .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
2494  },
2495  },
2496  .name = pm_parser_optional_constant_id_token(parser, name),
2497  .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2498  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2499  };
2500 
2501  return node;
2502 }
2503 
2507 static pm_block_parameters_node_t *
2508 pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2509  pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2510 
2511  const uint8_t *start;
2512  if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2513  start = opening->start;
2514  } else if (parameters != NULL) {
2515  start = parameters->base.location.start;
2516  } else {
2517  start = NULL;
2518  }
2519 
2520  const uint8_t *end;
2521  if (parameters != NULL) {
2522  end = parameters->base.location.end;
2523  } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2524  end = opening->end;
2525  } else {
2526  end = NULL;
2527  }
2528 
2529  *node = (pm_block_parameters_node_t) {
2530  {
2531  .type = PM_BLOCK_PARAMETERS_NODE,
2532  .node_id = PM_NODE_IDENTIFY(parser),
2533  .location = {
2534  .start = start,
2535  .end = end
2536  }
2537  },
2538  .parameters = parameters,
2539  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2540  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2541  .locals = { 0 }
2542  };
2543 
2544  return node;
2545 }
2546 
2550 static void
2551 pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2552  assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2553 
2554  node->base.location.end = closing->end;
2555  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2556 }
2557 
2561 static pm_block_local_variable_node_t *
2562 pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2563  pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2564 
2565  *node = (pm_block_local_variable_node_t) {
2566  {
2567  .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
2568  .node_id = PM_NODE_IDENTIFY(parser),
2569  .location = PM_LOCATION_TOKEN_VALUE(name),
2570  },
2571  .name = pm_parser_constant_id_token(parser, name)
2572  };
2573 
2574  return node;
2575 }
2576 
2580 static void
2581 pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2582  pm_node_list_append(&node->locals, (pm_node_t *) local);
2583 
2584  if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2585  node->base.location.end = local->base.location.end;
2586 }
2587 
2591 static pm_break_node_t *
2592 pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2593  assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2594  pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2595 
2596  *node = (pm_break_node_t) {
2597  {
2598  .type = PM_BREAK_NODE,
2599  .node_id = PM_NODE_IDENTIFY(parser),
2600  .location = {
2601  .start = keyword->start,
2602  .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
2603  },
2604  },
2605  .arguments = arguments,
2606  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2607  };
2608 
2609  return node;
2610 }
2611 
2612 // There are certain flags that we want to use internally but don't want to
2613 // expose because they are not relevant beyond parsing. Therefore we'll define
2614 // them here and not define them in config.yml/a header file.
2615 static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
2616 static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
2617 static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
2618 static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
2619 
2625 static pm_call_node_t *
2626 pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2627  pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2628 
2629  *node = (pm_call_node_t) {
2630  {
2631  .type = PM_CALL_NODE,
2632  .flags = flags,
2633  .node_id = PM_NODE_IDENTIFY(parser),
2634  .location = PM_LOCATION_NULL_VALUE(parser),
2635  },
2636  .receiver = NULL,
2637  .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2638  .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2639  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2640  .arguments = NULL,
2641  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2642  .block = NULL,
2643  .name = 0
2644  };
2645 
2646  return node;
2647 }
2648 
2653 static inline pm_node_flags_t
2654 pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2655  return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2656 }
2657 
2662 static pm_call_node_t *
2663 pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2664  pm_assert_value_expression(parser, receiver);
2665 
2666  pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2667  if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2668  flags |= PM_CALL_NODE_FLAGS_INDEX;
2669  }
2670 
2671  pm_call_node_t *node = pm_call_node_create(parser, flags);
2672 
2673  node->base.location.start = receiver->location.start;
2674  node->base.location.end = pm_arguments_end(arguments);
2675 
2676  node->receiver = receiver;
2677  node->message_loc.start = arguments->opening_loc.start;
2678  node->message_loc.end = arguments->closing_loc.end;
2679 
2680  node->opening_loc = arguments->opening_loc;
2681  node->arguments = arguments->arguments;
2682  node->closing_loc = arguments->closing_loc;
2683  node->block = arguments->block;
2684 
2685  node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2686  return node;
2687 }
2688 
2692 static pm_call_node_t *
2693 pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2694  pm_assert_value_expression(parser, receiver);
2695  pm_assert_value_expression(parser, argument);
2696 
2697  pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2698 
2699  node->base.location.start = MIN(receiver->location.start, argument->location.start);
2700  node->base.location.end = MAX(receiver->location.end, argument->location.end);
2701 
2702  node->receiver = receiver;
2703  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2704 
2705  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2706  pm_arguments_node_arguments_append(arguments, argument);
2707  node->arguments = arguments;
2708 
2709  node->name = pm_parser_constant_id_token(parser, operator);
2710  return node;
2711 }
2712 
2716 static pm_call_node_t *
2717 pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2718  pm_assert_value_expression(parser, receiver);
2719 
2720  pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2721 
2722  node->base.location.start = receiver->location.start;
2723  const uint8_t *end = pm_arguments_end(arguments);
2724  if (end == NULL) {
2725  end = message->end;
2726  }
2727  node->base.location.end = end;
2728 
2729  node->receiver = receiver;
2730  node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2731  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2732  node->opening_loc = arguments->opening_loc;
2733  node->arguments = arguments->arguments;
2734  node->closing_loc = arguments->closing_loc;
2735  node->block = arguments->block;
2736 
2737  if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2738  pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2739  }
2740 
2741  node->name = pm_parser_constant_id_token(parser, message);
2742  return node;
2743 }
2744 
2748 static pm_call_node_t *
2749 pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2750  pm_call_node_t *node = pm_call_node_create(parser, 0);
2751  node->base.location.start = parser->start;
2752  node->base.location.end = parser->end;
2753 
2754  node->receiver = receiver;
2755  node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2756  node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2757  node->arguments = arguments;
2758 
2759  node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2760  return node;
2761 }
2762 
2767 static pm_call_node_t *
2768 pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2769  pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2770 
2771  node->base.location.start = message->start;
2772  node->base.location.end = pm_arguments_end(arguments);
2773 
2774  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2775  node->opening_loc = arguments->opening_loc;
2776  node->arguments = arguments->arguments;
2777  node->closing_loc = arguments->closing_loc;
2778  node->block = arguments->block;
2779 
2780  node->name = pm_parser_constant_id_token(parser, message);
2781  return node;
2782 }
2783 
2788 static pm_call_node_t *
2789 pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2790  pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2791 
2792  node->base.location = PM_LOCATION_NULL_VALUE(parser);
2793  node->arguments = arguments;
2794 
2795  node->name = name;
2796  return node;
2797 }
2798 
2802 static pm_call_node_t *
2803 pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2804  pm_assert_value_expression(parser, receiver);
2805  if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2806 
2807  pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2808 
2809  node->base.location.start = message->start;
2810  if (arguments->closing_loc.start != NULL) {
2811  node->base.location.end = arguments->closing_loc.end;
2812  } else {
2813  assert(receiver != NULL);
2814  node->base.location.end = receiver->location.end;
2815  }
2816 
2817  node->receiver = receiver;
2818  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2819  node->opening_loc = arguments->opening_loc;
2820  node->arguments = arguments->arguments;
2821  node->closing_loc = arguments->closing_loc;
2822 
2823  node->name = pm_parser_constant_id_constant(parser, "!", 1);
2824  return node;
2825 }
2826 
2830 static pm_call_node_t *
2831 pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2832  pm_assert_value_expression(parser, receiver);
2833 
2834  pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2835 
2836  node->base.location.start = receiver->location.start;
2837  node->base.location.end = pm_arguments_end(arguments);
2838 
2839  node->receiver = receiver;
2840  node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2841  node->opening_loc = arguments->opening_loc;
2842  node->arguments = arguments->arguments;
2843  node->closing_loc = arguments->closing_loc;
2844  node->block = arguments->block;
2845 
2846  if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2847  pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2848  }
2849 
2850  node->name = pm_parser_constant_id_constant(parser, "call", 4);
2851  return node;
2852 }
2853 
2857 static pm_call_node_t *
2858 pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2859  pm_assert_value_expression(parser, receiver);
2860 
2861  pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2862 
2863  node->base.location.start = operator->start;
2864  node->base.location.end = receiver->location.end;
2865 
2866  node->receiver = receiver;
2867  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2868 
2869  node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2870  return node;
2871 }
2872 
2877 static pm_call_node_t *
2878 pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2879  pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2880 
2881  node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2882  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2883 
2884  node->name = pm_parser_constant_id_token(parser, message);
2885  return node;
2886 }
2887 
2892 static inline bool
2893 pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2894  return (
2895  (node->message_loc.start != NULL) &&
2896  (node->message_loc.end[-1] != '!') &&
2897  (node->message_loc.end[-1] != '?') &&
2898  char_is_identifier_start(parser, node->message_loc.start) &&
2899  (node->opening_loc.start == NULL) &&
2900  (node->arguments == NULL) &&
2901  (node->block == NULL)
2902  );
2903 }
2904 
2908 static void
2909 pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2910  pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2911 
2912  if (write_constant->length > 0) {
2913  size_t length = write_constant->length - 1;
2914 
2915  void *memory = xmalloc(length);
2916  memcpy(memory, write_constant->start, length);
2917 
2918  *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2919  } else {
2920  // We can get here if the message was missing because of a syntax error.
2921  *read_name = pm_parser_constant_id_constant(parser, "", 0);
2922  }
2923 }
2924 
2928 static pm_call_and_write_node_t *
2929 pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2930  assert(target->block == NULL);
2931  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2932  pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2933 
2934  *node = (pm_call_and_write_node_t) {
2935  {
2936  .type = PM_CALL_AND_WRITE_NODE,
2937  .flags = target->base.flags,
2938  .node_id = PM_NODE_IDENTIFY(parser),
2939  .location = {
2940  .start = target->base.location.start,
2941  .end = value->location.end
2942  }
2943  },
2944  .receiver = target->receiver,
2945  .call_operator_loc = target->call_operator_loc,
2946  .message_loc = target->message_loc,
2947  .read_name = 0,
2948  .write_name = target->name,
2949  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2950  .value = value
2951  };
2952 
2953  pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2954 
2955  // Here we're going to free the target, since it is no longer necessary.
2956  // However, we don't want to call `pm_node_destroy` because we want to keep
2957  // around all of its children since we just reused them.
2958  xfree(target);
2959 
2960  return node;
2961 }
2962 
2967 static void
2968 pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2969  if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
2970  if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2971  pm_node_t *node;
2972  PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2973  if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2974  pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2975  break;
2976  }
2977  }
2978  }
2979 
2980  if (block != NULL) {
2981  pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2982  }
2983  }
2984 }
2985 
2989 static pm_index_and_write_node_t *
2990 pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2991  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2992  pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
2993 
2994  pm_index_arguments_check(parser, target->arguments, target->block);
2995 
2996  assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2997  *node = (pm_index_and_write_node_t) {
2998  {
2999  .type = PM_INDEX_AND_WRITE_NODE,
3000  .flags = target->base.flags,
3001  .node_id = PM_NODE_IDENTIFY(parser),
3002  .location = {
3003  .start = target->base.location.start,
3004  .end = value->location.end
3005  }
3006  },
3007  .receiver = target->receiver,
3008  .call_operator_loc = target->call_operator_loc,
3009  .opening_loc = target->opening_loc,
3010  .arguments = target->arguments,
3011  .closing_loc = target->closing_loc,
3012  .block = (pm_block_argument_node_t *) target->block,
3013  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3014  .value = value
3015  };
3016 
3017  // Here we're going to free the target, since it is no longer necessary.
3018  // However, we don't want to call `pm_node_destroy` because we want to keep
3019  // around all of its children since we just reused them.
3020  xfree(target);
3021 
3022  return node;
3023 }
3024 
3028 static pm_call_operator_write_node_t *
3029 pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3030  assert(target->block == NULL);
3031  pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
3032 
3033  *node = (pm_call_operator_write_node_t) {
3034  {
3035  .type = PM_CALL_OPERATOR_WRITE_NODE,
3036  .flags = target->base.flags,
3037  .node_id = PM_NODE_IDENTIFY(parser),
3038  .location = {
3039  .start = target->base.location.start,
3040  .end = value->location.end
3041  }
3042  },
3043  .receiver = target->receiver,
3044  .call_operator_loc = target->call_operator_loc,
3045  .message_loc = target->message_loc,
3046  .read_name = 0,
3047  .write_name = target->name,
3048  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3049  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3050  .value = value
3051  };
3052 
3053  pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3054 
3055  // Here we're going to free the target, since it is no longer necessary.
3056  // However, we don't want to call `pm_node_destroy` because we want to keep
3057  // around all of its children since we just reused them.
3058  xfree(target);
3059 
3060  return node;
3061 }
3062 
3066 static pm_index_operator_write_node_t *
3067 pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3068  pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
3069 
3070  pm_index_arguments_check(parser, target->arguments, target->block);
3071 
3072  assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3073  *node = (pm_index_operator_write_node_t) {
3074  {
3075  .type = PM_INDEX_OPERATOR_WRITE_NODE,
3076  .flags = target->base.flags,
3077  .node_id = PM_NODE_IDENTIFY(parser),
3078  .location = {
3079  .start = target->base.location.start,
3080  .end = value->location.end
3081  }
3082  },
3083  .receiver = target->receiver,
3084  .call_operator_loc = target->call_operator_loc,
3085  .opening_loc = target->opening_loc,
3086  .arguments = target->arguments,
3087  .closing_loc = target->closing_loc,
3088  .block = (pm_block_argument_node_t *) target->block,
3089  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3090  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3091  .value = value
3092  };
3093 
3094  // Here we're going to free the target, since it is no longer necessary.
3095  // However, we don't want to call `pm_node_destroy` because we want to keep
3096  // around all of its children since we just reused them.
3097  xfree(target);
3098 
3099  return node;
3100 }
3101 
3105 static pm_call_or_write_node_t *
3106 pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3107  assert(target->block == NULL);
3108  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3109  pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3110 
3111  *node = (pm_call_or_write_node_t) {
3112  {
3113  .type = PM_CALL_OR_WRITE_NODE,
3114  .flags = target->base.flags,
3115  .node_id = PM_NODE_IDENTIFY(parser),
3116  .location = {
3117  .start = target->base.location.start,
3118  .end = value->location.end
3119  }
3120  },
3121  .receiver = target->receiver,
3122  .call_operator_loc = target->call_operator_loc,
3123  .message_loc = target->message_loc,
3124  .read_name = 0,
3125  .write_name = target->name,
3126  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3127  .value = value
3128  };
3129 
3130  pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3131 
3132  // Here we're going to free the target, since it is no longer necessary.
3133  // However, we don't want to call `pm_node_destroy` because we want to keep
3134  // around all of its children since we just reused them.
3135  xfree(target);
3136 
3137  return node;
3138 }
3139 
3143 static pm_index_or_write_node_t *
3144 pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3145  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3146  pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3147 
3148  pm_index_arguments_check(parser, target->arguments, target->block);
3149 
3150  assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3151  *node = (pm_index_or_write_node_t) {
3152  {
3153  .type = PM_INDEX_OR_WRITE_NODE,
3154  .flags = target->base.flags,
3155  .node_id = PM_NODE_IDENTIFY(parser),
3156  .location = {
3157  .start = target->base.location.start,
3158  .end = value->location.end
3159  }
3160  },
3161  .receiver = target->receiver,
3162  .call_operator_loc = target->call_operator_loc,
3163  .opening_loc = target->opening_loc,
3164  .arguments = target->arguments,
3165  .closing_loc = target->closing_loc,
3166  .block = (pm_block_argument_node_t *) target->block,
3167  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3168  .value = value
3169  };
3170 
3171  // Here we're going to free the target, since it is no longer necessary.
3172  // However, we don't want to call `pm_node_destroy` because we want to keep
3173  // around all of its children since we just reused them.
3174  xfree(target);
3175 
3176  return node;
3177 }
3178 
3183 static pm_call_target_node_t *
3184 pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3185  pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3186 
3187  *node = (pm_call_target_node_t) {
3188  {
3189  .type = PM_CALL_TARGET_NODE,
3190  .flags = target->base.flags,
3191  .node_id = PM_NODE_IDENTIFY(parser),
3192  .location = target->base.location
3193  },
3194  .receiver = target->receiver,
3195  .call_operator_loc = target->call_operator_loc,
3196  .name = target->name,
3197  .message_loc = target->message_loc
3198  };
3199 
3200  // Here we're going to free the target, since it is no longer necessary.
3201  // However, we don't want to call `pm_node_destroy` because we want to keep
3202  // around all of its children since we just reused them.
3203  xfree(target);
3204 
3205  return node;
3206 }
3207 
3212 static pm_index_target_node_t *
3213 pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3214  pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3215  pm_node_flags_t flags = target->base.flags;
3216 
3217  pm_index_arguments_check(parser, target->arguments, target->block);
3218 
3219  assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3220  *node = (pm_index_target_node_t) {
3221  {
3222  .type = PM_INDEX_TARGET_NODE,
3223  .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3224  .node_id = PM_NODE_IDENTIFY(parser),
3225  .location = target->base.location
3226  },
3227  .receiver = target->receiver,
3228  .opening_loc = target->opening_loc,
3229  .arguments = target->arguments,
3230  .closing_loc = target->closing_loc,
3231  .block = (pm_block_argument_node_t *) target->block,
3232  };
3233 
3234  // Here we're going to free the target, since it is no longer necessary.
3235  // However, we don't want to call `pm_node_destroy` because we want to keep
3236  // around all of its children since we just reused them.
3237  xfree(target);
3238 
3239  return node;
3240 }
3241 
3245 static pm_capture_pattern_node_t *
3246 pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3247  pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3248 
3249  *node = (pm_capture_pattern_node_t) {
3250  {
3251  .type = PM_CAPTURE_PATTERN_NODE,
3252  .node_id = PM_NODE_IDENTIFY(parser),
3253  .location = {
3254  .start = value->location.start,
3255  .end = target->base.location.end
3256  },
3257  },
3258  .value = value,
3259  .target = target,
3260  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3261  };
3262 
3263  return node;
3264 }
3265 
3269 static pm_case_node_t *
3270 pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3271  pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3272 
3273  *node = (pm_case_node_t) {
3274  {
3275  .type = PM_CASE_NODE,
3276  .node_id = PM_NODE_IDENTIFY(parser),
3277  .location = {
3278  .start = case_keyword->start,
3279  .end = end_keyword->end
3280  },
3281  },
3282  .predicate = predicate,
3283  .else_clause = NULL,
3284  .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3285  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3286  .conditions = { 0 }
3287  };
3288 
3289  return node;
3290 }
3291 
3295 static void
3296 pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3297  assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3298 
3299  pm_node_list_append(&node->conditions, condition);
3300  node->base.location.end = condition->location.end;
3301 }
3302 
3306 static void
3307 pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3308  node->else_clause = else_clause;
3309  node->base.location.end = else_clause->base.location.end;
3310 }
3311 
3315 static void
3316 pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3317  node->base.location.end = end_keyword->end;
3318  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3319 }
3320 
3324 static pm_case_match_node_t *
3325 pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3326  pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3327 
3328  *node = (pm_case_match_node_t) {
3329  {
3330  .type = PM_CASE_MATCH_NODE,
3331  .node_id = PM_NODE_IDENTIFY(parser),
3332  .location = {
3333  .start = case_keyword->start,
3334  .end = end_keyword->end
3335  },
3336  },
3337  .predicate = predicate,
3338  .else_clause = NULL,
3339  .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3340  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3341  .conditions = { 0 }
3342  };
3343 
3344  return node;
3345 }
3346 
3350 static void
3351 pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3352  assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3353 
3354  pm_node_list_append(&node->conditions, condition);
3355  node->base.location.end = condition->location.end;
3356 }
3357 
3361 static void
3362 pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3363  node->else_clause = else_clause;
3364  node->base.location.end = else_clause->base.location.end;
3365 }
3366 
3370 static void
3371 pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3372  node->base.location.end = end_keyword->end;
3373  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3374 }
3375 
3379 static pm_class_node_t *
3380 pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3381  pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3382 
3383  *node = (pm_class_node_t) {
3384  {
3385  .type = PM_CLASS_NODE,
3386  .node_id = PM_NODE_IDENTIFY(parser),
3387  .location = { .start = class_keyword->start, .end = end_keyword->end },
3388  },
3389  .locals = *locals,
3390  .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3391  .constant_path = constant_path,
3392  .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3393  .superclass = superclass,
3394  .body = body,
3395  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3396  .name = pm_parser_constant_id_token(parser, name)
3397  };
3398 
3399  return node;
3400 }
3401 
3405 static pm_class_variable_and_write_node_t *
3406 pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3407  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3408  pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3409 
3410  *node = (pm_class_variable_and_write_node_t) {
3411  {
3412  .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
3413  .node_id = PM_NODE_IDENTIFY(parser),
3414  .location = {
3415  .start = target->base.location.start,
3416  .end = value->location.end
3417  }
3418  },
3419  .name = target->name,
3420  .name_loc = target->base.location,
3421  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3422  .value = value
3423  };
3424 
3425  return node;
3426 }
3427 
3431 static pm_class_variable_operator_write_node_t *
3432 pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3433  pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3434 
3435  *node = (pm_class_variable_operator_write_node_t) {
3436  {
3437  .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
3438  .node_id = PM_NODE_IDENTIFY(parser),
3439  .location = {
3440  .start = target->base.location.start,
3441  .end = value->location.end
3442  }
3443  },
3444  .name = target->name,
3445  .name_loc = target->base.location,
3446  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3447  .value = value,
3448  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3449  };
3450 
3451  return node;
3452 }
3453 
3457 static pm_class_variable_or_write_node_t *
3458 pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3459  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3460  pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3461 
3462  *node = (pm_class_variable_or_write_node_t) {
3463  {
3464  .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
3465  .node_id = PM_NODE_IDENTIFY(parser),
3466  .location = {
3467  .start = target->base.location.start,
3468  .end = value->location.end
3469  }
3470  },
3471  .name = target->name,
3472  .name_loc = target->base.location,
3473  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3474  .value = value
3475  };
3476 
3477  return node;
3478 }
3479 
3483 static pm_class_variable_read_node_t *
3484 pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3485  assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3486  pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3487 
3488  *node = (pm_class_variable_read_node_t) {
3489  {
3490  .type = PM_CLASS_VARIABLE_READ_NODE,
3491  .node_id = PM_NODE_IDENTIFY(parser),
3492  .location = PM_LOCATION_TOKEN_VALUE(token)
3493  },
3494  .name = pm_parser_constant_id_token(parser, token)
3495  };
3496 
3497  return node;
3498 }
3499 
3506 static inline pm_node_flags_t
3507 pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3508  if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3509  return flags;
3510  }
3511  return 0;
3512 }
3513 
3517 static pm_class_variable_write_node_t *
3518 pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3519  pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3520 
3521  *node = (pm_class_variable_write_node_t) {
3522  {
3523  .type = PM_CLASS_VARIABLE_WRITE_NODE,
3524  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3525  .node_id = PM_NODE_IDENTIFY(parser),
3526  .location = {
3527  .start = read_node->base.location.start,
3528  .end = value->location.end
3529  },
3530  },
3531  .name = read_node->name,
3532  .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
3533  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3534  .value = value
3535  };
3536 
3537  return node;
3538 }
3539 
3543 static pm_constant_path_and_write_node_t *
3544 pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3545  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3546  pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3547 
3548  *node = (pm_constant_path_and_write_node_t) {
3549  {
3550  .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
3551  .node_id = PM_NODE_IDENTIFY(parser),
3552  .location = {
3553  .start = target->base.location.start,
3554  .end = value->location.end
3555  }
3556  },
3557  .target = target,
3558  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3559  .value = value
3560  };
3561 
3562  return node;
3563 }
3564 
3568 static pm_constant_path_operator_write_node_t *
3569 pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3570  pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3571 
3572  *node = (pm_constant_path_operator_write_node_t) {
3573  {
3574  .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
3575  .node_id = PM_NODE_IDENTIFY(parser),
3576  .location = {
3577  .start = target->base.location.start,
3578  .end = value->location.end
3579  }
3580  },
3581  .target = target,
3582  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3583  .value = value,
3584  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3585  };
3586 
3587  return node;
3588 }
3589 
3593 static pm_constant_path_or_write_node_t *
3594 pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3595  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3596  pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3597 
3598  *node = (pm_constant_path_or_write_node_t) {
3599  {
3600  .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
3601  .node_id = PM_NODE_IDENTIFY(parser),
3602  .location = {
3603  .start = target->base.location.start,
3604  .end = value->location.end
3605  }
3606  },
3607  .target = target,
3608  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3609  .value = value
3610  };
3611 
3612  return node;
3613 }
3614 
3618 static pm_constant_path_node_t *
3619 pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3620  pm_assert_value_expression(parser, parent);
3621  pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3622 
3623  pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3624  if (name_token->type == PM_TOKEN_CONSTANT) {
3625  name = pm_parser_constant_id_token(parser, name_token);
3626  }
3627 
3628  *node = (pm_constant_path_node_t) {
3629  {
3630  .type = PM_CONSTANT_PATH_NODE,
3631  .node_id = PM_NODE_IDENTIFY(parser),
3632  .location = {
3633  .start = parent == NULL ? delimiter->start : parent->location.start,
3634  .end = name_token->end
3635  },
3636  },
3637  .parent = parent,
3638  .name = name,
3639  .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3640  .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3641  };
3642 
3643  return node;
3644 }
3645 
3649 static pm_constant_path_write_node_t *
3650 pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3651  pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3652 
3653  *node = (pm_constant_path_write_node_t) {
3654  {
3655  .type = PM_CONSTANT_PATH_WRITE_NODE,
3656  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3657  .node_id = PM_NODE_IDENTIFY(parser),
3658  .location = {
3659  .start = target->base.location.start,
3660  .end = value->location.end
3661  },
3662  },
3663  .target = target,
3664  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3665  .value = value
3666  };
3667 
3668  return node;
3669 }
3670 
3674 static pm_constant_and_write_node_t *
3675 pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3676  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3677  pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3678 
3679  *node = (pm_constant_and_write_node_t) {
3680  {
3681  .type = PM_CONSTANT_AND_WRITE_NODE,
3682  .node_id = PM_NODE_IDENTIFY(parser),
3683  .location = {
3684  .start = target->base.location.start,
3685  .end = value->location.end
3686  }
3687  },
3688  .name = target->name,
3689  .name_loc = target->base.location,
3690  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3691  .value = value
3692  };
3693 
3694  return node;
3695 }
3696 
3700 static pm_constant_operator_write_node_t *
3701 pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3702  pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3703 
3704  *node = (pm_constant_operator_write_node_t) {
3705  {
3706  .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
3707  .node_id = PM_NODE_IDENTIFY(parser),
3708  .location = {
3709  .start = target->base.location.start,
3710  .end = value->location.end
3711  }
3712  },
3713  .name = target->name,
3714  .name_loc = target->base.location,
3715  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3716  .value = value,
3717  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3718  };
3719 
3720  return node;
3721 }
3722 
3726 static pm_constant_or_write_node_t *
3727 pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3728  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3729  pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3730 
3731  *node = (pm_constant_or_write_node_t) {
3732  {
3733  .type = PM_CONSTANT_OR_WRITE_NODE,
3734  .node_id = PM_NODE_IDENTIFY(parser),
3735  .location = {
3736  .start = target->base.location.start,
3737  .end = value->location.end
3738  }
3739  },
3740  .name = target->name,
3741  .name_loc = target->base.location,
3742  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3743  .value = value
3744  };
3745 
3746  return node;
3747 }
3748 
3752 static pm_constant_read_node_t *
3753 pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3754  assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3755  pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3756 
3757  *node = (pm_constant_read_node_t) {
3758  {
3759  .type = PM_CONSTANT_READ_NODE,
3760  .node_id = PM_NODE_IDENTIFY(parser),
3761  .location = PM_LOCATION_TOKEN_VALUE(name)
3762  },
3763  .name = pm_parser_constant_id_token(parser, name)
3764  };
3765 
3766  return node;
3767 }
3768 
3772 static pm_constant_write_node_t *
3773 pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3774  pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3775 
3776  *node = (pm_constant_write_node_t) {
3777  {
3778  .type = PM_CONSTANT_WRITE_NODE,
3779  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3780  .node_id = PM_NODE_IDENTIFY(parser),
3781  .location = {
3782  .start = target->base.location.start,
3783  .end = value->location.end
3784  }
3785  },
3786  .name = target->name,
3787  .name_loc = target->base.location,
3788  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3789  .value = value
3790  };
3791 
3792  return node;
3793 }
3794 
3798 static void
3799 pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3800  switch (PM_NODE_TYPE(node)) {
3801  case PM_BEGIN_NODE: {
3802  const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3803  if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
3804  break;
3805  }
3806  case PM_PARENTHESES_NODE: {
3807  const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3808  if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3809  break;
3810  }
3811  case PM_STATEMENTS_NODE: {
3812  const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3813  pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3814  break;
3815  }
3816  case PM_ARRAY_NODE:
3817  case PM_FLOAT_NODE:
3818  case PM_IMAGINARY_NODE:
3819  case PM_INTEGER_NODE:
3820  case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3821  case PM_INTERPOLATED_STRING_NODE:
3822  case PM_INTERPOLATED_SYMBOL_NODE:
3823  case PM_INTERPOLATED_X_STRING_NODE:
3824  case PM_RATIONAL_NODE:
3825  case PM_REGULAR_EXPRESSION_NODE:
3826  case PM_SOURCE_ENCODING_NODE:
3827  case PM_SOURCE_FILE_NODE:
3828  case PM_SOURCE_LINE_NODE:
3829  case PM_STRING_NODE:
3830  case PM_SYMBOL_NODE:
3831  case PM_X_STRING_NODE:
3832  pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3833  break;
3834  default:
3835  break;
3836  }
3837 }
3838 
3842 static pm_def_node_t *
3843 pm_def_node_create(
3844  pm_parser_t *parser,
3845  pm_constant_id_t name,
3846  const pm_token_t *name_loc,
3847  pm_node_t *receiver,
3848  pm_parameters_node_t *parameters,
3849  pm_node_t *body,
3850  pm_constant_id_list_t *locals,
3851  const pm_token_t *def_keyword,
3852  const pm_token_t *operator,
3853  const pm_token_t *lparen,
3854  const pm_token_t *rparen,
3855  const pm_token_t *equal,
3856  const pm_token_t *end_keyword
3857 ) {
3858  pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3859  const uint8_t *end;
3860 
3861  if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
3862  end = body->location.end;
3863  } else {
3864  end = end_keyword->end;
3865  }
3866 
3867  if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
3868  pm_def_node_receiver_check(parser, receiver);
3869  }
3870 
3871  *node = (pm_def_node_t) {
3872  {
3873  .type = PM_DEF_NODE,
3874  .node_id = PM_NODE_IDENTIFY(parser),
3875  .location = { .start = def_keyword->start, .end = end },
3876  },
3877  .name = name,
3878  .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3879  .receiver = receiver,
3880  .parameters = parameters,
3881  .body = body,
3882  .locals = *locals,
3883  .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3884  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3885  .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3886  .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3887  .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3888  .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3889  };
3890 
3891  return node;
3892 }
3893 
3897 static pm_defined_node_t *
3898 pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
3899  pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3900 
3901  *node = (pm_defined_node_t) {
3902  {
3903  .type = PM_DEFINED_NODE,
3904  .node_id = PM_NODE_IDENTIFY(parser),
3905  .location = {
3906  .start = keyword_loc->start,
3907  .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
3908  },
3909  },
3910  .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3911  .value = value,
3912  .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3913  .keyword_loc = *keyword_loc
3914  };
3915 
3916  return node;
3917 }
3918 
3922 static pm_else_node_t *
3923 pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3924  pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3925  const uint8_t *end = NULL;
3926  if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
3927  end = statements->base.location.end;
3928  } else {
3929  end = end_keyword->end;
3930  }
3931 
3932  *node = (pm_else_node_t) {
3933  {
3934  .type = PM_ELSE_NODE,
3935  .node_id = PM_NODE_IDENTIFY(parser),
3936  .location = {
3937  .start = else_keyword->start,
3938  .end = end,
3939  },
3940  },
3941  .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3942  .statements = statements,
3943  .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3944  };
3945 
3946  return node;
3947 }
3948 
3952 static pm_embedded_statements_node_t *
3953 pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3954  pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3955 
3956  *node = (pm_embedded_statements_node_t) {
3957  {
3958  .type = PM_EMBEDDED_STATEMENTS_NODE,
3959  .node_id = PM_NODE_IDENTIFY(parser),
3960  .location = {
3961  .start = opening->start,
3962  .end = closing->end
3963  }
3964  },
3965  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3966  .statements = statements,
3967  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3968  };
3969 
3970  return node;
3971 }
3972 
3976 static pm_embedded_variable_node_t *
3977 pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3978  pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3979 
3980  *node = (pm_embedded_variable_node_t) {
3981  {
3982  .type = PM_EMBEDDED_VARIABLE_NODE,
3983  .node_id = PM_NODE_IDENTIFY(parser),
3984  .location = {
3985  .start = operator->start,
3986  .end = variable->location.end
3987  }
3988  },
3989  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3990  .variable = variable
3991  };
3992 
3993  return node;
3994 }
3995 
3999 static pm_ensure_node_t *
4000 pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4001  pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
4002 
4003  *node = (pm_ensure_node_t) {
4004  {
4005  .type = PM_ENSURE_NODE,
4006  .node_id = PM_NODE_IDENTIFY(parser),
4007  .location = {
4008  .start = ensure_keyword->start,
4009  .end = end_keyword->end
4010  },
4011  },
4012  .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
4013  .statements = statements,
4014  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4015  };
4016 
4017  return node;
4018 }
4019 
4023 static pm_false_node_t *
4024 pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4025  assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4026  pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
4027 
4028  *node = (pm_false_node_t) {{
4029  .type = PM_FALSE_NODE,
4030  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4031  .node_id = PM_NODE_IDENTIFY(parser),
4032  .location = PM_LOCATION_TOKEN_VALUE(token)
4033  }};
4034 
4035  return node;
4036 }
4037 
4042 static pm_find_pattern_node_t *
4043 pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4044  pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
4045 
4046  pm_node_t *left = nodes->nodes[0];
4047  assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4048  pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
4049 
4050  pm_node_t *right;
4051 
4052  if (nodes->size == 1) {
4053  right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
4054  } else {
4055  right = nodes->nodes[nodes->size - 1];
4056  assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4057  }
4058 
4059 #if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
4060  // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
4061  // The resulting AST will anyway be ignored, but this file still needs to compile.
4062  pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
4063 #else
4064  pm_node_t *right_splat_node = right;
4065 #endif
4066  *node = (pm_find_pattern_node_t) {
4067  {
4068  .type = PM_FIND_PATTERN_NODE,
4069  .node_id = PM_NODE_IDENTIFY(parser),
4070  .location = {
4071  .start = left->location.start,
4072  .end = right->location.end,
4073  },
4074  },
4075  .constant = NULL,
4076  .left = left_splat_node,
4077  .right = right_splat_node,
4078  .requireds = { 0 },
4079  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4080  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4081  };
4082 
4083  // For now we're going to just copy over each pointer manually. This could be
4084  // much more efficient, as we could instead resize the node list to only point
4085  // to 1...-1.
4086  for (size_t index = 1; index < nodes->size - 1; index++) {
4087  pm_node_list_append(&node->requireds, nodes->nodes[index]);
4088  }
4089 
4090  return node;
4091 }
4092 
4097 static double
4098 pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4099  ptrdiff_t diff = token->end - token->start;
4100  if (diff <= 0) return 0.0;
4101 
4102  // First, get a buffer of the content.
4103  size_t length = (size_t) diff;
4104  char *buffer = xmalloc(sizeof(char) * (length + 1));
4105  memcpy((void *) buffer, token->start, length);
4106 
4107  // Next, determine if we need to replace the decimal point because of
4108  // locale-specific options, and then normalize them if we have to.
4109  char decimal_point = *localeconv()->decimal_point;
4110  if (decimal_point != '.') {
4111  for (size_t index = 0; index < length; index++) {
4112  if (buffer[index] == '.') buffer[index] = decimal_point;
4113  }
4114  }
4115 
4116  // Next, handle underscores by removing them from the buffer.
4117  for (size_t index = 0; index < length; index++) {
4118  if (buffer[index] == '_') {
4119  memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4120  length--;
4121  }
4122  }
4123 
4124  // Null-terminate the buffer so that strtod cannot read off the end.
4125  buffer[length] = '\0';
4126 
4127  // Now, call strtod to parse the value. Note that CRuby has their own
4128  // version of strtod which avoids locales. We're okay using the locale-aware
4129  // version because we've already validated through the parser that the token
4130  // is in a valid format.
4131  errno = 0;
4132  char *eptr;
4133  double value = strtod(buffer, &eptr);
4134 
4135  // This should never happen, because we've already checked that the token
4136  // is in a valid format. However it's good to be safe.
4137  if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4138  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
4139  xfree((void *) buffer);
4140  return 0.0;
4141  }
4142 
4143  // If errno is set, then it should only be ERANGE. At this point we need to
4144  // check if it's infinity (it should be).
4145  if (
4146  errno == ERANGE &&
4147 #ifdef _WIN32
4148  !_finite(value)
4149 #else
4150  isinf(value)
4151 #endif
4152  ) {
4153  int warn_width;
4154  const char *ellipsis;
4155 
4156  if (length > 20) {
4157  warn_width = 20;
4158  ellipsis = "...";
4159  } else {
4160  warn_width = (int) length;
4161  ellipsis = "";
4162  }
4163 
4164  pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4165  value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4166  }
4167 
4168  // Finally we can free the buffer and return the value.
4169  xfree((void *) buffer);
4170  return value;
4171 }
4172 
4176 static pm_float_node_t *
4177 pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4178  assert(token->type == PM_TOKEN_FLOAT);
4179  pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
4180 
4181  *node = (pm_float_node_t) {
4182  {
4183  .type = PM_FLOAT_NODE,
4184  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4185  .node_id = PM_NODE_IDENTIFY(parser),
4186  .location = PM_LOCATION_TOKEN_VALUE(token)
4187  },
4188  .value = pm_double_parse(parser, token)
4189  };
4190 
4191  return node;
4192 }
4193 
4197 static pm_imaginary_node_t *
4198 pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4199  assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4200 
4201  pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4202  *node = (pm_imaginary_node_t) {
4203  {
4204  .type = PM_IMAGINARY_NODE,
4205  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4206  .node_id = PM_NODE_IDENTIFY(parser),
4207  .location = PM_LOCATION_TOKEN_VALUE(token)
4208  },
4209  .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4210  .type = PM_TOKEN_FLOAT,
4211  .start = token->start,
4212  .end = token->end - 1
4213  }))
4214  };
4215 
4216  return node;
4217 }
4218 
4222 static pm_rational_node_t *
4223 pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4224  assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4225 
4226  pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4227  *node = (pm_rational_node_t) {
4228  {
4229  .type = PM_RATIONAL_NODE,
4230  .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4231  .node_id = PM_NODE_IDENTIFY(parser),
4232  .location = PM_LOCATION_TOKEN_VALUE(token)
4233  },
4234  .numerator = { 0 },
4235  .denominator = { 0 }
4236  };
4237 
4238  const uint8_t *start = token->start;
4239  const uint8_t *end = token->end - 1; // r
4240 
4241  while (start < end && *start == '0') start++; // 0.1 -> .1
4242  while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4243 
4244  size_t length = (size_t) (end - start);
4245  if (length == 1) {
4246  node->denominator.value = 1;
4247  return node;
4248  }
4249 
4250  const uint8_t *point = memchr(start, '.', length);
4251  assert(point && "should have a decimal point");
4252 
4253  uint8_t *digits = malloc(length);
4254  if (digits == NULL) {
4255  fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4256  abort();
4257  }
4258 
4259  memcpy(digits, start, (unsigned long) (point - start));
4260  memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4261  pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4262 
4263  digits[0] = '1';
4264  if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4265  pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4266  free(digits);
4267 
4268  pm_integers_reduce(&node->numerator, &node->denominator);
4269  return node;
4270 }
4271 
4276 static pm_imaginary_node_t *
4277 pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4278  assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4279 
4280  pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4281  *node = (pm_imaginary_node_t) {
4282  {
4283  .type = PM_IMAGINARY_NODE,
4284  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4285  .node_id = PM_NODE_IDENTIFY(parser),
4286  .location = PM_LOCATION_TOKEN_VALUE(token)
4287  },
4288  .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
4289  .type = PM_TOKEN_FLOAT_RATIONAL,
4290  .start = token->start,
4291  .end = token->end - 1
4292  }))
4293  };
4294 
4295  return node;
4296 }
4297 
4301 static pm_for_node_t *
4302 pm_for_node_create(
4303  pm_parser_t *parser,
4304  pm_node_t *index,
4305  pm_node_t *collection,
4306  pm_statements_node_t *statements,
4307  const pm_token_t *for_keyword,
4308  const pm_token_t *in_keyword,
4309  const pm_token_t *do_keyword,
4310  const pm_token_t *end_keyword
4311 ) {
4312  pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4313 
4314  *node = (pm_for_node_t) {
4315  {
4316  .type = PM_FOR_NODE,
4317  .node_id = PM_NODE_IDENTIFY(parser),
4318  .location = {
4319  .start = for_keyword->start,
4320  .end = end_keyword->end
4321  },
4322  },
4323  .index = index,
4324  .collection = collection,
4325  .statements = statements,
4326  .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4327  .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4328  .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4329  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4330  };
4331 
4332  return node;
4333 }
4334 
4338 static pm_forwarding_arguments_node_t *
4339 pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4340  assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4341  pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4342 
4343  *node = (pm_forwarding_arguments_node_t) {{
4344  .type = PM_FORWARDING_ARGUMENTS_NODE,
4345  .node_id = PM_NODE_IDENTIFY(parser),
4346  .location = PM_LOCATION_TOKEN_VALUE(token)
4347  }};
4348 
4349  return node;
4350 }
4351 
4355 static pm_forwarding_parameter_node_t *
4356 pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4357  assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4358  pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4359 
4360  *node = (pm_forwarding_parameter_node_t) {{
4361  .type = PM_FORWARDING_PARAMETER_NODE,
4362  .node_id = PM_NODE_IDENTIFY(parser),
4363  .location = PM_LOCATION_TOKEN_VALUE(token)
4364  }};
4365 
4366  return node;
4367 }
4368 
4372 static pm_forwarding_super_node_t *
4373 pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4374  assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4375  assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4376  pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4377 
4378  pm_block_node_t *block = NULL;
4379  if (arguments->block != NULL) {
4380  block = (pm_block_node_t *) arguments->block;
4381  }
4382 
4383  *node = (pm_forwarding_super_node_t) {
4384  {
4385  .type = PM_FORWARDING_SUPER_NODE,
4386  .node_id = PM_NODE_IDENTIFY(parser),
4387  .location = {
4388  .start = token->start,
4389  .end = block != NULL ? block->base.location.end : token->end
4390  },
4391  },
4392  .block = block
4393  };
4394 
4395  return node;
4396 }
4397 
4402 static pm_hash_pattern_node_t *
4403 pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4404  pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4405 
4406  *node = (pm_hash_pattern_node_t) {
4407  {
4408  .type = PM_HASH_PATTERN_NODE,
4409  .node_id = PM_NODE_IDENTIFY(parser),
4410  .location = {
4411  .start = opening->start,
4412  .end = closing->end
4413  },
4414  },
4415  .constant = NULL,
4416  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4417  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4418  .elements = { 0 },
4419  .rest = NULL
4420  };
4421 
4422  return node;
4423 }
4424 
4428 static pm_hash_pattern_node_t *
4429 pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4430  pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4431 
4432  const uint8_t *start;
4433  const uint8_t *end;
4434 
4435  if (elements->size > 0) {
4436  if (rest) {
4437  start = elements->nodes[0]->location.start;
4438  end = rest->location.end;
4439  } else {
4440  start = elements->nodes[0]->location.start;
4441  end = elements->nodes[elements->size - 1]->location.end;
4442  }
4443  } else {
4444  assert(rest != NULL);
4445  start = rest->location.start;
4446  end = rest->location.end;
4447  }
4448 
4449  *node = (pm_hash_pattern_node_t) {
4450  {
4451  .type = PM_HASH_PATTERN_NODE,
4452  .node_id = PM_NODE_IDENTIFY(parser),
4453  .location = {
4454  .start = start,
4455  .end = end
4456  },
4457  },
4458  .constant = NULL,
4459  .elements = { 0 },
4460  .rest = rest,
4461  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4462  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4463  };
4464 
4465  pm_node_t *element;
4466  PM_NODE_LIST_FOREACH(elements, index, element) {
4467  pm_node_list_append(&node->elements, element);
4468  }
4469 
4470  return node;
4471 }
4472 
4476 static pm_constant_id_t
4477 pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4478  switch (PM_NODE_TYPE(target)) {
4479  case PM_GLOBAL_VARIABLE_READ_NODE:
4480  return ((pm_global_variable_read_node_t *) target)->name;
4481  case PM_BACK_REFERENCE_READ_NODE:
4482  return ((pm_back_reference_read_node_t *) target)->name;
4483  case PM_NUMBERED_REFERENCE_READ_NODE:
4484  // This will only ever happen in the event of a syntax error, but we
4485  // still need to provide something for the node.
4486  return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4487  default:
4488  assert(false && "unreachable");
4489  return (pm_constant_id_t) -1;
4490  }
4491 }
4492 
4496 static pm_global_variable_and_write_node_t *
4497 pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4498  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4499  pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4500 
4501  *node = (pm_global_variable_and_write_node_t) {
4502  {
4503  .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
4504  .node_id = PM_NODE_IDENTIFY(parser),
4505  .location = {
4506  .start = target->location.start,
4507  .end = value->location.end
4508  }
4509  },
4510  .name = pm_global_variable_write_name(parser, target),
4511  .name_loc = target->location,
4512  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4513  .value = value
4514  };
4515 
4516  return node;
4517 }
4518 
4522 static pm_global_variable_operator_write_node_t *
4523 pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4524  pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4525 
4526  *node = (pm_global_variable_operator_write_node_t) {
4527  {
4528  .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
4529  .node_id = PM_NODE_IDENTIFY(parser),
4530  .location = {
4531  .start = target->location.start,
4532  .end = value->location.end
4533  }
4534  },
4535  .name = pm_global_variable_write_name(parser, target),
4536  .name_loc = target->location,
4537  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4538  .value = value,
4539  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4540  };
4541 
4542  return node;
4543 }
4544 
4548 static pm_global_variable_or_write_node_t *
4549 pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4550  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4551  pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4552 
4553  *node = (pm_global_variable_or_write_node_t) {
4554  {
4555  .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
4556  .node_id = PM_NODE_IDENTIFY(parser),
4557  .location = {
4558  .start = target->location.start,
4559  .end = value->location.end
4560  }
4561  },
4562  .name = pm_global_variable_write_name(parser, target),
4563  .name_loc = target->location,
4564  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4565  .value = value
4566  };
4567 
4568  return node;
4569 }
4570 
4574 static pm_global_variable_read_node_t *
4575 pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4576  pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4577 
4578  *node = (pm_global_variable_read_node_t) {
4579  {
4580  .type = PM_GLOBAL_VARIABLE_READ_NODE,
4581  .node_id = PM_NODE_IDENTIFY(parser),
4582  .location = PM_LOCATION_TOKEN_VALUE(name),
4583  },
4584  .name = pm_parser_constant_id_token(parser, name)
4585  };
4586 
4587  return node;
4588 }
4589 
4593 static pm_global_variable_read_node_t *
4594 pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4595  pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4596 
4597  *node = (pm_global_variable_read_node_t) {
4598  {
4599  .type = PM_GLOBAL_VARIABLE_READ_NODE,
4600  .node_id = PM_NODE_IDENTIFY(parser),
4601  .location = PM_LOCATION_NULL_VALUE(parser)
4602  },
4603  .name = name
4604  };
4605 
4606  return node;
4607 }
4608 
4612 static pm_global_variable_write_node_t *
4613 pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4614  pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4615 
4616  *node = (pm_global_variable_write_node_t) {
4617  {
4618  .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4619  .node_id = PM_NODE_IDENTIFY(parser),
4620  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4621  .location = {
4622  .start = target->location.start,
4623  .end = value->location.end
4624  },
4625  },
4626  .name = pm_global_variable_write_name(parser, target),
4627  .name_loc = PM_LOCATION_NODE_VALUE(target),
4628  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4629  .value = value
4630  };
4631 
4632  return node;
4633 }
4634 
4638 static pm_global_variable_write_node_t *
4639 pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4640  pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4641 
4642  *node = (pm_global_variable_write_node_t) {
4643  {
4644  .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4645  .node_id = PM_NODE_IDENTIFY(parser),
4646  .location = PM_LOCATION_NULL_VALUE(parser)
4647  },
4648  .name = name,
4649  .name_loc = PM_LOCATION_NULL_VALUE(parser),
4650  .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4651  .value = value
4652  };
4653 
4654  return node;
4655 }
4656 
4660 static pm_hash_node_t *
4661 pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4662  assert(opening != NULL);
4663  pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4664 
4665  *node = (pm_hash_node_t) {
4666  {
4667  .type = PM_HASH_NODE,
4668  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4669  .node_id = PM_NODE_IDENTIFY(parser),
4670  .location = PM_LOCATION_TOKEN_VALUE(opening)
4671  },
4672  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4673  .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4674  .elements = { 0 }
4675  };
4676 
4677  return node;
4678 }
4679 
4683 static inline void
4684 pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4685  pm_node_list_append(&hash->elements, element);
4686 
4687  bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4688  if (static_literal) {
4689  pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4690  static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4691  static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4692  static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4693  }
4694 
4695  if (!static_literal) {
4696  pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
4697  }
4698 }
4699 
4700 static inline void
4701 pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4702  hash->base.location.end = token->end;
4703  hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4704 }
4705 
4709 static pm_if_node_t *
4710 pm_if_node_create(pm_parser_t *parser,
4711  const pm_token_t *if_keyword,
4712  pm_node_t *predicate,
4713  const pm_token_t *then_keyword,
4714  pm_statements_node_t *statements,
4715  pm_node_t *subsequent,
4716  const pm_token_t *end_keyword
4717 ) {
4718  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4719  pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4720 
4721  const uint8_t *end;
4722  if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4723  end = end_keyword->end;
4724  } else if (subsequent != NULL) {
4725  end = subsequent->location.end;
4726  } else if (pm_statements_node_body_length(statements) != 0) {
4727  end = statements->base.location.end;
4728  } else {
4729  end = predicate->location.end;
4730  }
4731 
4732  *node = (pm_if_node_t) {
4733  {
4734  .type = PM_IF_NODE,
4735  .flags = PM_NODE_FLAG_NEWLINE,
4736  .node_id = PM_NODE_IDENTIFY(parser),
4737  .location = {
4738  .start = if_keyword->start,
4739  .end = end
4740  },
4741  },
4742  .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4743  .predicate = predicate,
4744  .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4745  .statements = statements,
4746  .subsequent = subsequent,
4747  .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4748  };
4749 
4750  return node;
4751 }
4752 
4756 static pm_if_node_t *
4757 pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4758  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4759  pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4760 
4761  pm_statements_node_t *statements = pm_statements_node_create(parser);
4762  pm_statements_node_body_append(parser, statements, statement, true);
4763 
4764  *node = (pm_if_node_t) {
4765  {
4766  .type = PM_IF_NODE,
4767  .flags = PM_NODE_FLAG_NEWLINE,
4768  .node_id = PM_NODE_IDENTIFY(parser),
4769  .location = {
4770  .start = statement->location.start,
4771  .end = predicate->location.end
4772  },
4773  },
4774  .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4775  .predicate = predicate,
4776  .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4777  .statements = statements,
4778  .subsequent = NULL,
4779  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4780  };
4781 
4782  return node;
4783 }
4784 
4788 static pm_if_node_t *
4789 pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4790  pm_assert_value_expression(parser, predicate);
4791  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4792 
4793  pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4794  pm_statements_node_body_append(parser, if_statements, true_expression, true);
4795 
4796  pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4797  pm_statements_node_body_append(parser, else_statements, false_expression, true);
4798 
4799  pm_token_t end_keyword = not_provided(parser);
4800  pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4801 
4802  pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4803 
4804  *node = (pm_if_node_t) {
4805  {
4806  .type = PM_IF_NODE,
4807  .flags = PM_NODE_FLAG_NEWLINE,
4808  .node_id = PM_NODE_IDENTIFY(parser),
4809  .location = {
4810  .start = predicate->location.start,
4811  .end = false_expression->location.end,
4812  },
4813  },
4814  .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4815  .predicate = predicate,
4816  .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4817  .statements = if_statements,
4818  .subsequent = (pm_node_t *) else_node,
4819  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4820  };
4821 
4822  return node;
4823 
4824 }
4825 
4826 static inline void
4827 pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4828  node->base.location.end = keyword->end;
4829  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4830 }
4831 
4832 static inline void
4833 pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4834  node->base.location.end = keyword->end;
4835  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4836 }
4837 
4841 static pm_implicit_node_t *
4842 pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4843  pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4844 
4845  *node = (pm_implicit_node_t) {
4846  {
4847  .type = PM_IMPLICIT_NODE,
4848  .node_id = PM_NODE_IDENTIFY(parser),
4849  .location = value->location
4850  },
4851  .value = value
4852  };
4853 
4854  return node;
4855 }
4856 
4860 static pm_implicit_rest_node_t *
4861 pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4862  assert(token->type == PM_TOKEN_COMMA);
4863 
4864  pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4865 
4866  *node = (pm_implicit_rest_node_t) {
4867  {
4868  .type = PM_IMPLICIT_REST_NODE,
4869  .node_id = PM_NODE_IDENTIFY(parser),
4870  .location = PM_LOCATION_TOKEN_VALUE(token)
4871  }
4872  };
4873 
4874  return node;
4875 }
4876 
4880 static pm_integer_node_t *
4881 pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4882  assert(token->type == PM_TOKEN_INTEGER);
4883  pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4884 
4885  *node = (pm_integer_node_t) {
4886  {
4887  .type = PM_INTEGER_NODE,
4888  .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4889  .node_id = PM_NODE_IDENTIFY(parser),
4890  .location = PM_LOCATION_TOKEN_VALUE(token)
4891  },
4892  .value = { 0 }
4893  };
4894 
4895  pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4896  switch (base) {
4897  case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4898  case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4899  case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4900  case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4901  default: assert(false && "unreachable"); break;
4902  }
4903 
4904  pm_integer_parse(&node->value, integer_base, token->start, token->end);
4905  return node;
4906 }
4907 
4912 static pm_imaginary_node_t *
4913 pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4914  assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4915 
4916  pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4917  *node = (pm_imaginary_node_t) {
4918  {
4919  .type = PM_IMAGINARY_NODE,
4920  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4921  .node_id = PM_NODE_IDENTIFY(parser),
4922  .location = PM_LOCATION_TOKEN_VALUE(token)
4923  },
4924  .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4925  .type = PM_TOKEN_INTEGER,
4926  .start = token->start,
4927  .end = token->end - 1
4928  }))
4929  };
4930 
4931  return node;
4932 }
4933 
4938 static pm_rational_node_t *
4939 pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4940  assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4941 
4942  pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4943  *node = (pm_rational_node_t) {
4944  {
4945  .type = PM_RATIONAL_NODE,
4946  .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4947  .node_id = PM_NODE_IDENTIFY(parser),
4948  .location = PM_LOCATION_TOKEN_VALUE(token)
4949  },
4950  .numerator = { 0 },
4951  .denominator = { .value = 1, 0 }
4952  };
4953 
4954  pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4955  switch (base) {
4956  case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4957  case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4958  case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4959  case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4960  default: assert(false && "unreachable"); break;
4961  }
4962 
4963  pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4964 
4965  return node;
4966 }
4967 
4972 static pm_imaginary_node_t *
4973 pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4974  assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4975 
4976  pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4977  *node = (pm_imaginary_node_t) {
4978  {
4979  .type = PM_IMAGINARY_NODE,
4980  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4981  .node_id = PM_NODE_IDENTIFY(parser),
4982  .location = PM_LOCATION_TOKEN_VALUE(token)
4983  },
4984  .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4985  .type = PM_TOKEN_INTEGER_RATIONAL,
4986  .start = token->start,
4987  .end = token->end - 1
4988  }))
4989  };
4990 
4991  return node;
4992 }
4993 
4997 static pm_in_node_t *
4998 pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4999  pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
5000 
5001  const uint8_t *end;
5002  if (statements != NULL) {
5003  end = statements->base.location.end;
5004  } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
5005  end = then_keyword->end;
5006  } else {
5007  end = pattern->location.end;
5008  }
5009 
5010  *node = (pm_in_node_t) {
5011  {
5012  .type = PM_IN_NODE,
5013  .node_id = PM_NODE_IDENTIFY(parser),
5014  .location = {
5015  .start = in_keyword->start,
5016  .end = end
5017  },
5018  },
5019  .pattern = pattern,
5020  .statements = statements,
5021  .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
5022  .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
5023  };
5024 
5025  return node;
5026 }
5027 
5031 static pm_instance_variable_and_write_node_t *
5032 pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5033  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5034  pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
5035 
5036  *node = (pm_instance_variable_and_write_node_t) {
5037  {
5038  .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
5039  .node_id = PM_NODE_IDENTIFY(parser),
5040  .location = {
5041  .start = target->base.location.start,
5042  .end = value->location.end
5043  }
5044  },
5045  .name = target->name,
5046  .name_loc = target->base.location,
5047  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5048  .value = value
5049  };
5050 
5051  return node;
5052 }
5053 
5057 static pm_instance_variable_operator_write_node_t *
5058 pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5059  pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
5060 
5061  *node = (pm_instance_variable_operator_write_node_t) {
5062  {
5063  .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
5064  .node_id = PM_NODE_IDENTIFY(parser),
5065  .location = {
5066  .start = target->base.location.start,
5067  .end = value->location.end
5068  }
5069  },
5070  .name = target->name,
5071  .name_loc = target->base.location,
5072  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5073  .value = value,
5074  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5075  };
5076 
5077  return node;
5078 }
5079 
5083 static pm_instance_variable_or_write_node_t *
5084 pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5085  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5086  pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
5087 
5088  *node = (pm_instance_variable_or_write_node_t) {
5089  {
5090  .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
5091  .node_id = PM_NODE_IDENTIFY(parser),
5092  .location = {
5093  .start = target->base.location.start,
5094  .end = value->location.end
5095  }
5096  },
5097  .name = target->name,
5098  .name_loc = target->base.location,
5099  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5100  .value = value
5101  };
5102 
5103  return node;
5104 }
5105 
5109 static pm_instance_variable_read_node_t *
5110 pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
5111  assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
5112  pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
5113 
5114  *node = (pm_instance_variable_read_node_t) {
5115  {
5116  .type = PM_INSTANCE_VARIABLE_READ_NODE,
5117  .node_id = PM_NODE_IDENTIFY(parser),
5118  .location = PM_LOCATION_TOKEN_VALUE(token)
5119  },
5120  .name = pm_parser_constant_id_token(parser, token)
5121  };
5122 
5123  return node;
5124 }
5125 
5130 static pm_instance_variable_write_node_t *
5131 pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
5132  pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
5133  *node = (pm_instance_variable_write_node_t) {
5134  {
5135  .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
5136  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5137  .node_id = PM_NODE_IDENTIFY(parser),
5138  .location = {
5139  .start = read_node->base.location.start,
5140  .end = value->location.end
5141  }
5142  },
5143  .name = read_node->name,
5144  .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
5145  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
5146  .value = value
5147  };
5148 
5149  return node;
5150 }
5151 
5157 static void
5158 pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5159  switch (PM_NODE_TYPE(part)) {
5160  case PM_STRING_NODE:
5161  pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5162  break;
5163  case PM_EMBEDDED_STATEMENTS_NODE: {
5164  pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5165  pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5166 
5167  if (embedded == NULL) {
5168  // If there are no statements or more than one statement, then
5169  // we lose the static literal flag.
5170  pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5171  } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5172  // If the embedded statement is a string, then we can keep the
5173  // static literal flag and mark the string as frozen.
5174  pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5175  } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5176  // If the embedded statement is an interpolated string and it's
5177  // a static literal, then we can keep the static literal flag.
5178  } else {
5179  // Otherwise we lose the static literal flag.
5180  pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5181  }
5182 
5183  break;
5184  }
5185  case PM_EMBEDDED_VARIABLE_NODE:
5186  pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5187  break;
5188  default:
5189  assert(false && "unexpected node type");
5190  break;
5191  }
5192 
5193  pm_node_list_append(parts, part);
5194 }
5195 
5199 static pm_interpolated_regular_expression_node_t *
5200 pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5201  pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
5202 
5203  *node = (pm_interpolated_regular_expression_node_t) {
5204  {
5205  .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
5206  .flags = PM_NODE_FLAG_STATIC_LITERAL,
5207  .node_id = PM_NODE_IDENTIFY(parser),
5208  .location = {
5209  .start = opening->start,
5210  .end = NULL,
5211  },
5212  },
5213  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5214  .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
5215  .parts = { 0 }
5216  };
5217 
5218  return node;
5219 }
5220 
5221 static inline void
5222 pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5223  if (node->base.location.start > part->location.start) {
5224  node->base.location.start = part->location.start;
5225  }
5226  if (node->base.location.end < part->location.end) {
5227  node->base.location.end = part->location.end;
5228  }
5229 
5230  pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5231 }
5232 
5233 static inline void
5234 pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5235  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
5236  node->base.location.end = closing->end;
5237  pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
5238 }
5239 
5263 static inline void
5264 pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5265 #define CLEAR_FLAGS(node) \
5266  node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5267 
5268 #define MUTABLE_FLAGS(node) \
5269  node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5270 
5271  if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5272  node->base.location.start = part->location.start;
5273  }
5274 
5275  node->base.location.end = MAX(node->base.location.end, part->location.end);
5276 
5277  switch (PM_NODE_TYPE(part)) {
5278  case PM_STRING_NODE:
5279  part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5280  break;
5281  case PM_INTERPOLATED_STRING_NODE:
5282  if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5283  // If the string that we're concatenating is a static literal,
5284  // then we can keep the static literal flag for this string.
5285  } else {
5286  // Otherwise, we lose the static literal flag here and we should
5287  // also clear the mutability flags.
5288  CLEAR_FLAGS(node);
5289  }
5290  break;
5291  case PM_EMBEDDED_STATEMENTS_NODE: {
5292  pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5293  pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5294 
5295  if (embedded == NULL) {
5296  // If we're embedding multiple statements or no statements, then
5297  // the string is not longer a static literal.
5298  CLEAR_FLAGS(node);
5299  } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5300  // If the embedded statement is a string, then we can make that
5301  // string as frozen and static literal, and not touch the static
5302  // literal status of this string.
5303  embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5304 
5305  if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5306  MUTABLE_FLAGS(node);
5307  }
5308  } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5309  // If the embedded statement is an interpolated string, but that
5310  // string is marked as static literal, then we can keep our
5311  // static literal status for this string.
5312  if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5313  MUTABLE_FLAGS(node);
5314  }
5315  } else {
5316  // In all other cases, we lose the static literal flag here and
5317  // become mutable.
5318  CLEAR_FLAGS(node);
5319  }
5320 
5321  break;
5322  }
5323  case PM_EMBEDDED_VARIABLE_NODE:
5324  // Embedded variables clear static literal, which means we also
5325  // should clear the mutability flags.
5326  CLEAR_FLAGS(node);
5327  break;
5328  default:
5329  assert(false && "unexpected node type");
5330  break;
5331  }
5332 
5333  pm_node_list_append(&node->parts, part);
5334 
5335 #undef CLEAR_FLAGS
5336 #undef MUTABLE_FLAGS
5337 }
5338 
5342 static pm_interpolated_string_node_t *
5343 pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5344  pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
5345  pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5346 
5347  switch (parser->frozen_string_literal) {
5348  case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5349  flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5350  break;
5351  case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5352  flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5353  break;
5354  }
5355 
5356  *node = (pm_interpolated_string_node_t) {
5357  {
5358  .type = PM_INTERPOLATED_STRING_NODE,
5359  .flags = flags,
5360  .node_id = PM_NODE_IDENTIFY(parser),
5361  .location = {
5362  .start = opening->start,
5363  .end = closing->end,
5364  },
5365  },
5366  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5367  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5368  .parts = { 0 }
5369  };
5370 
5371  if (parts != NULL) {
5372  pm_node_t *part;
5373  PM_NODE_LIST_FOREACH(parts, index, part) {
5374  pm_interpolated_string_node_append(node, part);
5375  }
5376  }
5377 
5378  return node;
5379 }
5380 
5384 static void
5385 pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5386  node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5387  node->base.location.end = closing->end;
5388 }
5389 
5390 static void
5391 pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5392  if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5393  node->base.location.start = part->location.start;
5394  }
5395 
5396  pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5397  node->base.location.end = MAX(node->base.location.end, part->location.end);
5398 }
5399 
5400 static void
5401 pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5402  node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5403  node->base.location.end = closing->end;
5404 }
5405 
5409 static pm_interpolated_symbol_node_t *
5410 pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5411  pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
5412 
5413  *node = (pm_interpolated_symbol_node_t) {
5414  {
5415  .type = PM_INTERPOLATED_SYMBOL_NODE,
5416  .flags = PM_NODE_FLAG_STATIC_LITERAL,
5417  .node_id = PM_NODE_IDENTIFY(parser),
5418  .location = {
5419  .start = opening->start,
5420  .end = closing->end,
5421  },
5422  },
5423  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5424  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5425  .parts = { 0 }
5426  };
5427 
5428  if (parts != NULL) {
5429  pm_node_t *part;
5430  PM_NODE_LIST_FOREACH(parts, index, part) {
5431  pm_interpolated_symbol_node_append(node, part);
5432  }
5433  }
5434 
5435  return node;
5436 }
5437 
5441 static pm_interpolated_x_string_node_t *
5442 pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5443  pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
5444 
5445  *node = (pm_interpolated_x_string_node_t) {
5446  {
5447  .type = PM_INTERPOLATED_X_STRING_NODE,
5448  .node_id = PM_NODE_IDENTIFY(parser),
5449  .location = {
5450  .start = opening->start,
5451  .end = closing->end
5452  },
5453  },
5454  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5455  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5456  .parts = { 0 }
5457  };
5458 
5459  return node;
5460 }
5461 
5462 static inline void
5463 pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5464  pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5465  node->base.location.end = part->location.end;
5466 }
5467 
5468 static inline void
5469 pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5470  node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5471  node->base.location.end = closing->end;
5472 }
5473 
5477 static pm_it_local_variable_read_node_t *
5478 pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5479  pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
5480 
5481  *node = (pm_it_local_variable_read_node_t) {
5482  {
5483  .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5484  .node_id = PM_NODE_IDENTIFY(parser),
5485  .location = PM_LOCATION_TOKEN_VALUE(name)
5486  }
5487  };
5488 
5489  return node;
5490 }
5491 
5495 static pm_it_parameters_node_t *
5496 pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5497  pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5498 
5499  *node = (pm_it_parameters_node_t) {
5500  {
5501  .type = PM_IT_PARAMETERS_NODE,
5502  .node_id = PM_NODE_IDENTIFY(parser),
5503  .location = {
5504  .start = opening->start,
5505  .end = closing->end
5506  }
5507  }
5508  };
5509 
5510  return node;
5511 }
5512 
5516 static pm_keyword_hash_node_t *
5517 pm_keyword_hash_node_create(pm_parser_t *parser) {
5518  pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5519 
5520  *node = (pm_keyword_hash_node_t) {
5521  .base = {
5522  .type = PM_KEYWORD_HASH_NODE,
5523  .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5524  .node_id = PM_NODE_IDENTIFY(parser),
5525  .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5526  },
5527  .elements = { 0 }
5528  };
5529 
5530  return node;
5531 }
5532 
5536 static void
5537 pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5538  // If the element being added is not an AssocNode or does not have a symbol
5539  // key, then we want to turn the SYMBOL_KEYS flag off.
5540  if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5541  pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5542  }
5543 
5544  pm_node_list_append(&hash->elements, element);
5545  if (hash->base.location.start == NULL) {
5546  hash->base.location.start = element->location.start;
5547  }
5548  hash->base.location.end = element->location.end;
5549 }
5550 
5554 static pm_required_keyword_parameter_node_t *
5555 pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5556  pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5557 
5558  *node = (pm_required_keyword_parameter_node_t) {
5559  {
5560  .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
5561  .node_id = PM_NODE_IDENTIFY(parser),
5562  .location = {
5563  .start = name->start,
5564  .end = name->end
5565  },
5566  },
5567  .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5568  .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5569  };
5570 
5571  return node;
5572 }
5573 
5577 static pm_optional_keyword_parameter_node_t *
5578 pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5579  pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5580 
5581  *node = (pm_optional_keyword_parameter_node_t) {
5582  {
5583  .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
5584  .node_id = PM_NODE_IDENTIFY(parser),
5585  .location = {
5586  .start = name->start,
5587  .end = value->location.end
5588  },
5589  },
5590  .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5591  .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5592  .value = value
5593  };
5594 
5595  return node;
5596 }
5597 
5601 static pm_keyword_rest_parameter_node_t *
5602 pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5603  pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5604 
5605  *node = (pm_keyword_rest_parameter_node_t) {
5606  {
5607  .type = PM_KEYWORD_REST_PARAMETER_NODE,
5608  .node_id = PM_NODE_IDENTIFY(parser),
5609  .location = {
5610  .start = operator->start,
5611  .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
5612  },
5613  },
5614  .name = pm_parser_optional_constant_id_token(parser, name),
5615  .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5616  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5617  };
5618 
5619  return node;
5620 }
5621 
5625 static pm_lambda_node_t *
5626 pm_lambda_node_create(
5627  pm_parser_t *parser,
5628  pm_constant_id_list_t *locals,
5629  const pm_token_t *operator,
5630  const pm_token_t *opening,
5631  const pm_token_t *closing,
5632  pm_node_t *parameters,
5633  pm_node_t *body
5634 ) {
5635  pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5636 
5637  *node = (pm_lambda_node_t) {
5638  {
5639  .type = PM_LAMBDA_NODE,
5640  .node_id = PM_NODE_IDENTIFY(parser),
5641  .location = {
5642  .start = operator->start,
5643  .end = closing->end
5644  },
5645  },
5646  .locals = *locals,
5647  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5648  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5649  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5650  .parameters = parameters,
5651  .body = body
5652  };
5653 
5654  return node;
5655 }
5656 
5660 static pm_local_variable_and_write_node_t *
5661 pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5662  assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5663  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5664  pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5665 
5666  *node = (pm_local_variable_and_write_node_t) {
5667  {
5668  .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
5669  .node_id = PM_NODE_IDENTIFY(parser),
5670  .location = {
5671  .start = target->location.start,
5672  .end = value->location.end
5673  }
5674  },
5675  .name_loc = target->location,
5676  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5677  .value = value,
5678  .name = name,
5679  .depth = depth
5680  };
5681 
5682  return node;
5683 }
5684 
5688 static pm_local_variable_operator_write_node_t *
5689 pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5690  pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5691 
5692  *node = (pm_local_variable_operator_write_node_t) {
5693  {
5694  .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
5695  .node_id = PM_NODE_IDENTIFY(parser),
5696  .location = {
5697  .start = target->location.start,
5698  .end = value->location.end
5699  }
5700  },
5701  .name_loc = target->location,
5702  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5703  .value = value,
5704  .name = name,
5705  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5706  .depth = depth
5707  };
5708 
5709  return node;
5710 }
5711 
5715 static pm_local_variable_or_write_node_t *
5716 pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5717  assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5718  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5719  pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5720 
5721  *node = (pm_local_variable_or_write_node_t) {
5722  {
5723  .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
5724  .node_id = PM_NODE_IDENTIFY(parser),
5725  .location = {
5726  .start = target->location.start,
5727  .end = value->location.end
5728  }
5729  },
5730  .name_loc = target->location,
5731  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5732  .value = value,
5733  .name = name,
5734  .depth = depth
5735  };
5736 
5737  return node;
5738 }
5739 
5743 static pm_local_variable_read_node_t *
5744 pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5745  if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5746 
5747  pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5748 
5749  *node = (pm_local_variable_read_node_t) {
5750  {
5751  .type = PM_LOCAL_VARIABLE_READ_NODE,
5752  .node_id = PM_NODE_IDENTIFY(parser),
5753  .location = PM_LOCATION_TOKEN_VALUE(name)
5754  },
5755  .name = name_id,
5756  .depth = depth
5757  };
5758 
5759  return node;
5760 }
5761 
5765 static pm_local_variable_read_node_t *
5766 pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5767  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5768  return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5769 }
5770 
5775 static pm_local_variable_read_node_t *
5776 pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5777  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5778  return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5779 }
5780 
5784 static pm_local_variable_write_node_t *
5785 pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5786  pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5787 
5788  *node = (pm_local_variable_write_node_t) {
5789  {
5790  .type = PM_LOCAL_VARIABLE_WRITE_NODE,
5791  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5792  .node_id = PM_NODE_IDENTIFY(parser),
5793  .location = {
5794  .start = name_loc->start,
5795  .end = value->location.end
5796  }
5797  },
5798  .name = name,
5799  .depth = depth,
5800  .value = value,
5801  .name_loc = *name_loc,
5802  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5803  };
5804 
5805  return node;
5806 }
5807 
/**
 * Returns true if the bytes in [start, end) are exactly the two characters
 * "it".
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    // Check the length first so that no byte outside the range is read.
    if (end - start != 2) return false;

    return start[0] == 'i' && start[1] == 't';
}
5815 
/**
 * Returns true if the bytes in [start, end) have the shape of a numbered
 * parameter: exactly two bytes, an underscore followed by a decimal digit
 * other than '0'.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    // Check the length first so that no byte outside the range is read.
    if (end - start != 2) return false;
    if (start[0] != '_') return false;

    return (start[1] != '0') && pm_char_is_decimal_digit(start[1]);
}
5824 
5829 static inline void
5830 pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5831  if (pm_token_is_numbered_parameter(start, end)) {
5832  PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5833  }
5834 }
5835 
5840 static pm_local_variable_target_node_t *
5841 pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5842  pm_refute_numbered_parameter(parser, location->start, location->end);
5843  pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5844 
5845  *node = (pm_local_variable_target_node_t) {
5846  {
5847  .type = PM_LOCAL_VARIABLE_TARGET_NODE,
5848  .node_id = PM_NODE_IDENTIFY(parser),
5849  .location = *location
5850  },
5851  .name = name,
5852  .depth = depth
5853  };
5854 
5855  return node;
5856 }
5857 
5861 static pm_match_predicate_node_t *
5862 pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5863  pm_assert_value_expression(parser, value);
5864 
5865  pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5866 
5867  *node = (pm_match_predicate_node_t) {
5868  {
5869  .type = PM_MATCH_PREDICATE_NODE,
5870  .node_id = PM_NODE_IDENTIFY(parser),
5871  .location = {
5872  .start = value->location.start,
5873  .end = pattern->location.end
5874  }
5875  },
5876  .value = value,
5877  .pattern = pattern,
5878  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5879  };
5880 
5881  return node;
5882 }
5883 
5887 static pm_match_required_node_t *
5888 pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5889  pm_assert_value_expression(parser, value);
5890 
5891  pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5892 
5893  *node = (pm_match_required_node_t) {
5894  {
5895  .type = PM_MATCH_REQUIRED_NODE,
5896  .node_id = PM_NODE_IDENTIFY(parser),
5897  .location = {
5898  .start = value->location.start,
5899  .end = pattern->location.end
5900  }
5901  },
5902  .value = value,
5903  .pattern = pattern,
5904  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5905  };
5906 
5907  return node;
5908 }
5909 
5913 static pm_match_write_node_t *
5914 pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5915  pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5916 
5917  *node = (pm_match_write_node_t) {
5918  {
5919  .type = PM_MATCH_WRITE_NODE,
5920  .node_id = PM_NODE_IDENTIFY(parser),
5921  .location = call->base.location
5922  },
5923  .call = call,
5924  .targets = { 0 }
5925  };
5926 
5927  return node;
5928 }
5929 
5933 static pm_module_node_t *
5934 pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5935  pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5936 
5937  *node = (pm_module_node_t) {
5938  {
5939  .type = PM_MODULE_NODE,
5940  .node_id = PM_NODE_IDENTIFY(parser),
5941  .location = {
5942  .start = module_keyword->start,
5943  .end = end_keyword->end
5944  }
5945  },
5946  .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5947  .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5948  .constant_path = constant_path,
5949  .body = body,
5950  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5951  .name = pm_parser_constant_id_token(parser, name)
5952  };
5953 
5954  return node;
5955 }
5956 
5960 static pm_multi_target_node_t *
5961 pm_multi_target_node_create(pm_parser_t *parser) {
5962  pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5963 
5964  *node = (pm_multi_target_node_t) {
5965  {
5966  .type = PM_MULTI_TARGET_NODE,
5967  .node_id = PM_NODE_IDENTIFY(parser),
5968  .location = { .start = NULL, .end = NULL }
5969  },
5970  .lefts = { 0 },
5971  .rest = NULL,
5972  .rights = { 0 },
5973  .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5974  .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5975  };
5976 
5977  return node;
5978 }
5979 
5983 static void
5984 pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5985  if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5986  if (node->rest == NULL) {
5987  node->rest = target;
5988  } else {
5989  pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5990  pm_node_list_append(&node->rights, target);
5991  }
5992  } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5993  if (node->rest == NULL) {
5994  node->rest = target;
5995  } else {
5996  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5997  pm_node_list_append(&node->rights, target);
5998  }
5999  } else if (node->rest == NULL) {
6000  pm_node_list_append(&node->lefts, target);
6001  } else {
6002  pm_node_list_append(&node->rights, target);
6003  }
6004 
6005  if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
6006  node->base.location.start = target->location.start;
6007  }
6008 
6009  if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
6010  node->base.location.end = target->location.end;
6011  }
6012 }
6013 
6017 static void
6018 pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
6019  node->base.location.start = lparen->start;
6020  node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
6021 }
6022 
6026 static void
6027 pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
6028  node->base.location.end = rparen->end;
6029  node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
6030 }
6031 
6035 static pm_multi_write_node_t *
6036 pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
6037  pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
6038 
6039  *node = (pm_multi_write_node_t) {
6040  {
6041  .type = PM_MULTI_WRITE_NODE,
6042  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
6043  .node_id = PM_NODE_IDENTIFY(parser),
6044  .location = {
6045  .start = target->base.location.start,
6046  .end = value->location.end
6047  }
6048  },
6049  .lefts = target->lefts,
6050  .rest = target->rest,
6051  .rights = target->rights,
6052  .lparen_loc = target->lparen_loc,
6053  .rparen_loc = target->rparen_loc,
6054  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6055  .value = value
6056  };
6057 
6058  // Explicitly do not call pm_node_destroy here because we want to keep
6059  // around all of the information within the MultiWriteNode node.
6060  xfree(target);
6061 
6062  return node;
6063 }
6064 
6068 static pm_next_node_t *
6069 pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6070  assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
6071  pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
6072 
6073  *node = (pm_next_node_t) {
6074  {
6075  .type = PM_NEXT_NODE,
6076  .node_id = PM_NODE_IDENTIFY(parser),
6077  .location = {
6078  .start = keyword->start,
6079  .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6080  }
6081  },
6082  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6083  .arguments = arguments
6084  };
6085 
6086  return node;
6087 }
6088 
6092 static pm_nil_node_t *
6093 pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
6094  assert(token->type == PM_TOKEN_KEYWORD_NIL);
6095  pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
6096 
6097  *node = (pm_nil_node_t) {{
6098  .type = PM_NIL_NODE,
6099  .flags = PM_NODE_FLAG_STATIC_LITERAL,
6100  .node_id = PM_NODE_IDENTIFY(parser),
6101  .location = PM_LOCATION_TOKEN_VALUE(token)
6102  }};
6103 
6104  return node;
6105 }
6106 
6110 static pm_no_keywords_parameter_node_t *
6111 pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
6112  assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
6113  assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
6114  pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
6115 
6116  *node = (pm_no_keywords_parameter_node_t) {
6117  {
6118  .type = PM_NO_KEYWORDS_PARAMETER_NODE,
6119  .node_id = PM_NODE_IDENTIFY(parser),
6120  .location = {
6121  .start = operator->start,
6122  .end = keyword->end
6123  }
6124  },
6125  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6126  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
6127  };
6128 
6129  return node;
6130 }
6131 
6135 static pm_numbered_parameters_node_t *
6136 pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
6137  pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
6138 
6139  *node = (pm_numbered_parameters_node_t) {
6140  {
6141  .type = PM_NUMBERED_PARAMETERS_NODE,
6142  .node_id = PM_NODE_IDENTIFY(parser),
6143  .location = *location
6144  },
6145  .maximum = maximum
6146  };
6147 
6148  return node;
6149 }
6150 
6155 #define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
6156 
6163 static uint32_t
6164 pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
6165  const uint8_t *start = token->start + 1;
6166  const uint8_t *end = token->end;
6167 
6168  ptrdiff_t diff = end - start;
6169  assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
6170  size_t length = (size_t) diff;
6171 
6172  char *digits = xcalloc(length + 1, sizeof(char));
6173  memcpy(digits, start, length);
6174  digits[length] = '\0';
6175 
6176  char *endptr;
6177  errno = 0;
6178  unsigned long value = strtoul(digits, &endptr, 10);
6179 
6180  if ((digits == endptr) || (*endptr != '\0')) {
6181  pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
6182  value = 0;
6183  }
6184 
6185  xfree(digits);
6186 
6187  if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
6188  PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
6189  value = 0;
6190  }
6191 
6192  return (uint32_t) value;
6193 }
6194 
6195 #undef NTH_REF_MAX
6196 
6200 static pm_numbered_reference_read_node_t *
6201 pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
6202  assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
6203  pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
6204 
6205  *node = (pm_numbered_reference_read_node_t) {
6206  {
6207  .type = PM_NUMBERED_REFERENCE_READ_NODE,
6208  .node_id = PM_NODE_IDENTIFY(parser),
6209  .location = PM_LOCATION_TOKEN_VALUE(name),
6210  },
6211  .number = pm_numbered_reference_read_node_number(parser, name)
6212  };
6213 
6214  return node;
6215 }
6216 
6220 static pm_optional_parameter_node_t *
6221 pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
6222  pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
6223 
6224  *node = (pm_optional_parameter_node_t) {
6225  {
6226  .type = PM_OPTIONAL_PARAMETER_NODE,
6227  .node_id = PM_NODE_IDENTIFY(parser),
6228  .location = {
6229  .start = name->start,
6230  .end = value->location.end
6231  }
6232  },
6233  .name = pm_parser_constant_id_token(parser, name),
6234  .name_loc = PM_LOCATION_TOKEN_VALUE(name),
6235  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6236  .value = value
6237  };
6238 
6239  return node;
6240 }
6241 
6245 static pm_or_node_t *
6246 pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6247  pm_assert_value_expression(parser, left);
6248 
6249  pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
6250 
6251  *node = (pm_or_node_t) {
6252  {
6253  .type = PM_OR_NODE,
6254  .node_id = PM_NODE_IDENTIFY(parser),
6255  .location = {
6256  .start = left->location.start,
6257  .end = right->location.end
6258  }
6259  },
6260  .left = left,
6261  .right = right,
6262  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6263  };
6264 
6265  return node;
6266 }
6267 
6271 static pm_parameters_node_t *
6272 pm_parameters_node_create(pm_parser_t *parser) {
6273  pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
6274 
6275  *node = (pm_parameters_node_t) {
6276  {
6277  .type = PM_PARAMETERS_NODE,
6278  .node_id = PM_NODE_IDENTIFY(parser),
6279  .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
6280  },
6281  .rest = NULL,
6282  .keyword_rest = NULL,
6283  .block = NULL,
6284  .requireds = { 0 },
6285  .optionals = { 0 },
6286  .posts = { 0 },
6287  .keywords = { 0 }
6288  };
6289 
6290  return node;
6291 }
6292 
6296 static void
6297 pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
6298  if (params->base.location.start == NULL) {
6299  params->base.location.start = param->location.start;
6300  } else {
6301  params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
6302  }
6303 
6304  if (params->base.location.end == NULL) {
6305  params->base.location.end = param->location.end;
6306  } else {
6307  params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
6308  }
6309 }
6310 
6314 static void
6315 pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
6316  pm_parameters_node_location_set(params, param);
6317  pm_node_list_append(&params->requireds, param);
6318 }
6319 
6323 static void
6324 pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6325  pm_parameters_node_location_set(params, (pm_node_t *) param);
6326  pm_node_list_append(&params->optionals, (pm_node_t *) param);
6327 }
6328 
6332 static void
6333 pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
6334  pm_parameters_node_location_set(params, param);
6335  pm_node_list_append(&params->posts, param);
6336 }
6337 
6341 static void
6342 pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6343  pm_parameters_node_location_set(params, param);
6344  params->rest = param;
6345 }
6346 
6350 static void
6351 pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
6352  pm_parameters_node_location_set(params, param);
6353  pm_node_list_append(&params->keywords, param);
6354 }
6355 
6359 static void
6360 pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6361  assert(params->keyword_rest == NULL);
6362  pm_parameters_node_location_set(params, param);
6363  params->keyword_rest = param;
6364 }
6365 
6369 static void
6370 pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
6371  assert(params->block == NULL);
6372  pm_parameters_node_location_set(params, (pm_node_t *) param);
6373  params->block = param;
6374 }
6375 
6379 static pm_program_node_t *
6380 pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6381  pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
6382 
6383  *node = (pm_program_node_t) {
6384  {
6385  .type = PM_PROGRAM_NODE,
6386  .node_id = PM_NODE_IDENTIFY(parser),
6387  .location = {
6388  .start = statements == NULL ? parser->start : statements->base.location.start,
6389  .end = statements == NULL ? parser->end : statements->base.location.end
6390  }
6391  },
6392  .locals = *locals,
6393  .statements = statements
6394  };
6395 
6396  return node;
6397 }
6398 
6402 static pm_parentheses_node_t *
6403 pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing) {
6404  pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
6405 
6406  *node = (pm_parentheses_node_t) {
6407  {
6408  .type = PM_PARENTHESES_NODE,
6409  .node_id = PM_NODE_IDENTIFY(parser),
6410  .location = {
6411  .start = opening->start,
6412  .end = closing->end
6413  }
6414  },
6415  .body = body,
6416  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6417  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6418  };
6419 
6420  return node;
6421 }
6422 
6426 static pm_pinned_expression_node_t *
6427 pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6428  pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
6429 
6430  *node = (pm_pinned_expression_node_t) {
6431  {
6432  .type = PM_PINNED_EXPRESSION_NODE,
6433  .node_id = PM_NODE_IDENTIFY(parser),
6434  .location = {
6435  .start = operator->start,
6436  .end = rparen->end
6437  }
6438  },
6439  .expression = expression,
6440  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6441  .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
6442  .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
6443  };
6444 
6445  return node;
6446 }
6447 
6451 static pm_pinned_variable_node_t *
6452 pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6453  pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
6454 
6455  *node = (pm_pinned_variable_node_t) {
6456  {
6457  .type = PM_PINNED_VARIABLE_NODE,
6458  .node_id = PM_NODE_IDENTIFY(parser),
6459  .location = {
6460  .start = operator->start,
6461  .end = variable->location.end
6462  }
6463  },
6464  .variable = variable,
6465  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6466  };
6467 
6468  return node;
6469 }
6470 
6474 static pm_post_execution_node_t *
6475 pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6476  pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
6477 
6478  *node = (pm_post_execution_node_t) {
6479  {
6480  .type = PM_POST_EXECUTION_NODE,
6481  .node_id = PM_NODE_IDENTIFY(parser),
6482  .location = {
6483  .start = keyword->start,
6484  .end = closing->end
6485  }
6486  },
6487  .statements = statements,
6488  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6489  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6490  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6491  };
6492 
6493  return node;
6494 }
6495 
6499 static pm_pre_execution_node_t *
6500 pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6501  pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
6502 
6503  *node = (pm_pre_execution_node_t) {
6504  {
6505  .type = PM_PRE_EXECUTION_NODE,
6506  .node_id = PM_NODE_IDENTIFY(parser),
6507  .location = {
6508  .start = keyword->start,
6509  .end = closing->end
6510  }
6511  },
6512  .statements = statements,
6513  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6514  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6515  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6516  };
6517 
6518  return node;
6519 }
6520 
6524 static pm_range_node_t *
6525 pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6526  pm_assert_value_expression(parser, left);
6527  pm_assert_value_expression(parser, right);
6528 
6529  pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
6530  pm_node_flags_t flags = 0;
6531 
6532  // Indicate that this node is an exclusive range if the operator is `...`.
6533  if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6534  flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6535  }
6536 
6537  // Indicate that this node is a static literal (i.e., can be compiled with
6538  // a putobject in CRuby) if the left and right are implicit nil, explicit
6539  // nil, or integers.
6540  if (
6541  (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6542  (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6543  ) {
6544  flags |= PM_NODE_FLAG_STATIC_LITERAL;
6545  }
6546 
6547  *node = (pm_range_node_t) {
6548  {
6549  .type = PM_RANGE_NODE,
6550  .flags = flags,
6551  .node_id = PM_NODE_IDENTIFY(parser),
6552  .location = {
6553  .start = (left == NULL ? operator->start : left->location.start),
6554  .end = (right == NULL ? operator->end : right->location.end)
6555  }
6556  },
6557  .left = left,
6558  .right = right,
6559  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6560  };
6561 
6562  return node;
6563 }
6564 
6568 static pm_redo_node_t *
6569 pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6570  assert(token->type == PM_TOKEN_KEYWORD_REDO);
6571  pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
6572 
6573  *node = (pm_redo_node_t) {{
6574  .type = PM_REDO_NODE,
6575  .node_id = PM_NODE_IDENTIFY(parser),
6576  .location = PM_LOCATION_TOKEN_VALUE(token)
6577  }};
6578 
6579  return node;
6580 }
6581 
6586 static pm_regular_expression_node_t *
6587 pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6588  pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
6589 
6590  *node = (pm_regular_expression_node_t) {
6591  {
6592  .type = PM_REGULAR_EXPRESSION_NODE,
6593  .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6594  .node_id = PM_NODE_IDENTIFY(parser),
6595  .location = {
6596  .start = MIN(opening->start, closing->start),
6597  .end = MAX(opening->end, closing->end)
6598  }
6599  },
6600  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6601  .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6602  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
6603  .unescaped = *unescaped
6604  };
6605 
6606  return node;
6607 }
6608 
6612 static inline pm_regular_expression_node_t *
6613 pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6614  return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6615 }
6616 
6620 static pm_required_parameter_node_t *
6621 pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6622  pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
6623 
6624  *node = (pm_required_parameter_node_t) {
6625  {
6626  .type = PM_REQUIRED_PARAMETER_NODE,
6627  .node_id = PM_NODE_IDENTIFY(parser),
6628  .location = PM_LOCATION_TOKEN_VALUE(token)
6629  },
6630  .name = pm_parser_constant_id_token(parser, token)
6631  };
6632 
6633  return node;
6634 }
6635 
6639 static pm_rescue_modifier_node_t *
6640 pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6641  pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
6642 
6643  *node = (pm_rescue_modifier_node_t) {
6644  {
6645  .type = PM_RESCUE_MODIFIER_NODE,
6646  .node_id = PM_NODE_IDENTIFY(parser),
6647  .location = {
6648  .start = expression->location.start,
6649  .end = rescue_expression->location.end
6650  }
6651  },
6652  .expression = expression,
6653  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6654  .rescue_expression = rescue_expression
6655  };
6656 
6657  return node;
6658 }
6659 
6663 static pm_rescue_node_t *
6664 pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6665  pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
6666 
6667  *node = (pm_rescue_node_t) {
6668  {
6669  .type = PM_RESCUE_NODE,
6670  .node_id = PM_NODE_IDENTIFY(parser),
6671  .location = PM_LOCATION_TOKEN_VALUE(keyword)
6672  },
6673  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6674  .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6675  .reference = NULL,
6676  .statements = NULL,
6677  .subsequent = NULL,
6678  .exceptions = { 0 }
6679  };
6680 
6681  return node;
6682 }
6683 
6684 static inline void
6685 pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
6686  node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
6687 }
6688 
6692 static void
6693 pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6694  node->reference = reference;
6695  node->base.location.end = reference->location.end;
6696 }
6697 
6701 static void
6702 pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6703  node->statements = statements;
6704  if (pm_statements_node_body_length(statements) > 0) {
6705  node->base.location.end = statements->base.location.end;
6706  }
6707 }
6708 
6712 static void
6713 pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6714  node->subsequent = subsequent;
6715  node->base.location.end = subsequent->base.location.end;
6716 }
6717 
6721 static void
6722 pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6723  pm_node_list_append(&node->exceptions, exception);
6724  node->base.location.end = exception->location.end;
6725 }
6726 
6730 static pm_rest_parameter_node_t *
6731 pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6732  pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6733 
6734  *node = (pm_rest_parameter_node_t) {
6735  {
6736  .type = PM_REST_PARAMETER_NODE,
6737  .node_id = PM_NODE_IDENTIFY(parser),
6738  .location = {
6739  .start = operator->start,
6740  .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
6741  }
6742  },
6743  .name = pm_parser_optional_constant_id_token(parser, name),
6744  .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6745  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6746  };
6747 
6748  return node;
6749 }
6750 
6754 static pm_retry_node_t *
6755 pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6756  assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6757  pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6758 
6759  *node = (pm_retry_node_t) {{
6760  .type = PM_RETRY_NODE,
6761  .node_id = PM_NODE_IDENTIFY(parser),
6762  .location = PM_LOCATION_TOKEN_VALUE(token)
6763  }};
6764 
6765  return node;
6766 }
6767 
6771 static pm_return_node_t *
6772 pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6773  pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6774 
6775  *node = (pm_return_node_t) {
6776  {
6777  .type = PM_RETURN_NODE,
6778  .node_id = PM_NODE_IDENTIFY(parser),
6779  .location = {
6780  .start = keyword->start,
6781  .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6782  }
6783  },
6784  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6785  .arguments = arguments
6786  };
6787 
6788  return node;
6789 }
6790 
6794 static pm_self_node_t *
6795 pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6796  assert(token->type == PM_TOKEN_KEYWORD_SELF);
6797  pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6798 
6799  *node = (pm_self_node_t) {{
6800  .type = PM_SELF_NODE,
6801  .node_id = PM_NODE_IDENTIFY(parser),
6802  .location = PM_LOCATION_TOKEN_VALUE(token)
6803  }};
6804 
6805  return node;
6806 }
6807 
6811 static pm_shareable_constant_node_t *
6812 pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6813  pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6814 
6815  *node = (pm_shareable_constant_node_t) {
6816  {
6817  .type = PM_SHAREABLE_CONSTANT_NODE,
6818  .flags = (pm_node_flags_t) value,
6819  .node_id = PM_NODE_IDENTIFY(parser),
6820  .location = PM_LOCATION_NODE_VALUE(write)
6821  },
6822  .write = write
6823  };
6824 
6825  return node;
6826 }
6827 
6831 static pm_singleton_class_node_t *
6832 pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6833  pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6834 
6835  *node = (pm_singleton_class_node_t) {
6836  {
6837  .type = PM_SINGLETON_CLASS_NODE,
6838  .node_id = PM_NODE_IDENTIFY(parser),
6839  .location = {
6840  .start = class_keyword->start,
6841  .end = end_keyword->end
6842  }
6843  },
6844  .locals = *locals,
6845  .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6846  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6847  .expression = expression,
6848  .body = body,
6849  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6850  };
6851 
6852  return node;
6853 }
6854 
6858 static pm_source_encoding_node_t *
6859 pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6860  assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6861  pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6862 
6863  *node = (pm_source_encoding_node_t) {{
6864  .type = PM_SOURCE_ENCODING_NODE,
6865  .flags = PM_NODE_FLAG_STATIC_LITERAL,
6866  .node_id = PM_NODE_IDENTIFY(parser),
6867  .location = PM_LOCATION_TOKEN_VALUE(token)
6868  }};
6869 
6870  return node;
6871 }
6872 
6876 static pm_source_file_node_t*
6877 pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6878  pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6879  assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6880 
6881  pm_node_flags_t flags = 0;
6882 
6883  switch (parser->frozen_string_literal) {
6884  case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6885  flags |= PM_STRING_FLAGS_MUTABLE;
6886  break;
6887  case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6888  flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6889  break;
6890  }
6891 
6892  *node = (pm_source_file_node_t) {
6893  {
6894  .type = PM_SOURCE_FILE_NODE,
6895  .flags = flags,
6896  .node_id = PM_NODE_IDENTIFY(parser),
6897  .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
6898  },
6899  .filepath = parser->filepath
6900  };
6901 
6902  return node;
6903 }
6904 
6908 static pm_source_line_node_t *
6909 pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6910  assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6911  pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6912 
6913  *node = (pm_source_line_node_t) {{
6914  .type = PM_SOURCE_LINE_NODE,
6915  .flags = PM_NODE_FLAG_STATIC_LITERAL,
6916  .node_id = PM_NODE_IDENTIFY(parser),
6917  .location = PM_LOCATION_TOKEN_VALUE(token)
6918  }};
6919 
6920  return node;
6921 }
6922 
6926 static pm_splat_node_t *
6927 pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6928  pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6929 
6930  *node = (pm_splat_node_t) {
6931  {
6932  .type = PM_SPLAT_NODE,
6933  .node_id = PM_NODE_IDENTIFY(parser),
6934  .location = {
6935  .start = operator->start,
6936  .end = (expression == NULL ? operator->end : expression->location.end)
6937  }
6938  },
6939  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6940  .expression = expression
6941  };
6942 
6943  return node;
6944 }
6945 
6949 static pm_statements_node_t *
6950 pm_statements_node_create(pm_parser_t *parser) {
6951  pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6952 
6953  *node = (pm_statements_node_t) {
6954  {
6955  .type = PM_STATEMENTS_NODE,
6956  .node_id = PM_NODE_IDENTIFY(parser),
6957  .location = PM_LOCATION_NULL_VALUE(parser)
6958  },
6959  .body = { 0 }
6960  };
6961 
6962  return node;
6963 }
6964 
6968 static size_t
6969 pm_statements_node_body_length(pm_statements_node_t *node) {
6970  return node && node->body.size;
6971 }
6972 
6976 static void
6977 pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6978  node->base.location = (pm_location_t) { .start = start, .end = end };
6979 }
6980 
6985 static inline void
6986 pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6987  if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
6988  node->base.location.start = statement->location.start;
6989  }
6990 
6991  if (statement->location.end > node->base.location.end) {
6992  node->base.location.end = statement->location.end;
6993  }
6994 }
6995 
6999 static void
7000 pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
7001  pm_statements_node_body_update(node, statement);
7002 
7003  if (node->body.size > 0) {
7004  const pm_node_t *previous = node->body.nodes[node->body.size - 1];
7005 
7006  switch (PM_NODE_TYPE(previous)) {
7007  case PM_BREAK_NODE:
7008  case PM_NEXT_NODE:
7009  case PM_REDO_NODE:
7010  case PM_RETRY_NODE:
7011  case PM_RETURN_NODE:
7012  pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
7013  break;
7014  default:
7015  break;
7016  }
7017  }
7018 
7019  pm_node_list_append(&node->body, statement);
7020  if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7021 }
7022 
7026 static void
7027 pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
7028  pm_statements_node_body_update(node, statement);
7029  pm_node_list_prepend(&node->body, statement);
7030  pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7031 }
7032 
7036 static inline pm_string_node_t *
7037 pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
7038  pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
7039  pm_node_flags_t flags = 0;
7040 
7041  switch (parser->frozen_string_literal) {
7042  case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7043  flags = PM_STRING_FLAGS_MUTABLE;
7044  break;
7045  case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7046  flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7047  break;
7048  }
7049 
7050  *node = (pm_string_node_t) {
7051  {
7052  .type = PM_STRING_NODE,
7053  .flags = flags,
7054  .node_id = PM_NODE_IDENTIFY(parser),
7055  .location = {
7056  .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
7057  .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
7058  }
7059  },
7060  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7061  .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7062  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7063  .unescaped = *string
7064  };
7065 
7066  return node;
7067 }
7068 
7072 static pm_string_node_t *
7073 pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7074  return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7075 }
7076 
7081 static pm_string_node_t *
7082 pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7083  pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
7084  parser->current_string = PM_STRING_EMPTY;
7085  return node;
7086 }
7087 
7091 static pm_super_node_t *
7092 pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
7093  assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
7094  pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
7095 
7096  const uint8_t *end = pm_arguments_end(arguments);
7097  if (end == NULL) {
7098  assert(false && "unreachable");
7099  }
7100 
7101  *node = (pm_super_node_t) {
7102  {
7103  .type = PM_SUPER_NODE,
7104  .node_id = PM_NODE_IDENTIFY(parser),
7105  .location = {
7106  .start = keyword->start,
7107  .end = end,
7108  }
7109  },
7110  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7111  .lparen_loc = arguments->opening_loc,
7112  .arguments = arguments->arguments,
7113  .rparen_loc = arguments->closing_loc,
7114  .block = arguments->block
7115  };
7116 
7117  return node;
7118 }
7119 
7124 static bool
7125 pm_ascii_only_p(const pm_string_t *contents) {
7126  const size_t length = pm_string_length(contents);
7127  const uint8_t *source = pm_string_source(contents);
7128 
7129  for (size_t index = 0; index < length; index++) {
7130  if (source[index] & 0x80) return false;
7131  }
7132 
7133  return true;
7134 }
7135 
7139 static void
7140 parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7141  for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7142  size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7143 
7144  if (width == 0) {
7145  pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7146  break;
7147  }
7148 
7149  cursor += width;
7150  }
7151 }
7152 
7157 static void
7158 parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7159  const pm_encoding_t *encoding = parser->encoding;
7160 
7161  for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7162  size_t width = encoding->char_width(cursor, end - cursor);
7163 
7164  if (width == 0) {
7165  pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7166  break;
7167  }
7168 
7169  cursor += width;
7170  }
7171 }
7172 
7182 static inline pm_node_flags_t
7183 parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
7184  if (parser->explicit_encoding != NULL) {
7185  // A Symbol may optionally have its encoding explicitly set. This will
7186  // happen if an escape sequence results in a non-ASCII code point.
7187  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7188  if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
7189  return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
7190  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7191  return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7192  } else if (validate) {
7193  parse_symbol_encoding_validate_other(parser, location, contents);
7194  }
7195  } else if (pm_ascii_only_p(contents)) {
7196  // Ruby stipulates that all source files must use an ASCII-compatible
7197  // encoding. Thus, all symbols appearing in source are eligible for
7198  // "downgrading" to US-ASCII.
7199  return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7200  } else if (validate) {
7201  parse_symbol_encoding_validate_other(parser, location, contents);
7202  }
7203 
7204  return 0;
7205 }
7206 
7207 static pm_node_flags_t
7208 parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
7209  assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
7210  (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
7211  (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
7212  (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
7213 
7214  // There's special validation logic used if a string does not contain any character escape sequences.
7215  if (parser->explicit_encoding == NULL) {
7216  // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
7217  // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
7218  // the US-ASCII encoding.
7219  if (ascii_only) {
7220  return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
7221  }
7222 
7223  if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7224  if (!ascii_only) {
7225  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7226  }
7227  } else if (parser->encoding != modifier_encoding) {
7228  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
7229 
7230  if (modifier == 'n' && !ascii_only) {
7231  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
7232  }
7233  }
7234 
7235  return flags;
7236  }
7237 
7238  // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
7239  bool mixed_encoding = false;
7240 
7241  if (mixed_encoding) {
7242  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7243  } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
7244  // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
7245  bool valid_string_in_modifier_encoding = true;
7246 
7247  if (!valid_string_in_modifier_encoding) {
7248  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7249  }
7250  } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7251  // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
7252  if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7253  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
7254  }
7255  }
7256 
7257  // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
7258  return flags;
7259 }
7260 
7267 static pm_node_flags_t
7268 parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
7269  // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
7270  bool valid_unicode_range = true;
7271  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
7272  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7273  return flags;
7274  }
7275 
7276  // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
7277  // to multi-byte characters are allowed.
7278  if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
7279  // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
7280  // following error message appearing twice. We do the same for compatibility.
7281  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7282  }
7283 
7292  if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
7293  return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
7294  }
7295 
7296  if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
7297  return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
7298  }
7299 
7300  if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
7301  return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
7302  }
7303 
7304  if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
7305  return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
7306  }
7307 
7308  // At this point no encoding modifiers will be present on the regular expression as they would have already
7309  // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
7310  // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
7311  if (ascii_only) {
7312  return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
7313  }
7314 
7315  // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
7316  // or by specifying a modifier.
7317  //
7318  // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
7319  if (parser->explicit_encoding != NULL) {
7320  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7321  return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
7322  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7323  return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
7324  }
7325  }
7326 
7327  return 0;
7328 }
7329 
7334 static pm_symbol_node_t *
7335 pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
7336  pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7337 
7338  *node = (pm_symbol_node_t) {
7339  {
7340  .type = PM_SYMBOL_NODE,
7341  .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
7342  .node_id = PM_NODE_IDENTIFY(parser),
7343  .location = {
7344  .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
7345  .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
7346  }
7347  },
7348  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7349  .value_loc = PM_LOCATION_TOKEN_VALUE(value),
7350  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7351  .unescaped = *unescaped
7352  };
7353 
7354  return node;
7355 }
7356 
7360 static inline pm_symbol_node_t *
7361 pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7362  return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
7363 }
7364 
7368 static pm_symbol_node_t *
7369 pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7370  pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
7371  parser->current_string = PM_STRING_EMPTY;
7372  return node;
7373 }
7374 
7378 static pm_symbol_node_t *
7379 pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
7380  pm_symbol_node_t *node;
7381 
7382  switch (token->type) {
7383  case PM_TOKEN_LABEL: {
7384  pm_token_t opening = not_provided(parser);
7385  pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
7386 
7387  pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
7388  node = pm_symbol_node_create(parser, &opening, &label, &closing);
7389 
7390  assert((label.end - label.start) >= 0);
7391  pm_string_shared_init(&node->unescaped, label.start, label.end);
7392  pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
7393 
7394  break;
7395  }
7396  case PM_TOKEN_MISSING: {
7397  pm_token_t opening = not_provided(parser);
7398  pm_token_t closing = not_provided(parser);
7399 
7400  pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
7401  node = pm_symbol_node_create(parser, &opening, &label, &closing);
7402  break;
7403  }
7404  default:
7405  assert(false && "unreachable");
7406  node = NULL;
7407  break;
7408  }
7409 
7410  return node;
7411 }
7412 
7416 static pm_symbol_node_t *
7417 pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7418  pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7419 
7420  *node = (pm_symbol_node_t) {
7421  {
7422  .type = PM_SYMBOL_NODE,
7423  .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7424  .node_id = PM_NODE_IDENTIFY(parser),
7425  .location = PM_LOCATION_NULL_VALUE(parser)
7426  },
7427  .value_loc = PM_LOCATION_NULL_VALUE(parser),
7428  .unescaped = { 0 }
7429  };
7430 
7431  pm_string_constant_init(&node->unescaped, content, strlen(content));
7432  return node;
7433 }
7434 
7438 static bool
7439 pm_symbol_node_label_p(pm_node_t *node) {
7440  const uint8_t *end = NULL;
7441 
7442  switch (PM_NODE_TYPE(node)) {
7443  case PM_SYMBOL_NODE:
7444  end = ((pm_symbol_node_t *) node)->closing_loc.end;
7445  break;
7446  case PM_INTERPOLATED_SYMBOL_NODE:
7447  end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
7448  break;
7449  default:
7450  return false;
7451  }
7452 
7453  return (end != NULL) && (end[-1] == ':');
7454 }
7455 
7459 static pm_symbol_node_t *
7460 pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
7461  pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7462 
7463  *new_node = (pm_symbol_node_t) {
7464  {
7465  .type = PM_SYMBOL_NODE,
7466  .flags = PM_NODE_FLAG_STATIC_LITERAL,
7467  .node_id = PM_NODE_IDENTIFY(parser),
7468  .location = {
7469  .start = opening->start,
7470  .end = closing->end
7471  }
7472  },
7473  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7474  .value_loc = node->content_loc,
7475  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7476  .unescaped = node->unescaped
7477  };
7478 
7479  pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7480  pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7481 
7482  // We are explicitly _not_ using pm_node_destroy here because we don't want
7483  // to trash the unescaped string. We could instead copy the string if we
7484  // know that it is owned, but we're taking the fast path for now.
7485  xfree(node);
7486 
7487  return new_node;
7488 }
7489 
7493 static pm_string_node_t *
7494 pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
7495  pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
7496  pm_node_flags_t flags = 0;
7497 
7498  switch (parser->frozen_string_literal) {
7499  case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7500  flags = PM_STRING_FLAGS_MUTABLE;
7501  break;
7502  case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7503  flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7504  break;
7505  }
7506 
7507  *new_node = (pm_string_node_t) {
7508  {
7509  .type = PM_STRING_NODE,
7510  .flags = flags,
7511  .node_id = PM_NODE_IDENTIFY(parser),
7512  .location = node->base.location
7513  },
7514  .opening_loc = node->opening_loc,
7515  .content_loc = node->value_loc,
7516  .closing_loc = node->closing_loc,
7517  .unescaped = node->unescaped
7518  };
7519 
7520  // We are explicitly _not_ using pm_node_destroy here because we don't want
7521  // to trash the unescaped string. We could instead copy the string if we
7522  // know that it is owned, but we're taking the fast path for now.
7523  xfree(node);
7524 
7525  return new_node;
7526 }
7527 
7531 static pm_true_node_t *
7532 pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
7533  assert(token->type == PM_TOKEN_KEYWORD_TRUE);
7534  pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7535 
7536  *node = (pm_true_node_t) {{
7537  .type = PM_TRUE_NODE,
7538  .flags = PM_NODE_FLAG_STATIC_LITERAL,
7539  .node_id = PM_NODE_IDENTIFY(parser),
7540  .location = PM_LOCATION_TOKEN_VALUE(token)
7541  }};
7542 
7543  return node;
7544 }
7545 
7549 static pm_true_node_t *
7550 pm_true_node_synthesized_create(pm_parser_t *parser) {
7551  pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7552 
7553  *node = (pm_true_node_t) {{
7554  .type = PM_TRUE_NODE,
7555  .flags = PM_NODE_FLAG_STATIC_LITERAL,
7556  .node_id = PM_NODE_IDENTIFY(parser),
7557  .location = { .start = parser->start, .end = parser->end }
7558  }};
7559 
7560  return node;
7561 }
7562 
7566 static pm_undef_node_t *
7567 pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
7568  assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
7569  pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
7570 
7571  *node = (pm_undef_node_t) {
7572  {
7573  .type = PM_UNDEF_NODE,
7574  .node_id = PM_NODE_IDENTIFY(parser),
7575  .location = PM_LOCATION_TOKEN_VALUE(token),
7576  },
7577  .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
7578  .names = { 0 }
7579  };
7580 
7581  return node;
7582 }
7583 
7587 static void
7588 pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
7589  node->base.location.end = name->location.end;
7590  pm_node_list_append(&node->names, name);
7591 }
7592 
7596 static pm_unless_node_t *
7597 pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
7598  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7599  pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7600 
7601  const uint8_t *end;
7602  if (statements != NULL) {
7603  end = statements->base.location.end;
7604  } else {
7605  end = predicate->location.end;
7606  }
7607 
7608  *node = (pm_unless_node_t) {
7609  {
7610  .type = PM_UNLESS_NODE,
7611  .flags = PM_NODE_FLAG_NEWLINE,
7612  .node_id = PM_NODE_IDENTIFY(parser),
7613  .location = {
7614  .start = keyword->start,
7615  .end = end
7616  },
7617  },
7618  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7619  .predicate = predicate,
7620  .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
7621  .statements = statements,
7622  .else_clause = NULL,
7623  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7624  };
7625 
7626  return node;
7627 }
7628 
7632 static pm_unless_node_t *
7633 pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
7634  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7635  pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7636 
7637  pm_statements_node_t *statements = pm_statements_node_create(parser);
7638  pm_statements_node_body_append(parser, statements, statement, true);
7639 
7640  *node = (pm_unless_node_t) {
7641  {
7642  .type = PM_UNLESS_NODE,
7643  .flags = PM_NODE_FLAG_NEWLINE,
7644  .node_id = PM_NODE_IDENTIFY(parser),
7645  .location = {
7646  .start = statement->location.start,
7647  .end = predicate->location.end
7648  },
7649  },
7650  .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
7651  .predicate = predicate,
7652  .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7653  .statements = statements,
7654  .else_clause = NULL,
7655  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7656  };
7657 
7658  return node;
7659 }
7660 
7661 static inline void
7662 pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
7663  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
7664  node->base.location.end = end_keyword->end;
7665 }
7666 
7672 static void
7673 pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7674  assert(parser->current_block_exits != NULL);
7675 
7676  // All of the block exits that we want to remove should be within the
7677  // statements, and since we are modifying the statements, we shouldn't have
7678  // to check the end location.
7679  const uint8_t *start = statements->base.location.start;
7680 
7681  for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7682  pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7683  if (block_exit->location.start < start) break;
7684 
7685  // Implicitly remove from the list by lowering the size.
7686  parser->current_block_exits->size--;
7687  }
7688 }
7689 
7693 static pm_until_node_t *
7694 pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7695  pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7696  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7697 
7698  *node = (pm_until_node_t) {
7699  {
7700  .type = PM_UNTIL_NODE,
7701  .flags = flags,
7702  .node_id = PM_NODE_IDENTIFY(parser),
7703  .location = {
7704  .start = keyword->start,
7705  .end = closing->end,
7706  },
7707  },
7708  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7709  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7710  .predicate = predicate,
7711  .statements = statements
7712  };
7713 
7714  return node;
7715 }
7716 
7720 static pm_until_node_t *
7721 pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7722  pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7723  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7724  pm_loop_modifier_block_exits(parser, statements);
7725 
7726  *node = (pm_until_node_t) {
7727  {
7728  .type = PM_UNTIL_NODE,
7729  .flags = flags,
7730  .node_id = PM_NODE_IDENTIFY(parser),
7731  .location = {
7732  .start = statements->base.location.start,
7733  .end = predicate->location.end,
7734  },
7735  },
7736  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7737  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7738  .predicate = predicate,
7739  .statements = statements
7740  };
7741 
7742  return node;
7743 }
7744 
7748 static pm_when_node_t *
7749 pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7750  pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
7751 
7752  *node = (pm_when_node_t) {
7753  {
7754  .type = PM_WHEN_NODE,
7755  .node_id = PM_NODE_IDENTIFY(parser),
7756  .location = {
7757  .start = keyword->start,
7758  .end = NULL
7759  }
7760  },
7761  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7762  .statements = NULL,
7763  .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7764  .conditions = { 0 }
7765  };
7766 
7767  return node;
7768 }
7769 
7773 static void
7774 pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
7775  node->base.location.end = condition->location.end;
7776  pm_node_list_append(&node->conditions, condition);
7777 }
7778 
7782 static inline void
7783 pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
7784  node->base.location.end = then_keyword->end;
7785  node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
7786 }
7787 
7791 static void
7792 pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7793  if (statements->base.location.end > node->base.location.end) {
7794  node->base.location.end = statements->base.location.end;
7795  }
7796 
7797  node->statements = statements;
7798 }
7799 
7803 static pm_while_node_t *
7804 pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7805  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7806  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7807 
7808  *node = (pm_while_node_t) {
7809  {
7810  .type = PM_WHILE_NODE,
7811  .flags = flags,
7812  .node_id = PM_NODE_IDENTIFY(parser),
7813  .location = {
7814  .start = keyword->start,
7815  .end = closing->end
7816  },
7817  },
7818  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7819  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7820  .predicate = predicate,
7821  .statements = statements
7822  };
7823 
7824  return node;
7825 }
7826 
7830 static pm_while_node_t *
7831 pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7832  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7833  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7834  pm_loop_modifier_block_exits(parser, statements);
7835 
7836  *node = (pm_while_node_t) {
7837  {
7838  .type = PM_WHILE_NODE,
7839  .flags = flags,
7840  .node_id = PM_NODE_IDENTIFY(parser),
7841  .location = {
7842  .start = statements->base.location.start,
7843  .end = predicate->location.end
7844  },
7845  },
7846  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7847  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7848  .predicate = predicate,
7849  .statements = statements
7850  };
7851 
7852  return node;
7853 }
7854 
7858 static pm_while_node_t *
7859 pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7860  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7861 
7862  *node = (pm_while_node_t) {
7863  {
7864  .type = PM_WHILE_NODE,
7865  .node_id = PM_NODE_IDENTIFY(parser),
7866  .location = PM_LOCATION_NULL_VALUE(parser)
7867  },
7868  .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7869  .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7870  .predicate = predicate,
7871  .statements = statements
7872  };
7873 
7874  return node;
7875 }
7876 
7881 static pm_x_string_node_t *
7882 pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7883  pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7884 
7885  *node = (pm_x_string_node_t) {
7886  {
7887  .type = PM_X_STRING_NODE,
7888  .flags = PM_STRING_FLAGS_FROZEN,
7889  .node_id = PM_NODE_IDENTIFY(parser),
7890  .location = {
7891  .start = opening->start,
7892  .end = closing->end
7893  },
7894  },
7895  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7896  .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7897  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7898  .unescaped = *unescaped
7899  };
7900 
7901  return node;
7902 }
7903 
7907 static inline pm_x_string_node_t *
7908 pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7909  return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7910 }
7911 
7915 static pm_yield_node_t *
7916 pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7917  pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7918 
7919  const uint8_t *end;
7920  if (rparen_loc->start != NULL) {
7921  end = rparen_loc->end;
7922  } else if (arguments != NULL) {
7923  end = arguments->base.location.end;
7924  } else if (lparen_loc->start != NULL) {
7925  end = lparen_loc->end;
7926  } else {
7927  end = keyword->end;
7928  }
7929 
7930  *node = (pm_yield_node_t) {
7931  {
7932  .type = PM_YIELD_NODE,
7933  .node_id = PM_NODE_IDENTIFY(parser),
7934  .location = {
7935  .start = keyword->start,
7936  .end = end
7937  },
7938  },
7939  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7940  .lparen_loc = *lparen_loc,
7941  .arguments = arguments,
7942  .rparen_loc = *rparen_loc
7943  };
7944 
7945  return node;
7946 }
7947 
7948 #undef PM_NODE_ALLOC
7949 #undef PM_NODE_IDENTIFY
7950 
7955 static int
7956 pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7957  pm_scope_t *scope = parser->current_scope;
7958  int depth = 0;
7959 
7960  while (scope != NULL) {
7961  if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7962  if (scope->closed) break;
7963 
7964  scope = scope->previous;
7965  depth++;
7966  }
7967 
7968  return -1;
7969 }
7970 
7976 static inline int
7977 pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7978  return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7979 }
7980 
7984 static inline void
7985 pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7986  pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
7987 }
7988 
7992 static pm_constant_id_t
7993 pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7994  pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
7995  if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7996  return constant_id;
7997 }
7998 
8002 static inline pm_constant_id_t
8003 pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
8004  return pm_parser_local_add_location(parser, token->start, token->end, reads);
8005 }
8006 
8010 static pm_constant_id_t
8011 pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
8012  pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
8013  if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8014  return constant_id;
8015 }
8016 
8020 static pm_constant_id_t
8021 pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
8022  pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
8023  if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8024  return constant_id;
8025 }
8026 
8034 static bool
8035 pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
8036  // We want to check whether the parameter name is a numbered parameter or
8037  // not.
8038  pm_refute_numbered_parameter(parser, name->start, name->end);
8039 
8040  // Otherwise we'll fetch the constant id for the parameter name and check
8041  // whether it's already in the current scope.
8042  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
8043 
8044  if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
8045  // Add an error if the parameter doesn't start with _ and has been seen before
8046  if ((name->start < name->end) && (*name->start != '_')) {
8047  pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
8048  }
8049  return true;
8050  }
8051  return false;
8052 }
8053 
8057 static void
8058 pm_parser_scope_pop(pm_parser_t *parser) {
8059  pm_scope_t *scope = parser->current_scope;
8060  parser->current_scope = scope->previous;
8061  pm_locals_free(&scope->locals);
8062  pm_node_list_free(&scope->implicit_parameters);
8063  xfree(scope);
8064 }
8065 
8066 /******************************************************************************/
8067 /* Stack helpers */
8068 /******************************************************************************/
8069 
8073 static inline void
8074 pm_state_stack_push(pm_state_stack_t *stack, bool value) {
8075  *stack = (*stack << 1) | (value & 1);
8076 }
8077 
8081 static inline void
8082 pm_state_stack_pop(pm_state_stack_t *stack) {
8083  *stack >>= 1;
8084 }
8085 
8089 static inline bool
8090 pm_state_stack_p(const pm_state_stack_t *stack) {
8091  return *stack & 1;
8092 }
8093 
8094 static inline void
8095 pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
8096  // Use the negation of the value to prevent stack overflow.
8097  pm_state_stack_push(&parser->accepts_block_stack, !value);
8098 }
8099 
8100 static inline void
8101 pm_accepts_block_stack_pop(pm_parser_t *parser) {
8102  pm_state_stack_pop(&parser->accepts_block_stack);
8103 }
8104 
8105 static inline bool
8106 pm_accepts_block_stack_p(pm_parser_t *parser) {
8107  return !pm_state_stack_p(&parser->accepts_block_stack);
8108 }
8109 
8110 static inline void
8111 pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
8112  pm_state_stack_push(&parser->do_loop_stack, value);
8113 }
8114 
8115 static inline void
8116 pm_do_loop_stack_pop(pm_parser_t *parser) {
8117  pm_state_stack_pop(&parser->do_loop_stack);
8118 }
8119 
8120 static inline bool
8121 pm_do_loop_stack_p(pm_parser_t *parser) {
8122  return pm_state_stack_p(&parser->do_loop_stack);
8123 }
8124 
8125 /******************************************************************************/
8126 /* Lexer check helpers */
8127 /******************************************************************************/
8128 
8133 static inline uint8_t
8134 peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8135  if (cursor < parser->end) {
8136  return *cursor;
8137  } else {
8138  return '\0';
8139  }
8140 }
8141 
8147 static inline uint8_t
8148 peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8149  return peek_at(parser, parser->current.end + offset);
8150 }
8151 
8156 static inline uint8_t
8157 peek(const pm_parser_t *parser) {
8158  return peek_at(parser, parser->current.end);
8159 }
8160 
8165 static inline bool
8166 match(pm_parser_t *parser, uint8_t value) {
8167  if (peek(parser) == value) {
8168  parser->current.end++;
8169  return true;
8170  }
8171  return false;
8172 }
8173 
8178 static inline size_t
8179 match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
8180  if (peek_at(parser, cursor) == '\n') {
8181  return 1;
8182  }
8183  if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
8184  return 2;
8185  }
8186  return 0;
8187 }
8188 
8194 static inline size_t
8195 match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
8196  return match_eol_at(parser, parser->current.end + offset);
8197 }
8198 
8204 static inline size_t
8205 match_eol(pm_parser_t *parser) {
8206  return match_eol_at(parser, parser->current.end);
8207 }
8208 
/**
 * Return a pointer to the first '\n' byte within the `length` bytes starting
 * at `cursor`, or NULL when there is none. `length` must not be negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);
    const size_t span = (size_t) length;

    // A raw byte search is fine here: none of the supported encodings use
    // \n as a component of a multi-byte character.
    const void *found = memchr(cursor, '\n', span);
    return (const uint8_t *) found;
}
8221 
8225 static inline bool
8226 ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8227  return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8228 }
8229 
8234 static bool
8235 parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
8236  const pm_encoding_t *encoding = pm_encoding_find(start, end);
8237 
8238  if (encoding != NULL) {
8239  if (parser->encoding != encoding) {
8240  parser->encoding = encoding;
8241  if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
8242  }
8243 
8244  parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
8245  return true;
8246  }
8247 
8248  return false;
8249 }
8250 
8255 static void
8256 parser_lex_magic_comment_encoding(pm_parser_t *parser) {
8257  const uint8_t *cursor = parser->current.start + 1;
8258  const uint8_t *end = parser->current.end;
8259 
8260  bool separator = false;
8261  while (true) {
8262  if (end - cursor <= 6) return;
8263  switch (cursor[6]) {
8264  case 'C': case 'c': cursor += 6; continue;
8265  case 'O': case 'o': cursor += 5; continue;
8266  case 'D': case 'd': cursor += 4; continue;
8267  case 'I': case 'i': cursor += 3; continue;
8268  case 'N': case 'n': cursor += 2; continue;
8269  case 'G': case 'g': cursor += 1; continue;
8270  case '=': case ':':
8271  separator = true;
8272  cursor += 6;
8273  break;
8274  default:
8275  cursor += 6;
8276  if (pm_char_is_whitespace(*cursor)) break;
8277  continue;
8278  }
8279  if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
8280  separator = false;
8281  }
8282 
8283  while (true) {
8284  do {
8285  if (++cursor >= end) return;
8286  } while (pm_char_is_whitespace(*cursor));
8287 
8288  if (separator) break;
8289  if (*cursor != '=' && *cursor != ':') return;
8290 
8291  separator = true;
8292  cursor++;
8293  }
8294 
8295  const uint8_t *value_start = cursor;
8296  while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
8297 
8298  if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
8299  // If we were unable to parse the encoding value, then we've got an
8300  // issue because we didn't understand the encoding that the user was
8301  // trying to use. In this case we'll keep using the default encoding but
8302  // add an error to the parser to indicate an unsuccessful parse.
8303  pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
8304  }
8305 }
8306 
/**
 * The possible outcomes of interpreting the value of a boolean magic
 * comment.
 */
typedef enum {
    /** The value was "true" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,

    /** The value was "false" (compared case-insensitively). */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,

    /** The value was neither of the above. */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
} pm_magic_comment_boolean_value_t;

/**
 * Interpret the `value_length` bytes starting at `value_start` as a boolean
 * magic comment value. Only the exact words "true" and "false", in any
 * letter case, are accepted.
 */
static pm_magic_comment_boolean_value_t
parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
    switch (value_length) {
        case 4:
            if (pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
                return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
            }
            break;
        case 5:
            if (pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
                return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
            }
            break;
        default:
            break;
    }

    return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
}
8327 
/**
 * Return true when the given byte delimits a magic comment key, i.e. it is a
 * single quote, a double quote, a colon or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
8332 
8338 static inline const uint8_t *
8339 parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
8340  while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
8341  if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
8342  return cursor;
8343  }
8344  cursor++;
8345  }
8346  return NULL;
8347 }
8348 
/**
 * Scan the current comment token for magic comment `key: value` pairs and
 * apply the ones this function recognizes to the parser.
 *
 * Two layouts are handled:
 *
 * - With emacs markers (`-*- key: value; key: value -*-`): only the text
 *   between the two markers is scanned, and several pairs may appear.
 * - Without markers: the text after the first byte of the token must be a
 *   single `key: value` pair followed only by whitespace.
 *
 * Recognized keys (compared case-insensitively, with '-' treated as '_') are
 * `encoding`/`coding`, `warn_indent`, `frozen_string_literal` and
 * `shareable_constant_value`.
 *
 * @param parser The parser whose current token is the comment being scanned.
 * @param semantic_token_seen When true, a `frozen_string_literal` pair only
 *     produces a warning instead of being applied.
 * @return false if the comment is too short, has an opening emacs marker
 *     without a closing one, or (without markers) is not a lone
 *     `key: value` pair. Otherwise true, unless an encoding pair was handled,
 *     in which case the result of parser_lex_magic_comment_encoding_value.
 *
 * NOTE(review): this listing appears to be missing a few statements (see the
 * notes inline); confirm against the original file before relying on it.
 */
8359 static inline bool
8360 parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8361  bool result = true;
8362
      // Skip the first byte of the token (presumably the leading '#').
8363  const uint8_t *start = parser->current.start + 1;
8364  const uint8_t *end = parser->current.end;
      // Anything with 7 or fewer bytes after the first one is rejected early.
8365  if (end - start <= 7) return false;
8366
8367  const uint8_t *cursor;
      // Set when the pairs are wrapped in a pair of emacs "-*-" markers.
8368  bool indicator = false;
8369
8370  if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8371  start = cursor + 3;
8372
8373  if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8374  end = cursor;
8375  indicator = true;
8376  } else {
8377  // If we have a start marker but not an end marker, then we cannot
8378  // have a magic comment.
8379  return false;
8380  }
8381  }
8382
8383  cursor = start;
8384  while (cursor < end) {
      // Skip separators before the key, then take the key as the run of
      // bytes up to the next delimiter or whitespace.
8385  while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
8386
8387  const uint8_t *key_start = cursor;
8388  while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
8389
8390  const uint8_t *key_end = cursor;
8391  while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8392  if (cursor == end) break;
8393
      // A key must be followed by ':'. Inside emacs markers a stray word is
      // skipped; outside them it means this is not a magic comment at all.
8394  if (*cursor == ':') {
8395  cursor++;
8396  } else {
8397  if (!indicator) return false;
8398  continue;
8399  }
8400
8401  while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8402  if (cursor == end) break;
8403
8404  const uint8_t *value_start;
8405  const uint8_t *value_end;
8406
      // The value is either double-quoted (a backslash skips the following
      // byte) or a bare run ending at '"', ';' or whitespace.
8407  if (*cursor == '"') {
8408  value_start = ++cursor;
8409  for (; cursor < end && *cursor != '"'; cursor++) {
8410  if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
8411  }
8412  value_end = cursor;
      // NOTE(review): if the closing quote is missing, cursor == end here and
      // the next line reads the byte at end without a bounds check — confirm
      // that byte is always readable, or guard with `cursor < end`.
8413  if (*cursor == '"') cursor++;
8414  } else {
8415  value_start = cursor;
8416  while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
8417  value_end = cursor;
8418  }
8419
      // Inside emacs markers, ';' and whitespace separate pairs. Otherwise
      // only trailing whitespace may follow the single value.
8420  if (indicator) {
8421  while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
8422  } else {
8423  while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8424  if (cursor != end) return false;
8425  }
8426
8427  // Here, we need to do some processing on the key to swap out dashes for
8428  // underscores. We only need to do this if there _is_ a dash in the key.
8429  pm_string_t key;
8430  const size_t key_length = (size_t) (key_end - key_start);
8431  const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
8432
8433  if (dash == NULL) {
8434  pm_string_shared_init(&key, key_start, key_end);
8435  } else {
8436  uint8_t *buffer = xmalloc(key_length);
      // On allocation failure, stop scanning and return what we have so far.
8437  if (buffer == NULL) break;
8438
8439  memcpy(buffer, key_start, key_length);
      // `dash` points into the source, so its offset from key_start is also
      // the index of the same byte in the copy.
8440  buffer[dash - key_start] = '_';
8441
8442  while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
8443  buffer[dash - key_start] = '_';
8444  }
8445
8446  pm_string_owned_init(&key, buffer, key_length);
8447  }
8448
8449  // Finally, we can start checking the key against the list of known
8450  // magic comment keys, and potentially change state based on that.
8451  const uint8_t *key_source = pm_string_source(&key);
8452  uint32_t value_length = (uint32_t) (value_end - value_start);
8453
8454  // We only want to attempt to compare against encoding comments if it's
8455  // the first line in the file (or the second in the case of a shebang).
8456  if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
8457  if (
8458  (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
8459  (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
8460  ) {
8461  result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
8462  }
8463  }
8464
      // The remaining keys are dispatched on length first so that at most
      // one string comparison is needed per pair.
8465  if (key_length == 11) {
8466  if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
8467  switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8468  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8469  PM_PARSER_WARN_TOKEN_FORMAT(
8470  parser,
8471  parser->current,
8472  PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8473  (int) key_length,
8474  (const char *) key_source,
8475  (int) value_length,
8476  (const char *) value_start
8477  );
8478  break;
8479  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8480  parser->warn_mismatched_indentation = false;
8481  break;
8482  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8483  parser->warn_mismatched_indentation = true;
8484  break;
8485  }
8486  }
8487  } else if (key_length == 21) {
8488  if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
8489  // We only want to handle frozen string literal comments if it's
8490  // before any semantic tokens have been seen.
8491  if (semantic_token_seen) {
8492  pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
8493  } else {
8494  switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8495  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8496  PM_PARSER_WARN_TOKEN_FORMAT(
8497  parser,
8498  parser->current,
8499  PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8500  (int) key_length,
8501  (const char *) key_source,
8502  (int) value_length,
8503  (const char *) value_start
8504  );
8505  break;
      // NOTE(review): the FALSE and TRUE cases below have no statements in
      // this listing (lines 8507 and 8510 are absent). Presumably each one
      // records the frozen-string-literal setting on the parser — confirm
      // against the original file.
8506  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8508  break;
8509  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8511  break;
8512  }
8513  }
8514  }
8515  } else if (key_length == 24) {
8516  if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
      // Walk back over spaces and tabs before the comment. The setting is
      // only applied when the comment is the first thing on its line;
      // otherwise a warning is emitted.
8517  const uint8_t *cursor = parser->current.start;
8518  while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8519
8520  if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8521  pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8522  } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8523  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8524  } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8525  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
8526  } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
8527  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
8528  } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
8529  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
8530  } else {
8531  PM_PARSER_WARN_TOKEN_FORMAT(
8532  parser,
8533  parser->current,
8534  PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8535  (int) key_length,
8536  (const char *) key_source,
8537  (int) value_length,
8538  (const char *) value_start
8539  );
8540  }
8541  }
8542  }
8543
8544  // When we're done, we want to free the string in case we had to
8545  // allocate memory for it.
8546  pm_string_free(&key);
8547
8548  // Allocate a new magic comment node to append to the parser's list.
      // NOTE(review): the declaration of `magic_comment` (line 8549) and the
      // statement after the field assignments (line 8555, presumably the list
      // append) are absent from this listing — confirm against the original
      // file. The node stores pointers into the source (key_start and
      // value_start), not into the dash-normalized copy of the key.
8550  if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
8551  magic_comment->key_start = key_start;
8552  magic_comment->value_start = value_start;
8553  magic_comment->key_length = (uint32_t) key_length;
8554  magic_comment->value_length = value_length;
8556  }
8557  }
8558
8559  return result;
8560 }
8561 
8562 /******************************************************************************/
8563 /* Context manipulations */
8564 /******************************************************************************/
8565 
/**
 * Report whether the given token is one that closes the given context.
 *
 * Each group of contexts maps to the set of token types that end it; for
 * example the if/elsif contexts end at `else`, `elsif` or `end`.
 * PM_CONTEXT_NONE is never terminated.
 *
 * @param context The context to check.
 * @param token The token whose type is tested.
 * @return true if the token's type terminates the context, false otherwise.
 *
 * NOTE(review): this listing has gaps in its line numbering and appears to be
 * missing a number of `case` labels. Several `return` statements directly
 * follow another `return`, which would make them unreachable as shown.
 * Confirm the full set of cases against the original file before relying on
 * the grouping below.
 */
8566 static bool
8567 context_terminator(pm_context_t context, pm_token_t *token) {
8568  switch (context) {
8569  case PM_CONTEXT_MAIN:
8570  case PM_CONTEXT_DEF_PARAMS:
8571  case PM_CONTEXT_DEFINED:
8573  case PM_CONTEXT_TERNARY:
8575  return token->type == PM_TOKEN_EOF;
8577  return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8578  case PM_CONTEXT_PREEXE:
8579  case PM_CONTEXT_POSTEXE:
8580  return token->type == PM_TOKEN_BRACE_RIGHT;
8581  case PM_CONTEXT_MODULE:
8582  case PM_CONTEXT_CLASS:
8583  case PM_CONTEXT_SCLASS:
8585  case PM_CONTEXT_DEF:
8587  return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
8588  case PM_CONTEXT_WHILE:
8589  case PM_CONTEXT_UNTIL:
8590  case PM_CONTEXT_ELSE:
8591  case PM_CONTEXT_FOR:
8595  case PM_CONTEXT_DEF_ENSURE:
8599  return token->type == PM_TOKEN_KEYWORD_END;
8601  return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
8602  case PM_CONTEXT_FOR_INDEX:
8603  return token->type == PM_TOKEN_KEYWORD_IN;
8604  case PM_CONTEXT_CASE_WHEN:
8605  return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8606  case PM_CONTEXT_CASE_IN:
8607  return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8608  case PM_CONTEXT_IF:
8609  case PM_CONTEXT_ELSIF:
8610  return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
8611  case PM_CONTEXT_UNLESS:
8612  return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8613  case PM_CONTEXT_EMBEXPR:
8614  return token->type == PM_TOKEN_EMBEXPR_END;
8616  return token->type == PM_TOKEN_BRACE_RIGHT;
8617  case PM_CONTEXT_PARENS:
8618  return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8619  case PM_CONTEXT_BEGIN:
8623  case PM_CONTEXT_DEF_RESCUE:
8627  return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8628  case PM_CONTEXT_BEGIN_ELSE:
8629  case PM_CONTEXT_BLOCK_ELSE:
8630  case PM_CONTEXT_CLASS_ELSE:
8631  case PM_CONTEXT_DEF_ELSE:
8635  return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
8637  return token->type == PM_TOKEN_BRACE_RIGHT;
8638  case PM_CONTEXT_PREDICATE:
8639  return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
8640  case PM_CONTEXT_NONE:
8641  return false;
8642  }
8643
      // Reached only for a context value not covered by the switch.
8644  return false;
8645 }
8646 
8651 static pm_context_t
8652 context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
8653  pm_context_node_t *context_node = parser->current_context;
8654 
8655  while (context_node != NULL) {
8656  if (context_terminator(context_node->context, token)) return context_node->context;
8657  context_node = context_node->prev;
8658  }
8659 
8660  return PM_CONTEXT_NONE;
8661 }
8662 
8663 static bool
8664 context_push(pm_parser_t *parser, pm_context_t context) {
8665  pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
8666  if (context_node == NULL) return false;
8667 
8668  *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
8669 
8670  if (parser->current_context == NULL) {
8671  parser->current_context = context_node;
8672  } else {
8673  context_node->prev = parser->current_context;
8674  parser->current_context = context_node;
8675  }
8676 
8677  return true;
8678 }
8679 
8680 static void
8681 context_pop(pm_parser_t *parser) {
8682  pm_context_node_t *prev = parser->current_context->prev;
8683  xfree(parser->current_context);
8684  parser->current_context = prev;
8685 }
8686 
8687 static bool
8688 context_p(const pm_parser_t *parser, pm_context_t context) {
8689  pm_context_node_t *context_node = parser->current_context;
8690 
8691  while (context_node != NULL) {
8692  if (context_node->context == context) return true;
8693  context_node = context_node->prev;
8694  }
8695 
8696  return false;
8697 }
8698 
/**
 * Check whether the parser is currently inside a method definition.
 *
 * The context stack is walked from the innermost context outwards. Reaching
 * one of the `def` contexts first yields true. Reaching one of the class,
 * module or singleton-class contexts listed below first yields false. Any
 * other context is skipped.
 *
 * @param parser The parser whose context stack is inspected.
 * @return true if a `def` context is found before a class/module/sclass
 *     context, false otherwise (including when the stack is exhausted).
 *
 * NOTE(review): the listing has gaps in its line numbering between the
 * class/module/sclass labels, so further case labels are probably missing
 * here — confirm against the original file.
 */
8699 static bool
8700 context_def_p(const pm_parser_t *parser) {
8701  pm_context_node_t *context_node = parser->current_context;
8702
8703  while (context_node != NULL) {
8704  switch (context_node->context) {
8705  case PM_CONTEXT_DEF:
8706  case PM_CONTEXT_DEF_PARAMS:
8707  case PM_CONTEXT_DEF_ENSURE:
8708  case PM_CONTEXT_DEF_RESCUE:
8709  case PM_CONTEXT_DEF_ELSE:
8710  return true;
8711  case PM_CONTEXT_CLASS:
8714  case PM_CONTEXT_CLASS_ELSE:
8715  case PM_CONTEXT_MODULE:
8719  case PM_CONTEXT_SCLASS:
8723  return false;
8724  default:
      // Not a deciding context: keep walking outwards.
8725  context_node = context_node->prev;
8726  }
8727  }
8728
8729  return false;
8730 }
8731 
/**
 * Return a human-readable description of the given context.
 *
 * PM_CONTEXT_NONE is not expected here: it trips an assertion and, when
 * assertions are compiled out, yields an empty string. The same applies to
 * any value not covered by the switch.
 *
 * @param context The context to describe.
 * @return A pointer to a string literal describing the context.
 *
 * NOTE(review): the listing has gaps in its line numbering inside the
 * 'else', 'ensure' and 'rescue' groups, so some case labels are probably
 * missing — confirm against the original file.
 */
8736 static const char *
8737 context_human(pm_context_t context) {
8738  switch (context) {
8739  case PM_CONTEXT_NONE:
8740  assert(false && "unreachable");
8741  return "";
8742  case PM_CONTEXT_BEGIN: return "begin statement";
8743  case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8744  case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8745  case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8746  case PM_CONTEXT_CASE_IN: return "'in' clause";
8747  case PM_CONTEXT_CLASS: return "class definition";
8748  case PM_CONTEXT_DEF: return "method definition";
8749  case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8750  case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8751  case PM_CONTEXT_DEFINED: return "'defined?' expression";
      // Every flavor of else context shares one description.
8752  case PM_CONTEXT_ELSE:
8753  case PM_CONTEXT_BEGIN_ELSE:
8754  case PM_CONTEXT_BLOCK_ELSE:
8755  case PM_CONTEXT_CLASS_ELSE:
8756  case PM_CONTEXT_DEF_ELSE:
8759  case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8760  case PM_CONTEXT_ELSIF: return "'elsif' clause";
8761  case PM_CONTEXT_EMBEXPR: return "embedded expression";
8765  case PM_CONTEXT_DEF_ENSURE:
8768  case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8769  case PM_CONTEXT_FOR: return "for loop";
8770  case PM_CONTEXT_FOR_INDEX: return "for loop index";
8771  case PM_CONTEXT_IF: return "if statement";
8772  case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8773  case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8774  case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8775  case PM_CONTEXT_MAIN: return "top level context";
8776  case PM_CONTEXT_MODULE: return "module definition";
8777  case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8778  case PM_CONTEXT_PARENS: return "parentheses";
8779  case PM_CONTEXT_POSTEXE: return "'END' block";
8780  case PM_CONTEXT_PREDICATE: return "predicate";
8781  case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8785  case PM_CONTEXT_DEF_RESCUE:
8789  case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8790  case PM_CONTEXT_SCLASS: return "singleton class definition";
8791  case PM_CONTEXT_TERNARY: return "ternary expression";
8792  case PM_CONTEXT_UNLESS: return "unless statement";
8793  case PM_CONTEXT_UNTIL: return "until statement";
8794  case PM_CONTEXT_WHILE: return "while statement";
8795  }
8796
8797  assert(false && "unreachable");
8798  return "";
8799 }
8800 
8801 /******************************************************************************/
8802 /* Specific token lexers */
8803 /******************************************************************************/
8804 
8805 static inline void
8806 pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8807  if (invalid != NULL) {
8808  pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8809  pm_parser_err(parser, invalid, invalid + 1, diag_id);
8810  }
8811 }
8812 
8813 static size_t
8814 pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8815  const uint8_t *invalid = NULL;
8816  size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8817  pm_strspn_number_validate(parser, string, length, invalid);
8818  return length;
8819 }
8820 
8821 static size_t
8822 pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8823  const uint8_t *invalid = NULL;
8824  size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8825  pm_strspn_number_validate(parser, string, length, invalid);
8826  return length;
8827 }
8828 
8829 static size_t
8830 pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8831  const uint8_t *invalid = NULL;
8832  size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8833  pm_strspn_number_validate(parser, string, length, invalid);
8834  return length;
8835 }
8836 
8837 static size_t
8838 pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8839  const uint8_t *invalid = NULL;
8840  size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8841  pm_strspn_number_validate(parser, string, length, invalid);
8842  return length;
8843 }
8844 
/**
 * Try to extend the number being lexed with a fractional part (".digits")
 * and/or an exponent ("e"/"E", optional sign, digits), advancing
 * parser->current.end over whatever is consumed.
 *
 * @param parser The parser whose current token is being extended.
 * @param seen_e Set to true when an exponent was consumed; left untouched
 *     otherwise.
 * @return PM_TOKEN_FLOAT if a fractional part or an exponent was consumed,
 *     otherwise the initial value of `type`.
 *
 * NOTE(review): the declaration and initial value of `type` (line 8847) are
 * absent from this listing — confirm against the original file.
 */
8845 static pm_token_type_t
8846 lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8848
8849  // Here we're going to attempt to parse the optional decimal portion of a
8850  // float. If it's not there, then it's okay and we'll just continue on.
8851  if (peek(parser) == '.') {
8852  if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
      // Consume the '.' and the first digit, then the rest of the digits.
8853  parser->current.end += 2;
8854  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8855  type = PM_TOKEN_FLOAT;
8856  } else {
8857  // If we had a . and then something else, then it's not a float
8858  // suffix on a number it's a method call or something else.
8859  return type;
8860  }
8861  }
8862
8863  // Here we're going to attempt to parse the optional exponent portion of a
8864  // float. If it's not there, it's okay and we'll just continue on.
8865  if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8866  if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
      // Consume the exponent marker and its sign.
8867  parser->current.end += 2;
8868
8869  if (pm_char_is_decimal_digit(peek(parser))) {
8870  parser->current.end++;
8871  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8872  } else {
      // A signed exponent with no digits is reported as an error, but the
      // token is still treated as a float below.
8873  pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8874  }
8875  } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
      // Unsigned exponent: consume the marker, then the digits.
8876  parser->current.end++;
8877  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8878  } else {
      // An 'e' not followed by a sign or digit is not part of the number.
8879  return type;
8880  }
8881
8882  *seen_e = true;
8883  type = PM_TOKEN_FLOAT;
8884  }
8885
8886  return type;
8887 }
8888 
/**
 * Lex the body of a numeric literal whose first byte has already been
 * consumed, advancing parser->current.end over it.
 *
 * If the byte before the cursor is '0', the next byte selects the form:
 * 0d (decimal), 0b (binary), 0o or a leading 0 / underscore (octal),
 * 0x (hexadecimal), or a '.' / 'e' float suffix. Otherwise the rest of a
 * decimal number is consumed, followed by an optional float suffix. Finally,
 * a further ".digits" after a completed number is reported as
 * PM_ERR_INVALID_NUMBER_FRACTION without being consumed.
 *
 * @param parser The parser whose current token is being lexed.
 * @param seen_e Reset to false on entry; set by lex_optional_float_suffix
 *     when an exponent is consumed.
 * @return The value of `type` after lexing.
 *
 * NOTE(review): the declaration of `type` (line 8891) and one statement after
 * each of the binary, octal and hexadecimal branches (lines 8920, 8934, 8948,
 * 8962) are absent from this listing — confirm against the original file.
 */
8889 static pm_token_type_t
8890 lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8892  *seen_e = false;
8893
8894  if (peek_offset(parser, -1) == '0') {
8895  switch (*parser->current.end) {
8896  // 0d1111 is a decimal number
8897  case 'd':
8898  case 'D':
8899  parser->current.end++;
8900  if (pm_char_is_decimal_digit(peek(parser))) {
8901  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8902  } else {
      // No digit after the prefix: swallow a stray underscore if present so
      // that the reported error covers it.
8903  match(parser, '_');
8904  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8905  }
8906
8907  break;
8908
8909  // 0b1111 is a binary number
8910  case 'b':
8911  case 'B':
8912  parser->current.end++;
8913  if (pm_char_is_binary_digit(peek(parser))) {
8914  parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8915  } else {
8916  match(parser, '_');
8917  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8918  }
8919
8921  break;
8922
8923  // 0o1111 is an octal number
8924  case 'o':
8925  case 'O':
8926  parser->current.end++;
8927  if (pm_char_is_octal_digit(peek(parser))) {
8928  parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8929  } else {
8930  match(parser, '_');
8931  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8932  }
8933
8935  break;
8936
8937  // 01111 is an octal number
8938  case '_':
8939  case '0':
8940  case '1':
8941  case '2':
8942  case '3':
8943  case '4':
8944  case '5':
8945  case '6':
8946  case '7':
8947  parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8949  break;
8950
8951  // 0x1111 is a hexadecimal number
8952  case 'x':
8953  case 'X':
8954  parser->current.end++;
8955  if (pm_char_is_hexadecimal_digit(peek(parser))) {
8956  parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8957  } else {
8958  match(parser, '_');
8959  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8960  }
8961
8963  break;
8964
8965  // 0.xxx is a float
8966  case '.': {
8967  type = lex_optional_float_suffix(parser, seen_e);
8968  break;
8969  }
8970
8971  // 0exxx is a float
8972  case 'e':
8973  case 'E': {
8974  type = lex_optional_float_suffix(parser, seen_e);
8975  break;
8976  }
8977  }
8978  } else {
8979  // If it didn't start with a 0, then we'll lex as far as we can into a
8980  // decimal number.
8981  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8982
8983  // Afterward, we'll lex as far as we can into an optional float suffix.
8984  type = lex_optional_float_suffix(parser, seen_e);
8985  }
8986
8987  // At this point we have a completed number, but we want to provide the user
8988  // with a good experience if they put an additional .xxx fractional
8989  // component on the end, so we'll check for that here.
8990  if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
      // The error spans the '.' and every digit after it; current.end itself
      // is not advanced.
8991  const uint8_t *fraction_start = parser->current.end;
8992  const uint8_t *fraction_end = parser->current.end + 2;
8993  fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8994  pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8995  }
8996
8997  return type;
8998 }
8999 
/**
 * Lex a numeric literal including its optional `r` (rational) and `i`
 * (imaginary) suffixes.
 *
 * After lex_numeric_prefix has consumed the number, a suffix is tentatively
 * matched. If the byte after that is a letter, an underscore or a byte
 * >= 0x80, the suffix is rejected: the token end is rewound to just after
 * the number and the unsuffixed type is kept. Otherwise the suffixed type is
 * returned. The `r` suffix is not attempted on a float that had an exponent.
 *
 * @param parser The parser whose current token is being lexed.
 * @return The token type of the numeric literal.
 *
 * NOTE(review): the declaration of `type` (lines 9002-9003) and the body of
 * the nested `match(parser, 'i')` for integers (line 9017, presumably the
 * rational-imaginary integer type) are absent from this listing — confirm
 * against the original file.
 */
9000 static pm_token_type_t
9001 lex_numeric(pm_parser_t *parser) {
9004
9005  if (parser->current.end < parser->end) {
9006  bool seen_e = false;
9007  type = lex_numeric_prefix(parser, &seen_e);
9008
      // Remember where the number itself ends so a rejected suffix can be
      // undone.
9009  const uint8_t *end = parser->current.end;
9010  pm_token_type_t suffix_type = type;
9011
9012  if (type == PM_TOKEN_INTEGER) {
9013  if (match(parser, 'r')) {
9014  suffix_type = PM_TOKEN_INTEGER_RATIONAL;
9015
9016  if (match(parser, 'i')) {
9018  }
9019  } else if (match(parser, 'i')) {
9020  suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
9021  }
9022  } else {
9023  if (!seen_e && match(parser, 'r')) {
9024  suffix_type = PM_TOKEN_FLOAT_RATIONAL;
9025
9026  if (match(parser, 'i')) {
9027  suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
9028  }
9029  } else if (match(parser, 'i')) {
9030  suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
9031  }
9032  }
9033
      // If what follows is a letter, underscore or byte >= 0x80, rewind to
      // the end of the number and discard the suffix.
9034  const uint8_t b = peek(parser);
9035  if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
9036  parser->current.end = end;
9037  } else {
9038  type = suffix_type;
9039  }
9040  }
9041
9042  return type;
9043 }
9044 
9045 static pm_token_type_t
9046 lex_global_variable(pm_parser_t *parser) {
9047  if (parser->current.end >= parser->end) {
9048  pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9049  return PM_TOKEN_GLOBAL_VARIABLE;
9050  }
9051 
9052  // True if multiple characters are allowed after the declaration of the
9053  // global variable. Not true when it starts with "$-".
9054  bool allow_multiple = true;
9055 
9056  switch (*parser->current.end) {
9057  case '~': // $~: match-data
9058  case '*': // $*: argv
9059  case '$': // $$: pid
9060  case '?': // $?: last status
9061  case '!': // $!: error string
9062  case '@': // $@: error position
9063  case '/': // $/: input record separator
9064  case '\\': // $\: output record separator
9065  case ';': // $;: field separator
9066  case ',': // $,: output field separator
9067  case '.': // $.: last read line number
9068  case '=': // $=: ignorecase
9069  case ':': // $:: load path
9070  case '<': // $<: reading filename
9071  case '>': // $>: default output handle
9072  case '\"': // $": already loaded files
9073  parser->current.end++;
9074  return PM_TOKEN_GLOBAL_VARIABLE;
9075 
9076  case '&': // $&: last match
9077  case '`': // $`: string before last match
9078  case '\'': // $': string after last match
9079  case '+': // $+: string matches last paren.
9080  parser->current.end++;
9081  return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
9082 
9083  case '0': {
9084  parser->current.end++;
9085  size_t width;
9086 
9087  if (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
9088  do {
9089  parser->current.end += width;
9090  } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9091 
9092  // $0 isn't allowed to be followed by anything.
9093  pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9094  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
9095  }
9096 
9097  return PM_TOKEN_GLOBAL_VARIABLE;
9098  }
9099 
9100  case '1':
9101  case '2':
9102  case '3':
9103  case '4':
9104  case '5':
9105  case '6':
9106  case '7':
9107  case '8':
9108  case '9':
9109  parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
9110  return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
9111 
9112  case '-':
9113  parser->current.end++;
9114  allow_multiple = false;
9115  /* fallthrough */
9116  default: {
9117  size_t width;
9118 
9119  if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
9120  do {
9121  parser->current.end += width;
9122  } while (allow_multiple && parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9123  } else if (pm_char_is_whitespace(peek(parser))) {
9124  // If we get here, then we have a $ followed by whitespace,
9125  // which is not allowed.
9126  pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9127  } else {
9128  // If we get here, then we have a $ followed by something that
9129  // isn't recognized as a global variable.
9130  pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9131  const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9132  PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9133  }
9134 
9135  return PM_TOKEN_GLOBAL_VARIABLE;
9136  }
9137  }
9138 }
9139 
9152 static inline pm_token_type_t
9153 lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
9154  if (memcmp(current_start, value, vlen) == 0) {
9155  pm_lex_state_t last_state = parser->lex_state;
9156 
9157  if (parser->lex_state & PM_LEX_STATE_FNAME) {
9158  lex_state_set(parser, PM_LEX_STATE_ENDFN);
9159  } else {
9160  lex_state_set(parser, state);
9161  if (state == PM_LEX_STATE_BEG) {
9162  parser->command_start = true;
9163  }
9164 
9165  if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
9166  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
9167  return modifier_type;
9168  }
9169  }
9170 
9171  return type;
9172  }
9173 
9174  return PM_TOKEN_EOF;
9175 }
9176 
9177 static pm_token_type_t
9178 lex_identifier(pm_parser_t *parser, bool previous_command_start) {
9179  // Lex as far as we can into the current identifier.
9180  size_t width;
9181  const uint8_t *end = parser->end;
9182  const uint8_t *current_start = parser->current.start;
9183  const uint8_t *current_end = parser->current.end;
9184  bool encoding_changed = parser->encoding_changed;
9185 
9186  if (encoding_changed) {
9187  while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
9188  current_end += width;
9189  }
9190  } else {
9191  while (current_end < end && (width = char_is_identifier_utf8(current_end, end)) > 0) {
9192  current_end += width;
9193  }
9194  }
9195  parser->current.end = current_end;
9196 
9197  // Now cache the length of the identifier so that we can quickly compare it
9198  // against known keywords.
9199  width = (size_t) (current_end - current_start);
9200 
9201  if (current_end < end) {
9202  if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
9203  // First we'll attempt to extend the identifier by a ! or ?. Then we'll
9204  // check if we're returning the defined? keyword or just an identifier.
9205  width++;
9206 
9207  if (
9208  ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9209  (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
9210  ) {
9211  // If we're in a position where we can accept a : at the end of an
9212  // identifier, then we'll optionally accept it.
9213  lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9214  (void) match(parser, ':');
9215  return PM_TOKEN_LABEL;
9216  }
9217 
9218  if (parser->lex_state != PM_LEX_STATE_DOT) {
9219  if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
9220  return PM_TOKEN_KEYWORD_DEFINED;
9221  }
9222  }
9223 
9224  return PM_TOKEN_METHOD_NAME;
9225  }
9226 
9227  if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
9228  // If we're in a position where we can accept a = at the end of an
9229  // identifier, then we'll optionally accept it.
9230  return PM_TOKEN_IDENTIFIER;
9231  }
9232 
9233  if (
9234  ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9235  peek(parser) == ':' && peek_offset(parser, 1) != ':'
9236  ) {
9237  // If we're in a position where we can accept a : at the end of an
9238  // identifier, then we'll optionally accept it.
9239  lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9240  (void) match(parser, ':');
9241  return PM_TOKEN_LABEL;
9242  }
9243  }
9244 
9245  if (parser->lex_state != PM_LEX_STATE_DOT) {
9247  switch (width) {
9248  case 2:
9249  if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
9250  if (pm_do_loop_stack_p(parser)) {
9251  return PM_TOKEN_KEYWORD_DO_LOOP;
9252  }
9253  return PM_TOKEN_KEYWORD_DO;
9254  }
9255 
9256  if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
9257  if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9258  if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9259  break;
9260  case 3:
9261  if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9262  if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9263  if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9264  if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9265  if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9266  if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9267  if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9268  break;
9269  case 4:
9270  if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9271  if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9272  if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9273  if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9274  if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9275  if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9276  if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9277  if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9278  break;
9279  case 5:
9280  if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9281  if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9282  if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9283  if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9284  if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9285  if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9286  if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9287  if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9288  if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9289  if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9290  if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
9291  if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
9292  if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9293  break;
9294  case 6:
9295  if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9296  if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9297  if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
9298  if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9299  if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
9300  break;
9301  case 8:
9302  if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9303  if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9304  break;
9305  case 12:
9306  if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9307  break;
9308  }
9309  }
9310 
9311  if (encoding_changed) {
9312  return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9313  }
9314  return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9315 }
9316 
9321 static bool
9322 current_token_starts_line(pm_parser_t *parser) {
9323  return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
9324 }
9325 
9340 static pm_token_type_t
9341 lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
9342  // If there is no content following this #, then we're at the end of
9343  // the string and we can safely return string content.
9344  if (pound + 1 >= parser->end) {
9345  parser->current.end = pound + 1;
9346  return PM_TOKEN_STRING_CONTENT;
9347  }
9348 
9349  // Now we'll check against the character that follows the #. If it constitutes
9350  // valid interplation, we'll handle that, otherwise we'll return
9351  // PM_TOKEN_NOT_PROVIDED.
9352  switch (pound[1]) {
9353  case '@': {
9354  // In this case we may have hit an embedded instance or class variable.
9355  if (pound + 2 >= parser->end) {
9356  parser->current.end = pound + 1;
9357  return PM_TOKEN_STRING_CONTENT;
9358  }
9359 
9360  // If we're looking at a @ and there's another @, then we'll skip past the
9361  // second @.
9362  const uint8_t *variable = pound + 2;
9363  if (*variable == '@' && pound + 3 < parser->end) variable++;
9364 
9365  if (char_is_identifier_start(parser, variable)) {
9366  // At this point we're sure that we've either hit an embedded instance
9367  // or class variable. In this case we'll first need to check if we've
9368  // already consumed content.
9369  if (pound > parser->current.start) {
9370  parser->current.end = pound;
9371  return PM_TOKEN_STRING_CONTENT;
9372  }
9373 
9374  // Otherwise we need to return the embedded variable token
9375  // and then switch to the embedded variable lex mode.
9376  lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9377  parser->current.end = pound + 1;
9378  return PM_TOKEN_EMBVAR;
9379  }
9380 
9381  // If we didn't get a valid interpolation, then this is just regular
9382  // string content. This is like if we get "#@-". In this case the caller
9383  // should keep lexing.
9384  parser->current.end = pound + 1;
9385  return PM_TOKEN_NOT_PROVIDED;
9386  }
9387  case '$':
9388  // In this case we may have hit an embedded global variable. If there's
9389  // not enough room, then we'll just return string content.
9390  if (pound + 2 >= parser->end) {
9391  parser->current.end = pound + 1;
9392  return PM_TOKEN_STRING_CONTENT;
9393  }
9394 
9395  // This is the character that we're going to check to see if it is the
9396  // start of an identifier that would indicate that this is a global
9397  // variable.
9398  const uint8_t *check = pound + 2;
9399 
9400  if (pound[2] == '-') {
9401  if (pound + 3 >= parser->end) {
9402  parser->current.end = pound + 2;
9403  return PM_TOKEN_STRING_CONTENT;
9404  }
9405 
9406  check++;
9407  }
9408 
9409  // If the character that we're going to check is the start of an
9410  // identifier, or we don't have a - and the character is a decimal number
9411  // or a global name punctuation character, then we've hit an embedded
9412  // global variable.
9413  if (
9414  char_is_identifier_start(parser, check) ||
9415  (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
9416  ) {
9417  // In this case we've hit an embedded global variable. First check to
9418  // see if we've already consumed content. If we have, then we need to
9419  // return that content as string content first.
9420  if (pound > parser->current.start) {
9421  parser->current.end = pound;
9422  return PM_TOKEN_STRING_CONTENT;
9423  }
9424 
9425  // Otherwise, we need to return the embedded variable token and switch
9426  // to the embedded variable lex mode.
9427  lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9428  parser->current.end = pound + 1;
9429  return PM_TOKEN_EMBVAR;
9430  }
9431 
9432  // In this case we've hit a #$ that does not indicate a global variable.
9433  // In this case we'll continue lexing past it.
9434  parser->current.end = pound + 1;
9435  return PM_TOKEN_NOT_PROVIDED;
9436  case '{':
9437  // In this case it's the start of an embedded expression. If we have
9438  // already consumed content, then we need to return that content as string
9439  // content first.
9440  if (pound > parser->current.start) {
9441  parser->current.end = pound;
9442  return PM_TOKEN_STRING_CONTENT;
9443  }
9444 
9445  parser->enclosure_nesting++;
9446 
9447  // Otherwise we'll skip past the #{ and begin lexing the embedded
9448  // expression.
9449  lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
9450  parser->current.end = pound + 2;
9451  parser->command_start = true;
9452  pm_do_loop_stack_push(parser, false);
9453  return PM_TOKEN_EMBEXPR_BEGIN;
9454  default:
9455  // In this case we've hit a # that doesn't constitute interpolation. We'll
9456  // mark that by returning the not provided token type. This tells the
9457  // consumer to keep lexing forward.
9458  parser->current.end = pound + 1;
9459  return PM_TOKEN_NOT_PROVIDED;
9460  }
9461 }
9462 
// Bit flags describing the context in which an escape sequence is being read.
// They are combined with bitwise OR and threaded through escape_read().

// No special context.
static const uint8_t PM_ESCAPE_FLAG_NONE = 0x0;

// The escape is nested inside a control escape (\c or \C-).
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x1;

// The escape is nested inside a meta escape (\M-).
static const uint8_t PM_ESCAPE_FLAG_META = 0x2;

// The escape is being read for a ?x character literal.
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x4;

// The escape is inside a regular expression, so its source form is also
// copied into the regular expression buffer.
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x8;
9468 
/**
 * Lookup table indexed by byte value (0x00-0x7F). An entry is true for the
 * whitespace controls 0x09-0x0D and for 0x20-0x7E, with the single exception
 * of the backslash (0x5C). All other control bytes and DEL (0x7F) are false.
 */
static const bool ascii_printable_chars[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the byte is marked in ascii_printable_chars. Bytes at or
 * above 0x80 are never considered printable; the range check also keeps the
 * table lookup in bounds.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    return (b < 0x80) && ascii_printable_chars[b];
}
9487 
/**
 * Convert one ASCII hexadecimal digit into its numeric value (0-15).
 *
 * The byte is not validated here; the result is only meaningful when the
 * caller has already established that `value` is one of 0-9, a-f or A-F.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    // Decimal digits map directly. For letters, the low three bits of both
    // 'A'-'F' (0x41-0x46) and 'a'-'f' (0x61-0x66) are 1-6, so adding 9 gives
    // 10-15 regardless of case.
    return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
}
9496 
9502 static inline uint32_t
9503 escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9504  uint32_t value = 0;
9505  for (size_t index = 0; index < length; index++) {
9506  if (index != 0) value <<= 4;
9507  value |= escape_hexadecimal_digit(string[index]);
9508  }
9509 
9510  // Here we're going to verify that the value is actually a valid Unicode
9511  // codepoint and not a surrogate pair.
9512  if (value >= 0xD800 && value <= 0xDFFF) {
9513  pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9514  return 0xFFFD;
9515  }
9516 
9517  return value;
9518 }
9519 
9523 static inline uint8_t
9524 escape_byte(uint8_t value, const uint8_t flags) {
9525  if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9526  if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9527  return value;
9528 }
9529 
9533 static inline void
9534 escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
9535  // \u escape sequences in string-like structures implicitly change the
9536  // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
9537  // literal.
9538  if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
9539  if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
9540  PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
9541  }
9542 
9544  }
9545 
9546  if (value <= 0x7F) { // 0xxxxxxx
9547  pm_buffer_append_byte(buffer, (uint8_t) value);
9548  } else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
9549  pm_buffer_append_byte(buffer, (uint8_t) (0xC0 | (value >> 6)));
9550  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9551  } else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
9552  pm_buffer_append_byte(buffer, (uint8_t) (0xE0 | (value >> 12)));
9553  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
9554  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9555  } else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
9556  pm_buffer_append_byte(buffer, (uint8_t) (0xF0 | (value >> 18)));
9557  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
9558  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
9559  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9560  } else {
9561  pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
9562  pm_buffer_append_byte(buffer, 0xEF);
9563  pm_buffer_append_byte(buffer, 0xBF);
9564  pm_buffer_append_byte(buffer, 0xBD);
9565  }
9566 }
9567 
9572 static inline void
9573 escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
9574  if (byte >= 0x80) {
9575  if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
9576  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
9577  }
9578 
9579  parser->explicit_encoding = parser->encoding;
9580  }
9581 
9582  pm_buffer_append_byte(buffer, byte);
9583 }
9584 
9588 static inline void
9589 escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
9590  size_t width;
9591  if (parser->encoding_changed) {
9592  width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9593  } else {
9594  width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9595  }
9596 
9597  // TODO: If the character is invalid in the given encoding, then we'll just
9598  // push one byte into the buffer. This should actually be an error.
9599  width = (width == 0) ? 1 : width;
9600 
9601  for (size_t index = 0; index < width; index++) {
9602  escape_write_byte_encoded(parser, buffer, *parser->current.end);
9603  parser->current.end++;
9604  }
9605 }
9606 
9622 static inline void
9623 escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9624  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9625  pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9626  }
9627 
9628  escape_write_byte_encoded(parser, buffer, byte);
9629 }
9630 
9636 static void
9637 escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
9638 #define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
9639 
9640  PM_PARSER_WARN_TOKEN_FORMAT(
9641  parser,
9642  parser->current,
9643  PM_WARN_INVALID_CHARACTER,
9644  FLAG(flags),
9645  FLAG(flag),
9646  type
9647  );
9648 
9649 #undef FLAG
9650 }
9651 
/**
 * Read one escape sequence and append the bytes it denotes to `buffer`.
 *
 * On entry parser->current.end points at the character that follows the
 * backslash (the `x` and `u` cases recover the backslash position as
 * parser->current.end - 1). parser->current.end is advanced past whatever is
 * consumed.
 *
 * `flags` is a bitwise OR of the PM_ESCAPE_FLAG_* values. The control and
 * meta flags are added when this function recurses for \c\..., \C-\... and
 * \M-\... and are applied to the resulting byte through escape_byte(). When
 * PM_ESCAPE_FLAG_REGEXP is set, a textual form of the escape is also appended
 * to `regular_expression_buffer`; that buffer is not used otherwise.
 *
 * Malformed escapes are reported as parser errors (or, for some cases inside
 * regular expressions, copied through so the regular expression engine can
 * report them) rather than signalled through a return value.
 */
9655 static void
9656 escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9657  switch (peek(parser)) {
      // Single-character escapes: each consumes one character and writes one
      // byte, after escape_byte() applies any control/meta flags.
9658  case '\\': {
9659  parser->current.end++;
9660  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9661  return;
9662  }
9663  case '\'': {
9664  parser->current.end++;
9665  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9666  return;
9667  }
9668  case 'a': {
9669  parser->current.end++;
9670  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9671  return;
9672  }
9673  case 'b': {
9674  parser->current.end++;
9675  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9676  return;
9677  }
9678  case 'e': {
9679  parser->current.end++;
9680  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9681  return;
9682  }
9683  case 'f': {
9684  parser->current.end++;
9685  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9686  return;
9687  }
9688  case 'n': {
9689  parser->current.end++;
9690  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9691  return;
9692  }
9693  case 'r': {
9694  parser->current.end++;
9695  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9696  return;
9697  }
9698  case 's': {
9699  parser->current.end++;
9700  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9701  return;
9702  }
9703  case 't': {
9704  parser->current.end++;
9705  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9706  return;
9707  }
9708  case 'v': {
9709  parser->current.end++;
9710  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9711  return;
9712  }
      // Octal escape: one to three octal digits. The uint8_t casts truncate
      // the accumulated value to eight bits. Note that escape_byte() is not
      // applied on this path.
9713  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9714  uint8_t value = (uint8_t) (*parser->current.end - '0');
9715  parser->current.end++;
9716 
9717  if (pm_char_is_octal_digit(peek(parser))) {
9718  value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9719  parser->current.end++;
9720 
9721  if (pm_char_is_octal_digit(peek(parser))) {
9722  value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9723  parser->current.end++;
9724  }
9725  }
9726 
9727  escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9728  return;
9729  }
      // Hexadecimal escape: \x followed by one or two hexadecimal digits.
      // `start` is the position of the backslash.
9730  case 'x': {
9731  const uint8_t *start = parser->current.end - 1;
9732 
9733  parser->current.end++;
9734  uint8_t byte = peek(parser);
9735 
9736  if (pm_char_is_hexadecimal_digit(byte)) {
9737  uint8_t value = escape_hexadecimal_digit(byte);
9738  parser->current.end++;
9739 
9740  byte = peek(parser);
9741  if (pm_char_is_hexadecimal_digit(byte)) {
9742  value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9743  parser->current.end++;
9744  }
9745 
9746  value = escape_byte(value, flags);
      // In a regular expression the original source text is copied through
      // unless control/meta flags changed the byte, in which case the
      // resulting byte is re-spelled as \xNN.
9747  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9748  if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9749  pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9750  } else {
9751  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9752  }
9753  }
9754 
9755  escape_write_byte_encoded(parser, buffer, value);
9756  } else {
9757  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9758  }
9759 
9760  return;
9761  }
      // Unicode escape: either \u{...} holding a whitespace-separated list of
      // code points, or \u followed by exactly four hexadecimal digits.
      // `start` is the position of the backslash.
9762  case 'u': {
9763  const uint8_t *start = parser->current.end - 1;
9764  parser->current.end++;
9765 
9766  if (parser->current.end == parser->end) {
9767  const uint8_t *start = parser->current.end - 2;
9768  PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9769  } else if (peek(parser) == '{') {
9770  const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9771  parser->current.end++;
9772 
9773  size_t whitespace;
9774  while (true) {
9775  if ((whitespace = pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9776  parser->current.end += whitespace;
9777  } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9778  // This is super hacky, but it gets us nicer error
9779  // messages because we can still pass it off to the
9780  // regular expression engine even if we hit an
9781  // unterminated regular expression.
9782  parser->current.end += 2;
9783  } else {
9784  break;
9785  }
9786  }
9787 
9788  const uint8_t *extra_codepoints_start = NULL;
9789  int codepoints_count = 0;
9790 
      // Read code points until the closing brace or the end of input.
9791  while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9792  const uint8_t *unicode_start = parser->current.end;
9793  size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9794 
9795  if (hexadecimal_length > 6) {
9796  // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9797  pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9798  } else if (hexadecimal_length == 0) {
9799  // there are not hexadecimal characters
9800 
9801  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9802  // If this is a regular expression, we are going to
9803  // let the regular expression engine handle this
9804  // error instead of us.
9805  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9806  } else {
9807  pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9808  pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9809  }
9810 
9811  return;
9812  }
9813 
9814  parser->current.end += hexadecimal_length;
9815  codepoints_count++;
      // Remember where the second code point starts so that a character
      // literal with more than one code point can be reported below.
9816  if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9817  extra_codepoints_start = unicode_start;
9818  }
9819 
9820  uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9821  escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9822 
9823  parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
9824  }
9825 
9826  // ?\u{nnnn} character literal should contain only one codepoint
9827  // and cannot be like ?\u{nnnn mmmm}.
9828  if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9829  pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9830  }
9831 
9832  if (parser->current.end == parser->end) {
9833  PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9834  } else if (peek(parser) == '}') {
9835  parser->current.end++;
9836  } else {
9837  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9838  // If this is a regular expression, we are going to let
9839  // the regular expression engine handle this error
9840  // instead of us.
9841  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9842  } else {
9843  pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9844  }
9845  }
9846 
9847  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9848  pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9849  }
9850  } else {
      // \uXXXX form: look at no more than four bytes for hexadecimal digits.
9851  size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9852 
9853  if (length == 0) {
9854  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9855  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9856  } else {
9857  const uint8_t *start = parser->current.end - 2;
9858  PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9859  }
9860  } else if (length == 4) {
9861  uint32_t value = escape_unicode(parser, parser->current.end, 4);
9862 
9863  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9864  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9865  }
9866 
9867  escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9868  parser->current.end += 4;
9869  } else {
9870  parser->current.end += length;
9871 
9872  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9873  // If this is a regular expression, we are going to let
9874  // the regular expression engine handle this error
9875  // instead of us.
9876  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9877  } else {
9878  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9879  }
9880  }
9881  }
9882 
9883  return;
9884  }
      // Control escape spelled \cX. A nested backslash recurses with the
      // control flag added; \u and \U are rejected in that position.
9885  case 'c': {
9886  parser->current.end++;
9887  if (flags & PM_ESCAPE_FLAG_CONTROL) {
9888  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9889  }
9890 
9891  if (parser->current.end == parser->end) {
9892  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9893  return;
9894  }
9895 
9896  uint8_t peeked = peek(parser);
9897  switch (peeked) {
9898  case '?': {
9899  parser->current.end++;
9900  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9901  return;
9902  }
9903  case '\\':
9904  parser->current.end++;
9905 
9906  if (match(parser, 'u') || match(parser, 'U')) {
9907  pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9908  return;
9909  }
9910 
9911  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9912  return;
9913  case ' ':
9914  parser->current.end++;
9915  escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9916  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9917  return;
9918  case '\t':
9919  parser->current.end++;
9920  escape_read_warn(parser, flags, 0, "\\t");
9921  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9922  return;
9923  default: {
9924  if (!char_is_ascii_printable(peeked)) {
9925  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9926  return;
9927  }
9928 
9929  parser->current.end++;
9930  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9931  return;
9932  }
9933  }
9934  }
      // Control escape spelled \C-X. Same handling as \cX once the mandatory
      // '-' has been consumed; the error locations here span from the token
      // start through the offending character.
9935  case 'C': {
9936  parser->current.end++;
9937  if (flags & PM_ESCAPE_FLAG_CONTROL) {
9938  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9939  }
9940 
9941  if (peek(parser) != '-') {
9942  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9943  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9944  return;
9945  }
9946 
9947  parser->current.end++;
9948  if (parser->current.end == parser->end) {
9949  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9950  return;
9951  }
9952 
9953  uint8_t peeked = peek(parser);
9954  switch (peeked) {
9955  case '?': {
9956  parser->current.end++;
9957  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9958  return;
9959  }
9960  case '\\':
9961  parser->current.end++;
9962 
9963  if (match(parser, 'u') || match(parser, 'U')) {
9964  pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9965  return;
9966  }
9967 
9968  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9969  return;
9970  case ' ':
9971  parser->current.end++;
9972  escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9973  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9974  return;
9975  case '\t':
9976  parser->current.end++;
9977  escape_read_warn(parser, flags, 0, "\\t");
9978  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9979  return;
9980  default: {
9981  if (!char_is_ascii_printable(peeked)) {
9982  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9983  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9984  return;
9985  }
9986 
9987  parser->current.end++;
9988  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9989  return;
9990  }
9991  }
9992  }
      // Meta escape spelled \M-X. A nested backslash recurses with the meta
      // flag added; \u and \U are rejected in that position.
9993  case 'M': {
9994  parser->current.end++;
9995  if (flags & PM_ESCAPE_FLAG_META) {
9996  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9997  }
9998 
9999  if (peek(parser) != '-') {
10000  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10001  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10002  return;
10003  }
10004 
10005  parser->current.end++;
10006  if (parser->current.end == parser->end) {
10007  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
10008  return;
10009  }
10010 
10011  uint8_t peeked = peek(parser);
10012  switch (peeked) {
10013  case '\\':
10014  parser->current.end++;
10015 
10016  if (match(parser, 'u') || match(parser, 'U')) {
10017  pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
10018  return;
10019  }
10020 
10021  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
10022  return;
10023  case ' ':
10024  parser->current.end++;
10025  escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
10026  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10027  return;
10028  case '\t':
10029  parser->current.end++;
10030  escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
10031  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10032  return;
10033  default:
10034  if (!char_is_ascii_printable(peeked)) {
10035  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10036  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10037  return;
10038  }
10039 
10040  parser->current.end++;
10041  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10042  return;
10043  }
10044  }
      // A backslash followed by CRLF consumes both bytes and writes a single
      // '\n' (after escape_byte). A lone '\r' falls through to the default
      // handling below.
10045  case '\r': {
10046  if (peek_offset(parser, 1) == '\n') {
10047  parser->current.end += 2;
10048  escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10049  return;
10050  }
10051  }
10052  /* fallthrough */
      // Any other character is copied through unchanged at its full encoded
      // width; running out of input here is an error.
10053  default: {
10054  if (parser->current.end < parser->end) {
10055  escape_write_escape_encoded(parser, buffer);
10056  } else {
10057  pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10058  }
10059  return;
10060  }
10061  }
10062 }
10063 
10089 static pm_token_type_t
10090 lex_question_mark(pm_parser_t *parser) {
10091  if (lex_state_end_p(parser)) {
10092  lex_state_set(parser, PM_LEX_STATE_BEG);
10093  return PM_TOKEN_QUESTION_MARK;
10094  }
10095 
10096  if (parser->current.end >= parser->end) {
10097  pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
10098  pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10100  }
10101 
10102  if (pm_char_is_whitespace(*parser->current.end)) {
10103  lex_state_set(parser, PM_LEX_STATE_BEG);
10104  return PM_TOKEN_QUESTION_MARK;
10105  }
10106 
10107  lex_state_set(parser, PM_LEX_STATE_BEG);
10108 
10109  if (match(parser, '\\')) {
10110  lex_state_set(parser, PM_LEX_STATE_END);
10111 
10112  pm_buffer_t buffer;
10113  pm_buffer_init_capacity(&buffer, 3);
10114 
10115  escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
10116  pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
10117 
10119  } else {
10120  size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10121 
10122  // Ternary operators can have a ? immediately followed by an identifier
10123  // which starts with an underscore. We check for this case here.
10124  if (
10125  !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
10126  (
10127  (parser->current.end + encoding_width >= parser->end) ||
10128  !char_is_identifier(parser, parser->current.end + encoding_width)
10129  )
10130  ) {
10131  lex_state_set(parser, PM_LEX_STATE_END);
10132  parser->current.end += encoding_width;
10133  pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10135  }
10136  }
10137 
10138  return PM_TOKEN_QUESTION_MARK;
10139 }
10140 
10145 static pm_token_type_t
10146 lex_at_variable(pm_parser_t *parser) {
10148  size_t width;
10149 
10150  if (parser->current.end < parser->end && (width = char_is_identifier_start(parser, parser->current.end)) > 0) {
10151  parser->current.end += width;
10152 
10153  while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
10154  parser->current.end += width;
10155  }
10156  } else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
10157  pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10158  if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
10159  diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10160  }
10161 
10162  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10163  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
10164  } else {
10165  pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
10166  pm_parser_err_token(parser, &parser->current, diag_id);
10167  }
10168 
10169  // If we're lexing an embedded variable, then we need to pop back into the
10170  // parent lex context.
10171  if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10172  lex_mode_pop(parser);
10173  }
10174 
10175  return type;
10176 }
10177 
10181 static inline void
10182 parser_lex_callback(pm_parser_t *parser) {
10183  if (parser->lex_callback) {
10184  parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
10185  }
10186 }
10187 
10191 static inline pm_comment_t *
10192 parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
10193  pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
10194  if (comment == NULL) return NULL;
10195 
10196  *comment = (pm_comment_t) {
10197  .type = type,
10198  .location = { parser->current.start, parser->current.end }
10199  };
10200 
10201  return comment;
10202 }
10203 
10209 static pm_token_type_t
10210 lex_embdoc(pm_parser_t *parser) {
10211  // First, lex out the EMBDOC_BEGIN token.
10212  const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10213 
10214  if (newline == NULL) {
10215  parser->current.end = parser->end;
10216  } else {
10217  pm_newline_list_append(&parser->newline_list, newline);
10218  parser->current.end = newline + 1;
10219  }
10220 
10221  parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
10222  parser_lex_callback(parser);
10223 
10224  // Now, create a comment that is going to be attached to the parser.
10225  pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
10226  if (comment == NULL) return PM_TOKEN_EOF;
10227 
10228  // Now, loop until we find the end of the embedded documentation or the end
10229  // of the file.
10230  while (parser->current.end + 4 <= parser->end) {
10231  parser->current.start = parser->current.end;
10232 
10233  // If we've hit the end of the embedded documentation then we'll return
10234  // that token here.
10235  if (
10236  (memcmp(parser->current.end, "=end", 4) == 0) &&
10237  (
10238  (parser->current.end + 4 == parser->end) || // end of file
10239  pm_char_is_whitespace(parser->current.end[4]) || // whitespace
10240  (parser->current.end[4] == '\0') || // NUL or end of script
10241  (parser->current.end[4] == '\004') || // ^D
10242  (parser->current.end[4] == '\032') // ^Z
10243  )
10244  ) {
10245  const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10246 
10247  if (newline == NULL) {
10248  parser->current.end = parser->end;
10249  } else {
10250  pm_newline_list_append(&parser->newline_list, newline);
10251  parser->current.end = newline + 1;
10252  }
10253 
10254  parser->current.type = PM_TOKEN_EMBDOC_END;
10255  parser_lex_callback(parser);
10256 
10257  comment->location.end = parser->current.end;
10258  pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10259 
10260  return PM_TOKEN_EMBDOC_END;
10261  }
10262 
10263  // Otherwise, we'll parse until the end of the line and return a line of
10264  // embedded documentation.
10265  const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10266 
10267  if (newline == NULL) {
10268  parser->current.end = parser->end;
10269  } else {
10270  pm_newline_list_append(&parser->newline_list, newline);
10271  parser->current.end = newline + 1;
10272  }
10273 
10274  parser->current.type = PM_TOKEN_EMBDOC_LINE;
10275  parser_lex_callback(parser);
10276  }
10277 
10278  pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
10279 
10280  comment->location.end = parser->current.end;
10281  pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10282 
10283  return PM_TOKEN_EOF;
10284 }
10285 
10291 static inline void
10292 parser_lex_ignored_newline(pm_parser_t *parser) {
10293  parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
10294  parser_lex_callback(parser);
10295 }
10296 
10306 static inline void
10307 parser_flush_heredoc_end(pm_parser_t *parser) {
10308  assert(parser->heredoc_end <= parser->end);
10309  parser->next_start = parser->heredoc_end;
10310  parser->heredoc_end = NULL;
10311 }
10312 
10316 static bool
10317 parser_end_of_line_p(const pm_parser_t *parser) {
10318  const uint8_t *cursor = parser->current.end;
10319 
10320  while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
10321  if (!pm_char_is_inline_whitespace(*cursor++)) return false;
10322  }
10323 
10324  return true;
10325 }
10326 
10345 typedef struct {
10351 
10356  const uint8_t *cursor;
10358 
10371 typedef struct {
10374 
10378 
10382 static inline void
10383 pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
10384  pm_buffer_append_byte(&token_buffer->buffer, byte);
10385 }
10386 
10387 static inline void
10388 pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
10389  pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
10390 }
10391 
10395 static inline size_t
10396 parser_char_width(const pm_parser_t *parser) {
10397  size_t width;
10398  if (parser->encoding_changed) {
10399  width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10400  } else {
10401  width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
10402  }
10403 
10404  // TODO: If the character is invalid in the given encoding, then we'll just
10405  // push one byte into the buffer. This should actually be an error.
10406  return (width == 0 ? 1 : width);
10407 }
10408 
10412 static void
10413 pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
10414  size_t width = parser_char_width(parser);
10415  pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
10416  parser->current.end += width;
10417 }
10418 
10419 static void
10420 pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
10421  size_t width = parser_char_width(parser);
10422  pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
10423  pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
10424  parser->current.end += width;
10425 }
10426 
/**
 * Returns true if none of the first `length` bytes starting at `value` has
 * its high bit set. An empty slice is considered ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    for (const uint8_t *cursor = value; length > 0; cursor++, length--) {
        if (*cursor >= 0x80) return false;
    }

    return true;
}
10435 
10442 static inline void
10443 pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10444  pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
10445 }
10446 
10447 static inline void
10448 pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10449  pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
10450  parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
10451  pm_buffer_free(&token_buffer->regexp_buffer);
10452 }
10453 
10463 static void
10464 pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10465  if (token_buffer->cursor == NULL) {
10466  pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10467  } else {
10468  pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
10469  pm_token_buffer_copy(parser, token_buffer);
10470  }
10471 }
10472 
10473 static void
10474 pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10475  if (token_buffer->base.cursor == NULL) {
10476  pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10477  parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
10478  } else {
10479  pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10480  pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10481  pm_regexp_token_buffer_copy(parser, token_buffer);
10482  }
10483 }
10484 
10485 #define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
10486 
10495 static void
10496 pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10497  const uint8_t *start;
10498  if (token_buffer->cursor == NULL) {
10499  pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10500  start = parser->current.start;
10501  } else {
10502  start = token_buffer->cursor;
10503  }
10504 
10505  const uint8_t *end = parser->current.end - 1;
10506  pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10507 
10508  token_buffer->cursor = end;
10509 }
10510 
10511 static void
10512 pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10513  const uint8_t *start;
10514  if (token_buffer->base.cursor == NULL) {
10515  pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10516  pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10517  start = parser->current.start;
10518  } else {
10519  start = token_buffer->base.cursor;
10520  }
10521 
10522  const uint8_t *end = parser->current.end - 1;
10523  pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
10524  pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
10525 
10526  token_buffer->base.cursor = end;
10527 }
10528 
10529 #undef PM_TOKEN_BUFFER_DEFAULT_SIZE
10530 
10535 static inline size_t
10536 pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
10537  size_t whitespace = 0;
10538 
10539  switch (indent) {
10540  case PM_HEREDOC_INDENT_NONE:
10541  // Do nothing, we can't match a terminator with
10542  // indentation and there's no need to calculate common
10543  // whitespace.
10544  break;
10545  case PM_HEREDOC_INDENT_DASH:
10546  // Skip past inline whitespace.
10547  *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
10548  break;
10549  case PM_HEREDOC_INDENT_TILDE:
10550  // Skip past inline whitespace and calculate common
10551  // whitespace.
10552  while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
10553  if (**cursor == '\t') {
10554  whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
10555  } else {
10556  whitespace++;
10557  }
10558  (*cursor)++;
10559  }
10560 
10561  break;
10562  }
10563 
10564  return whitespace;
10565 }
10566 
10571 static uint8_t
10572 pm_lex_percent_delimiter(pm_parser_t *parser) {
10573  size_t eol_length = match_eol(parser);
10574 
10575  if (eol_length) {
10576  if (parser->heredoc_end) {
10577  // If we have already lexed a heredoc, then the newline has already
10578  // been added to the list. In this case we want to just flush the
10579  // heredoc end.
10580  parser_flush_heredoc_end(parser);
10581  } else {
10582  // Otherwise, we'll add the newline to the list of newlines.
10583  pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10584  }
10585 
10586  const uint8_t delimiter = *parser->current.end;
10587  parser->current.end += eol_length;
10588 
10589  return delimiter;
10590  }
10591 
10592  return *parser->current.end++;
10593 }
10594 
10599 #define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
10600 
10607 static void
10608 parser_lex(pm_parser_t *parser) {
10609  assert(parser->current.end <= parser->end);
10610  parser->previous = parser->current;
10611 
10612  // This value mirrors cmd_state from CRuby.
10613  bool previous_command_start = parser->command_start;
10614  parser->command_start = false;
10615 
10616  // This is used to communicate to the newline lexing function that we've
10617  // already seen a comment.
10618  bool lexed_comment = false;
10619 
10620  // Here we cache the current value of the semantic token seen flag. This is
10621  // used to reset it in case we find a token that shouldn't flip this flag.
10622  unsigned int semantic_token_seen = parser->semantic_token_seen;
10623  parser->semantic_token_seen = true;
10624 
10625  switch (parser->lex_modes.current->mode) {
10626  case PM_LEX_DEFAULT:
10627  case PM_LEX_EMBEXPR:
10628  case PM_LEX_EMBVAR:
10629 
10630  // We have a specific named label here because we are going to jump back to
10631  // this location in the event that we have lexed a token that should not be
10632  // returned to the parser. This includes comments, ignored newlines, and
10633  // invalid tokens of some form.
10634  lex_next_token: {
10635  // If we have the special next_start pointer set, then we're going to jump
10636  // to that location and start lexing from there.
10637  if (parser->next_start != NULL) {
10638  parser->current.end = parser->next_start;
10639  parser->next_start = NULL;
10640  }
10641 
10642  // This value mirrors space_seen from CRuby. It tracks whether or not
10643  // space has been eaten before the start of the next token.
10644  bool space_seen = false;
10645 
10646  // First, we're going to skip past any whitespace at the front of the next
10647  // token.
10648  bool chomping = true;
10649  while (parser->current.end < parser->end && chomping) {
10650  switch (*parser->current.end) {
10651  case ' ':
10652  case '\t':
10653  case '\f':
10654  case '\v':
10655  parser->current.end++;
10656  space_seen = true;
10657  break;
10658  case '\r':
10659  if (match_eol_offset(parser, 1)) {
10660  chomping = false;
10661  } else {
10662  pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10663  parser->current.end++;
10664  space_seen = true;
10665  }
10666  break;
10667  case '\\': {
10668  size_t eol_length = match_eol_offset(parser, 1);
10669  if (eol_length) {
10670  if (parser->heredoc_end) {
10671  parser->current.end = parser->heredoc_end;
10672  parser->heredoc_end = NULL;
10673  } else {
10674  parser->current.end += eol_length + 1;
10675  pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10676  space_seen = true;
10677  }
10678  } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10679  parser->current.end += 2;
10680  } else {
10681  chomping = false;
10682  }
10683 
10684  break;
10685  }
10686  default:
10687  chomping = false;
10688  break;
10689  }
10690  }
10691 
10692  // Next, we'll set to start of this token to be the current end.
10693  parser->current.start = parser->current.end;
10694 
10695  // We'll check if we're at the end of the file. If we are, then we
10696  // need to return the EOF token.
10697  if (parser->current.end >= parser->end) {
10698  // If we hit EOF, but the EOF came immediately after a newline,
10699  // set the start of the token to the newline. This way any EOF
10700  // errors will be reported as happening on that line rather than
10701  // a line after. For example "foo(\n" should report an error
10702  // on line 1 even though EOF technically occurs on line 2.
10703  if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10704  parser->current.start -= 1;
10705  }
10706  LEX(PM_TOKEN_EOF);
10707  }
10708 
10709  // Finally, we'll check the current character to determine the next
10710  // token.
10711  switch (*parser->current.end++) {
10712  case '\0': // NUL or end of script
10713  case '\004': // ^D
10714  case '\032': // ^Z
10715  parser->current.end--;
10716  LEX(PM_TOKEN_EOF);
10717 
10718  case '#': { // comments
10719  const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10720  parser->current.end = ending == NULL ? parser->end : ending;
10721 
10722  // If we found a comment while lexing, then we're going to
10723  // add it to the list of comments in the file and keep
10724  // lexing.
10725  pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10726  pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10727 
10728  if (ending) parser->current.end++;
10729  parser->current.type = PM_TOKEN_COMMENT;
10730  parser_lex_callback(parser);
10731 
10732  // Here, parse the comment to see if it's a magic comment
10733  // and potentially change state on the parser.
10734  if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10735  ptrdiff_t length = parser->current.end - parser->current.start;
10736 
10737  // If we didn't find a magic comment within the first
10738  // pass and we're at the start of the file, then we need
10739  // to do another pass to potentially find other patterns
10740  // for encoding comments.
10741  if (length >= 10 && !parser->encoding_locked) {
10742  parser_lex_magic_comment_encoding(parser);
10743  }
10744  }
10745 
10746  lexed_comment = true;
10747  }
10748  /* fallthrough */
10749  case '\r':
10750  case '\n': {
10751  parser->semantic_token_seen = semantic_token_seen & 0x1;
10752  size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10753 
10754  if (eol_length) {
10755  // The only way you can have carriage returns in this
10756  // particular loop is if you have a carriage return
10757  // followed by a newline. In that case we'll just skip
10758  // over the carriage return and continue lexing, in
10759  // order to make it so that the newline token
10760  // encapsulates both the carriage return and the
10761  // newline. Note that we need to check that we haven't
10762  // already lexed a comment here because that falls
10763  // through into here as well.
10764  if (!lexed_comment) {
10765  parser->current.end += eol_length - 1; // skip CR
10766  }
10767 
10768  if (parser->heredoc_end == NULL) {
10769  pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10770  }
10771  }
10772 
10773  if (parser->heredoc_end) {
10774  parser_flush_heredoc_end(parser);
10775  }
10776 
10777  // If this is an ignored newline, then we can continue lexing after
10778  // calling the callback with the ignored newline token.
10779  switch (lex_state_ignored_p(parser)) {
10780  case PM_IGNORED_NEWLINE_NONE:
10781  break;
10782  case PM_IGNORED_NEWLINE_PATTERN:
10783  if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10784  if (!lexed_comment) parser_lex_ignored_newline(parser);
10785  lex_state_set(parser, PM_LEX_STATE_BEG);
10786  parser->command_start = true;
10787  parser->current.type = PM_TOKEN_NEWLINE;
10788  return;
10789  }
10790  /* fallthrough */
10791  case PM_IGNORED_NEWLINE_ALL:
10792  if (!lexed_comment) parser_lex_ignored_newline(parser);
10793  lexed_comment = false;
10794  goto lex_next_token;
10795  }
10796 
10797  // Here we need to look ahead and see if there is a call operator
10798  // (either . or &.) that starts the next line. If there is, then this
10799  // is going to become an ignored newline and we're going to instead
10800  // return the call operator.
10801  const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10802  next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10803 
10804  if (next_content < parser->end) {
10805  // If we hit a comment after a newline, then we're going to check
10806  // if it's ignored or if it's followed by a method call ('.').
10807  // If it is, then we're going to call the
10808  // callback with an ignored newline and then continue lexing.
10809  // Otherwise we'll return a regular newline.
10810  if (next_content[0] == '#') {
10811  // Here we look for a "." or "&." following a "\n".
10812  const uint8_t *following = next_newline(next_content, parser->end - next_content);
10813 
10814  while (following && (following + 1 < parser->end)) {
10815  following++;
10816  following += pm_strspn_inline_whitespace(following, parser->end - following);
10817 
10818  // If this is not followed by a comment, then we can break out
10819  // of this loop.
10820  if (peek_at(parser, following) != '#') break;
10821 
10822  // If there is a comment, then we need to find the end of the
10823  // comment and continue searching from there.
10824  following = next_newline(following, parser->end - following);
10825  }
10826 
10827  // If the lex state was ignored, or we hit a '.' or a '&.',
10828  // we will lex the ignored newline
10829  if (
10830  lex_state_ignored_p(parser) ||
10831  (following && (
10832  (peek_at(parser, following) == '.') ||
10833  (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10834  ))
10835  ) {
10836  if (!lexed_comment) parser_lex_ignored_newline(parser);
10837  lexed_comment = false;
10838  goto lex_next_token;
10839  }
10840  }
10841 
10842  // If we hit a . after a newline, then we're in a call chain and
10843  // we need to return the call operator.
10844  if (next_content[0] == '.') {
10845  // To match ripper, we need to emit an ignored newline even though
10846  // it's a real newline in the case that we have a beginless range
10847  // on a subsequent line.
10848  if (peek_at(parser, next_content + 1) == '.') {
10849  if (!lexed_comment) parser_lex_ignored_newline(parser);
10850  lex_state_set(parser, PM_LEX_STATE_BEG);
10851  parser->command_start = true;
10852  parser->current.type = PM_TOKEN_NEWLINE;
10853  return;
10854  }
10855 
10856  if (!lexed_comment) parser_lex_ignored_newline(parser);
10857  lex_state_set(parser, PM_LEX_STATE_DOT);
10858  parser->current.start = next_content;
10859  parser->current.end = next_content + 1;
10860  parser->next_start = NULL;
10861  LEX(PM_TOKEN_DOT);
10862  }
10863 
10864  // If we hit a &. after a newline, then we're in a call chain and
10865  // we need to return the call operator.
10866  if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10867  if (!lexed_comment) parser_lex_ignored_newline(parser);
10868  lex_state_set(parser, PM_LEX_STATE_DOT);
10869  parser->current.start = next_content;
10870  parser->current.end = next_content + 2;
10871  parser->next_start = NULL;
10873  }
10874  }
10875 
10876  // At this point we know this is a regular newline, and we can set the
10877  // necessary state and return the token.
10878  lex_state_set(parser, PM_LEX_STATE_BEG);
10879  parser->command_start = true;
10880  parser->current.type = PM_TOKEN_NEWLINE;
10881  if (!lexed_comment) parser_lex_callback(parser);
10882  return;
10883  }
10884 
10885  // ,
10886  case ',':
10887  if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10888  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10889  }
10890 
10891  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10892  LEX(PM_TOKEN_COMMA);
10893 
10894  // (
10895  case '(': {
10897 
10898  if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10900  }
10901 
10902  parser->enclosure_nesting++;
10903  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10904  pm_do_loop_stack_push(parser, false);
10905  LEX(type);
10906  }
10907 
10908  // )
10909  case ')':
10910  parser->enclosure_nesting--;
10911  lex_state_set(parser, PM_LEX_STATE_ENDFN);
10912  pm_do_loop_stack_pop(parser);
10914 
10915  // ;
10916  case ';':
10917  lex_state_set(parser, PM_LEX_STATE_BEG);
10918  parser->command_start = true;
10919  LEX(PM_TOKEN_SEMICOLON);
10920 
10921  // [ [] []=
10922  case '[':
10923  parser->enclosure_nesting++;
10925 
10926  if (lex_state_operator_p(parser)) {
10927  if (match(parser, ']')) {
10928  parser->enclosure_nesting--;
10929  lex_state_set(parser, PM_LEX_STATE_ARG);
10931  }
10932 
10933  lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10934  LEX(type);
10935  }
10936 
10937  if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10939  }
10940 
10941  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10942  pm_do_loop_stack_push(parser, false);
10943  LEX(type);
10944 
10945  // ]
10946  case ']':
10947  parser->enclosure_nesting--;
10948  lex_state_set(parser, PM_LEX_STATE_END);
10949  pm_do_loop_stack_pop(parser);
10951 
10952  // {
10953  case '{': {
10955 
10956  if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10957  // This { begins a lambda
10958  parser->command_start = true;
10959  lex_state_set(parser, PM_LEX_STATE_BEG);
10961  } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10962  // This { begins a hash literal
10963  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10964  } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10965  // This { begins a block
10966  parser->command_start = true;
10967  lex_state_set(parser, PM_LEX_STATE_BEG);
10968  } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10969  // This { begins a block on a command
10970  parser->command_start = true;
10971  lex_state_set(parser, PM_LEX_STATE_BEG);
10972  } else {
10973  // This { begins a hash literal
10974  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10975  }
10976 
10977  parser->enclosure_nesting++;
10978  parser->brace_nesting++;
10979  pm_do_loop_stack_push(parser, false);
10980 
10981  LEX(type);
10982  }
10983 
10984  // }
10985  case '}':
10986  parser->enclosure_nesting--;
10987  pm_do_loop_stack_pop(parser);
10988 
10989  if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10990  lex_mode_pop(parser);
10991  LEX(PM_TOKEN_EMBEXPR_END);
10992  }
10993 
10994  parser->brace_nesting--;
10995  lex_state_set(parser, PM_LEX_STATE_END);
10996  LEX(PM_TOKEN_BRACE_RIGHT);
10997 
10998  // * ** **= *=
10999  case '*': {
11000  if (match(parser, '*')) {
11001  if (match(parser, '=')) {
11002  lex_state_set(parser, PM_LEX_STATE_BEG);
11004  }
11005 
11007 
11008  if (lex_state_spcarg_p(parser, space_seen)) {
11009  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
11011  } else if (lex_state_beg_p(parser)) {
11013  } else if (ambiguous_operator_p(parser, space_seen)) {
11014  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
11015  }
11016 
11017  if (lex_state_operator_p(parser)) {
11018  lex_state_set(parser, PM_LEX_STATE_ARG);
11019  } else {
11020  lex_state_set(parser, PM_LEX_STATE_BEG);
11021  }
11022 
11023  LEX(type);
11024  }
11025 
11026  if (match(parser, '=')) {
11027  lex_state_set(parser, PM_LEX_STATE_BEG);
11028  LEX(PM_TOKEN_STAR_EQUAL);
11029  }
11030 
11032 
11033  if (lex_state_spcarg_p(parser, space_seen)) {
11034  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
11035  type = PM_TOKEN_USTAR;
11036  } else if (lex_state_beg_p(parser)) {
11037  type = PM_TOKEN_USTAR;
11038  } else if (ambiguous_operator_p(parser, space_seen)) {
11039  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
11040  }
11041 
11042  if (lex_state_operator_p(parser)) {
11043  lex_state_set(parser, PM_LEX_STATE_ARG);
11044  } else {
11045  lex_state_set(parser, PM_LEX_STATE_BEG);
11046  }
11047 
11048  LEX(type);
11049  }
11050 
11051  // ! != !~ !@
11052  case '!':
11053  if (lex_state_operator_p(parser)) {
11054  lex_state_set(parser, PM_LEX_STATE_ARG);
11055  if (match(parser, '@')) {
11056  LEX(PM_TOKEN_BANG);
11057  }
11058  } else {
11059  lex_state_set(parser, PM_LEX_STATE_BEG);
11060  }
11061 
11062  if (match(parser, '=')) {
11063  LEX(PM_TOKEN_BANG_EQUAL);
11064  }
11065 
11066  if (match(parser, '~')) {
11067  LEX(PM_TOKEN_BANG_TILDE);
11068  }
11069 
11070  LEX(PM_TOKEN_BANG);
11071 
11072  // = => =~ == === =begin
11073  case '=':
11074  if (
11075  current_token_starts_line(parser) &&
11076  (parser->current.end + 5 <= parser->end) &&
11077  memcmp(parser->current.end, "begin", 5) == 0 &&
11078  (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
11079  ) {
11080  pm_token_type_t type = lex_embdoc(parser);
11081  if (type == PM_TOKEN_EOF) {
11082  LEX(type);
11083  }
11084 
11085  goto lex_next_token;
11086  }
11087 
11088  if (lex_state_operator_p(parser)) {
11089  lex_state_set(parser, PM_LEX_STATE_ARG);
11090  } else {
11091  lex_state_set(parser, PM_LEX_STATE_BEG);
11092  }
11093 
11094  if (match(parser, '>')) {
11096  }
11097 
11098  if (match(parser, '~')) {
11099  LEX(PM_TOKEN_EQUAL_TILDE);
11100  }
11101 
11102  if (match(parser, '=')) {
11103  LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
11104  }
11105 
11106  LEX(PM_TOKEN_EQUAL);
11107 
11108  // < << <<= <= <=>
11109  case '<':
11110  if (match(parser, '<')) {
11111  if (
11112  !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
11113  !lex_state_end_p(parser) &&
11114  (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
11115  ) {
11116  const uint8_t *end = parser->current.end;
11117 
11118  pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
11119  pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
11120 
11121  if (match(parser, '-')) {
11122  indent = PM_HEREDOC_INDENT_DASH;
11123  }
11124  else if (match(parser, '~')) {
11125  indent = PM_HEREDOC_INDENT_TILDE;
11126  }
11127 
11128  if (match(parser, '`')) {
11129  quote = PM_HEREDOC_QUOTE_BACKTICK;
11130  }
11131  else if (match(parser, '"')) {
11132  quote = PM_HEREDOC_QUOTE_DOUBLE;
11133  }
11134  else if (match(parser, '\'')) {
11135  quote = PM_HEREDOC_QUOTE_SINGLE;
11136  }
11137 
11138  const uint8_t *ident_start = parser->current.end;
11139  size_t width = 0;
11140 
11141  if (parser->current.end >= parser->end) {
11142  parser->current.end = end;
11143  } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
11144  parser->current.end = end;
11145  } else {
11146  if (quote == PM_HEREDOC_QUOTE_NONE) {
11147  parser->current.end += width;
11148 
11149  while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
11150  parser->current.end += width;
11151  }
11152  } else {
11153  // If we have quotes, then we're going to go until we find the
11154  // end quote.
11155  while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
11156  if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
11157  parser->current.end++;
11158  }
11159  }
11160 
11161  size_t ident_length = (size_t) (parser->current.end - ident_start);
11162  bool ident_error = false;
11163 
11164  if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
11165  pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
11166  ident_error = true;
11167  }
11168 
11169  parser->explicit_encoding = NULL;
11170  lex_mode_push(parser, (pm_lex_mode_t) {
11171  .mode = PM_LEX_HEREDOC,
11172  .as.heredoc = {
11173  .base = {
11174  .ident_start = ident_start,
11175  .ident_length = ident_length,
11176  .quote = quote,
11177  .indent = indent
11178  },
11179  .next_start = parser->current.end,
11180  .common_whitespace = NULL,
11181  .line_continuation = false
11182  }
11183  });
11184 
11185  if (parser->heredoc_end == NULL) {
11186  const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
11187 
11188  if (body_start == NULL) {
11189  // If there is no newline after the heredoc identifier, then
11190  // this is not a valid heredoc declaration. In this case we
11191  // will add an error, but we will still return a heredoc
11192  // start.
11193  if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11194  body_start = parser->end;
11195  } else {
11196  // Otherwise, we want to indicate that the body of the
11197  // heredoc starts on the character after the next newline.
11198  pm_newline_list_append(&parser->newline_list, body_start);
11199  body_start++;
11200  }
11201 
11202  parser->next_start = body_start;
11203  } else {
11204  parser->next_start = parser->heredoc_end;
11205  }
11206 
11208  }
11209  }
11210 
11211  if (match(parser, '=')) {
11212  lex_state_set(parser, PM_LEX_STATE_BEG);
11214  }
11215 
11216  if (ambiguous_operator_p(parser, space_seen)) {
11217  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11218  }
11219 
11220  if (lex_state_operator_p(parser)) {
11221  lex_state_set(parser, PM_LEX_STATE_ARG);
11222  } else {
11223  if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11224  lex_state_set(parser, PM_LEX_STATE_BEG);
11225  }
11226 
11227  LEX(PM_TOKEN_LESS_LESS);
11228  }
11229 
11230  if (lex_state_operator_p(parser)) {
11231  lex_state_set(parser, PM_LEX_STATE_ARG);
11232  } else {
11233  if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11234  lex_state_set(parser, PM_LEX_STATE_BEG);
11235  }
11236 
11237  if (match(parser, '=')) {
11238  if (match(parser, '>')) {
11240  }
11241 
11242  LEX(PM_TOKEN_LESS_EQUAL);
11243  }
11244 
11245  LEX(PM_TOKEN_LESS);
11246 
11247  // > >> >>= >=
11248  case '>':
11249  if (match(parser, '>')) {
11250  if (lex_state_operator_p(parser)) {
11251  lex_state_set(parser, PM_LEX_STATE_ARG);
11252  } else {
11253  lex_state_set(parser, PM_LEX_STATE_BEG);
11254  }
11255  LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
11256  }
11257 
11258  if (lex_state_operator_p(parser)) {
11259  lex_state_set(parser, PM_LEX_STATE_ARG);
11260  } else {
11261  lex_state_set(parser, PM_LEX_STATE_BEG);
11262  }
11263 
11264  LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
11265 
11266  // double-quoted string literal
11267  case '"': {
11268  bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11269  lex_mode_push_string(parser, true, label_allowed, '\0', '"');
11270  LEX(PM_TOKEN_STRING_BEGIN);
11271  }
11272 
11273  // xstring literal
11274  case '`': {
11275  if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
11276  lex_state_set(parser, PM_LEX_STATE_ENDFN);
11277  LEX(PM_TOKEN_BACKTICK);
11278  }
11279 
11280  if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
11281  if (previous_command_start) {
11282  lex_state_set(parser, PM_LEX_STATE_CMDARG);
11283  } else {
11284  lex_state_set(parser, PM_LEX_STATE_ARG);
11285  }
11286 
11287  LEX(PM_TOKEN_BACKTICK);
11288  }
11289 
11290  lex_mode_push_string(parser, true, false, '\0', '`');
11291  LEX(PM_TOKEN_BACKTICK);
11292  }
11293 
11294  // single-quoted string literal
11295  case '\'': {
11296  bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11297  lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
11298  LEX(PM_TOKEN_STRING_BEGIN);
11299  }
11300 
11301  // ? character literal
11302  case '?':
11303  LEX(lex_question_mark(parser));
11304 
11305  // & && &&= &= &.
11306  case '&': {
11307  if (match(parser, '&')) {
11308  lex_state_set(parser, PM_LEX_STATE_BEG);
11309 
11310  if (match(parser, '=')) {
11312  }
11313 
11315  }
11316 
11317  if (match(parser, '=')) {
11318  lex_state_set(parser, PM_LEX_STATE_BEG);
11320  }
11321 
11322  if (match(parser, '.')) {
11323  lex_state_set(parser, PM_LEX_STATE_DOT);
11325  }
11326 
11328  if (lex_state_spcarg_p(parser, space_seen)) {
11329  if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
11330  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11331  } else {
11332  const uint8_t delim = peek_offset(parser, 1);
11333 
11334  if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1)) {
11335  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11336  }
11337  }
11338 
11340  } else if (lex_state_beg_p(parser)) {
11342  } else if (ambiguous_operator_p(parser, space_seen)) {
11343  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11344  }
11345 
11346  if (lex_state_operator_p(parser)) {
11347  lex_state_set(parser, PM_LEX_STATE_ARG);
11348  } else {
11349  lex_state_set(parser, PM_LEX_STATE_BEG);
11350  }
11351 
11352  LEX(type);
11353  }
11354 
11355  // | || ||= |=
11356  case '|':
11357  if (match(parser, '|')) {
11358  if (match(parser, '=')) {
11359  lex_state_set(parser, PM_LEX_STATE_BEG);
11361  }
11362 
11363  if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
11364  parser->current.end--;
11365  LEX(PM_TOKEN_PIPE);
11366  }
11367 
11368  lex_state_set(parser, PM_LEX_STATE_BEG);
11369  LEX(PM_TOKEN_PIPE_PIPE);
11370  }
11371 
11372  if (match(parser, '=')) {
11373  lex_state_set(parser, PM_LEX_STATE_BEG);
11374  LEX(PM_TOKEN_PIPE_EQUAL);
11375  }
11376 
11377  if (lex_state_operator_p(parser)) {
11378  lex_state_set(parser, PM_LEX_STATE_ARG);
11379  } else {
11380  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11381  }
11382 
11383  LEX(PM_TOKEN_PIPE);
11384 
11385  // + += +@
11386  case '+': {
11387  if (lex_state_operator_p(parser)) {
11388  lex_state_set(parser, PM_LEX_STATE_ARG);
11389 
11390  if (match(parser, '@')) {
11391  LEX(PM_TOKEN_UPLUS);
11392  }
11393 
11394  LEX(PM_TOKEN_PLUS);
11395  }
11396 
11397  if (match(parser, '=')) {
11398  lex_state_set(parser, PM_LEX_STATE_BEG);
11399  LEX(PM_TOKEN_PLUS_EQUAL);
11400  }
11401 
11402  if (
11403  lex_state_beg_p(parser) ||
11404  (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
11405  ) {
11406  lex_state_set(parser, PM_LEX_STATE_BEG);
11407 
11408  if (pm_char_is_decimal_digit(peek(parser))) {
11409  parser->current.end++;
11410  pm_token_type_t type = lex_numeric(parser);
11411  lex_state_set(parser, PM_LEX_STATE_END);
11412  LEX(type);
11413  }
11414 
11415  LEX(PM_TOKEN_UPLUS);
11416  }
11417 
11418  if (ambiguous_operator_p(parser, space_seen)) {
11419  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11420  }
11421 
11422  lex_state_set(parser, PM_LEX_STATE_BEG);
11423  LEX(PM_TOKEN_PLUS);
11424  }
11425 
11426  // - -= -> -@
11427  case '-': {
11428  if (lex_state_operator_p(parser)) {
11429  lex_state_set(parser, PM_LEX_STATE_ARG);
11430 
11431  if (match(parser, '@')) {
11432  LEX(PM_TOKEN_UMINUS);
11433  }
11434 
11435  LEX(PM_TOKEN_MINUS);
11436  }
11437 
11438  if (match(parser, '=')) {
11439  lex_state_set(parser, PM_LEX_STATE_BEG);
11440  LEX(PM_TOKEN_MINUS_EQUAL);
11441  }
11442 
11443  if (match(parser, '>')) {
11444  lex_state_set(parser, PM_LEX_STATE_ENDFN);
11446  }
11447 
11448  bool spcarg = lex_state_spcarg_p(parser, space_seen);
11449  bool is_beg = lex_state_beg_p(parser);
11450  if (!is_beg && spcarg) {
11451  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
11452  }
11453 
11454  if (is_beg || spcarg) {
11455  lex_state_set(parser, PM_LEX_STATE_BEG);
11457  }
11458 
11459  if (ambiguous_operator_p(parser, space_seen)) {
11460  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11461  }
11462 
11463  lex_state_set(parser, PM_LEX_STATE_BEG);
11464  LEX(PM_TOKEN_MINUS);
11465  }
11466 
11467  // . .. ...
11468  case '.': {
11469  bool beg_p = lex_state_beg_p(parser);
11470 
11471  if (match(parser, '.')) {
11472  if (match(parser, '.')) {
11473  // If we're _not_ inside a range within default parameters
11474  if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
11475  if (lex_state_p(parser, PM_LEX_STATE_END)) {
11476  lex_state_set(parser, PM_LEX_STATE_BEG);
11477  } else {
11478  lex_state_set(parser, PM_LEX_STATE_ENDARG);
11479  }
11480  LEX(PM_TOKEN_UDOT_DOT_DOT);
11481  }
11482 
11483  if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
11484  pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
11485  }
11486 
11487  lex_state_set(parser, PM_LEX_STATE_BEG);
11489  }
11490 
11491  lex_state_set(parser, PM_LEX_STATE_BEG);
11492  LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
11493  }
11494 
11495  lex_state_set(parser, PM_LEX_STATE_DOT);
11496  LEX(PM_TOKEN_DOT);
11497  }
11498 
11499  // integer
11500  case '0':
11501  case '1':
11502  case '2':
11503  case '3':
11504  case '4':
11505  case '5':
11506  case '6':
11507  case '7':
11508  case '8':
11509  case '9': {
11510  pm_token_type_t type = lex_numeric(parser);
11511  lex_state_set(parser, PM_LEX_STATE_END);
11512  LEX(type);
11513  }
11514 
11515  // :: symbol
11516  case ':':
11517  if (match(parser, ':')) {
11518  if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11519  lex_state_set(parser, PM_LEX_STATE_BEG);
11520  LEX(PM_TOKEN_UCOLON_COLON);
11521  }
11522 
11523  lex_state_set(parser, PM_LEX_STATE_DOT);
11524  LEX(PM_TOKEN_COLON_COLON);
11525  }
11526 
11527  if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11528  lex_state_set(parser, PM_LEX_STATE_BEG);
11529  LEX(PM_TOKEN_COLON);
11530  }
11531 
11532  if (peek(parser) == '"' || peek(parser) == '\'') {
11533  lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11534  parser->current.end++;
11535  }
11536 
11537  lex_state_set(parser, PM_LEX_STATE_FNAME);
11538  LEX(PM_TOKEN_SYMBOL_BEGIN);
11539 
11540  // / /=
11541  case '/':
11542  if (lex_state_beg_p(parser)) {
11543  lex_mode_push_regexp(parser, '\0', '/');
11544  LEX(PM_TOKEN_REGEXP_BEGIN);
11545  }
11546 
11547  if (match(parser, '=')) {
11548  lex_state_set(parser, PM_LEX_STATE_BEG);
11549  LEX(PM_TOKEN_SLASH_EQUAL);
11550  }
11551 
11552  if (lex_state_spcarg_p(parser, space_seen)) {
11553  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11554  lex_mode_push_regexp(parser, '\0', '/');
11555  LEX(PM_TOKEN_REGEXP_BEGIN);
11556  }
11557 
11558  if (ambiguous_operator_p(parser, space_seen)) {
11559  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11560  }
11561 
11562  if (lex_state_operator_p(parser)) {
11563  lex_state_set(parser, PM_LEX_STATE_ARG);
11564  } else {
11565  lex_state_set(parser, PM_LEX_STATE_BEG);
11566  }
11567 
11568  LEX(PM_TOKEN_SLASH);
11569 
11570  // ^ ^=
11571  case '^':
11572  if (lex_state_operator_p(parser)) {
11573  lex_state_set(parser, PM_LEX_STATE_ARG);
11574  } else {
11575  lex_state_set(parser, PM_LEX_STATE_BEG);
11576  }
11577  LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11578 
11579  // ~ ~@
11580  case '~':
11581  if (lex_state_operator_p(parser)) {
11582  (void) match(parser, '@');
11583  lex_state_set(parser, PM_LEX_STATE_ARG);
11584  } else {
11585  lex_state_set(parser, PM_LEX_STATE_BEG);
11586  }
11587 
11588  LEX(PM_TOKEN_TILDE);
11589 
11590  // % %= %i %I %q %Q %r %s %w %W %x
11591  case '%': {
11592  // If there is no subsequent character then we have an
11593  // invalid token. We're going to say it's the percent
11594  // operator because we don't want to move into the string
11595  // lex mode unnecessarily.
11596  if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11597  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11598  LEX(PM_TOKEN_PERCENT);
11599  }
11600 
11601  if (!lex_state_beg_p(parser) && match(parser, '=')) {
11602  lex_state_set(parser, PM_LEX_STATE_BEG);
11604  } else if (
11605  lex_state_beg_p(parser) ||
11606  (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11607  lex_state_spcarg_p(parser, space_seen)
11608  ) {
11609  if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11610  if (*parser->current.end >= 0x80) {
11611  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11612  }
11613 
11614  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11615  lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11616  LEX(PM_TOKEN_STRING_BEGIN);
11617  }
11618 
11619  // Delimiters for %-literals cannot be alphanumeric. We
11620  // validate that here.
11621  uint8_t delimiter = peek_offset(parser, 1);
11622  if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11623  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11624  goto lex_next_token;
11625  }
11626 
11627  switch (peek(parser)) {
11628  case 'i': {
11629  parser->current.end++;
11630 
11631  if (parser->current.end < parser->end) {
11632  lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11633  } else {
11634  lex_mode_push_list_eof(parser);
11635  }
11636 
11638  }
11639  case 'I': {
11640  parser->current.end++;
11641 
11642  if (parser->current.end < parser->end) {
11643  lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11644  } else {
11645  lex_mode_push_list_eof(parser);
11646  }
11647 
11649  }
11650  case 'r': {
11651  parser->current.end++;
11652 
11653  if (parser->current.end < parser->end) {
11654  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11655  lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11656  } else {
11657  lex_mode_push_regexp(parser, '\0', '\0');
11658  }
11659 
11660  LEX(PM_TOKEN_REGEXP_BEGIN);
11661  }
11662  case 'q': {
11663  parser->current.end++;
11664 
11665  if (parser->current.end < parser->end) {
11666  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11667  lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11668  } else {
11669  lex_mode_push_string_eof(parser);
11670  }
11671 
11672  LEX(PM_TOKEN_STRING_BEGIN);
11673  }
11674  case 'Q': {
11675  parser->current.end++;
11676 
11677  if (parser->current.end < parser->end) {
11678  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11679  lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11680  } else {
11681  lex_mode_push_string_eof(parser);
11682  }
11683 
11684  LEX(PM_TOKEN_STRING_BEGIN);
11685  }
11686  case 's': {
11687  parser->current.end++;
11688 
11689  if (parser->current.end < parser->end) {
11690  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11691  lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11692  lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11693  } else {
11694  lex_mode_push_string_eof(parser);
11695  }
11696 
11697  LEX(PM_TOKEN_SYMBOL_BEGIN);
11698  }
11699  case 'w': {
11700  parser->current.end++;
11701 
11702  if (parser->current.end < parser->end) {
11703  lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11704  } else {
11705  lex_mode_push_list_eof(parser);
11706  }
11707 
11709  }
11710  case 'W': {
11711  parser->current.end++;
11712 
11713  if (parser->current.end < parser->end) {
11714  lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11715  } else {
11716  lex_mode_push_list_eof(parser);
11717  }
11718 
11720  }
11721  case 'x': {
11722  parser->current.end++;
11723 
11724  if (parser->current.end < parser->end) {
11725  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11726  lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11727  } else {
11728  lex_mode_push_string_eof(parser);
11729  }
11730 
11732  }
11733  default:
11734  // If we get to this point, then we have a % that is completely
11735  // unparsable. In this case we'll just drop it from the parser
11736  // and skip past it and hope that the next token is something
11737  // that we can parse.
11738  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11739  goto lex_next_token;
11740  }
11741  }
11742 
11743  if (ambiguous_operator_p(parser, space_seen)) {
11744  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11745  }
11746 
11747  lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11748  LEX(PM_TOKEN_PERCENT);
11749  }
11750 
11751  // global variable
11752  case '$': {
11753  pm_token_type_t type = lex_global_variable(parser);
11754 
11755  // If we're lexing an embedded variable, then we need to pop back into
11756  // the parent lex context.
11757  if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11758  lex_mode_pop(parser);
11759  }
11760 
11761  lex_state_set(parser, PM_LEX_STATE_END);
11762  LEX(type);
11763  }
11764 
11765  // instance variable, class variable
11766  case '@':
11767  lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11768  LEX(lex_at_variable(parser));
11769 
11770  default: {
11771  if (*parser->current.start != '_') {
11772  size_t width = char_is_identifier_start(parser, parser->current.start);
11773 
11774  // If this isn't the beginning of an identifier, then
11775  // it's an invalid token as we've exhausted all of the
11776  // other options. We'll skip past it and return the next
11777  // token after adding an appropriate error message.
11778  if (!width) {
11779  if (*parser->current.start >= 0x80) {
11780  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11781  } else if (*parser->current.start == '\\') {
11782  switch (peek_at(parser, parser->current.start + 1)) {
11783  case ' ':
11784  parser->current.end++;
11785  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11786  break;
11787  case '\f':
11788  parser->current.end++;
11789  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11790  break;
11791  case '\t':
11792  parser->current.end++;
11793  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11794  break;
11795  case '\v':
11796  parser->current.end++;
11797  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11798  break;
11799  case '\r':
11800  if (peek_at(parser, parser->current.start + 2) != '\n') {
11801  parser->current.end++;
11802  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11803  break;
11804  }
11805  /* fallthrough */
11806  default:
11807  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11808  break;
11809  }
11810  } else if (char_is_ascii_printable(*parser->current.start)) {
11811  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11812  } else {
11813  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11814  }
11815 
11816  goto lex_next_token;
11817  }
11818 
11819  parser->current.end = parser->current.start + width;
11820  }
11821 
11822  pm_token_type_t type = lex_identifier(parser, previous_command_start);
11823 
11824  // If we've hit a __END__ and it was at the start of the
11825  // line or the start of the file and it is followed by
11826  // either a \n or a \r\n, then this is the last token of the
11827  // file.
11828  if (
11829  ((parser->current.end - parser->current.start) == 7) &&
11830  current_token_starts_line(parser) &&
11831  (memcmp(parser->current.start, "__END__", 7) == 0) &&
11832  (parser->current.end == parser->end || match_eol(parser))
11833  ) {
11834  // Since we know we're about to add an __END__ comment,
11835  // we know we need to add all of the newlines to get the
11836  // correct column information for it.
11837  const uint8_t *cursor = parser->current.end;
11838  while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11839  pm_newline_list_append(&parser->newline_list, cursor++);
11840  }
11841 
11842  parser->current.end = parser->end;
11843  parser->current.type = PM_TOKEN___END__;
11844  parser_lex_callback(parser);
11845 
11846  parser->data_loc.start = parser->current.start;
11847  parser->data_loc.end = parser->current.end;
11848 
11849  LEX(PM_TOKEN_EOF);
11850  }
11851 
11852  pm_lex_state_t last_state = parser->lex_state;
11853 
11855  if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11856  if (previous_command_start) {
11857  lex_state_set(parser, PM_LEX_STATE_CMDARG);
11858  } else {
11859  lex_state_set(parser, PM_LEX_STATE_ARG);
11860  }
11861  } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11862  lex_state_set(parser, PM_LEX_STATE_ENDFN);
11863  } else {
11864  lex_state_set(parser, PM_LEX_STATE_END);
11865  }
11866  }
11867 
11868  if (
11869  !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11870  (type == PM_TOKEN_IDENTIFIER) &&
11871  ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11872  pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11873  ) {
11874  lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11875  }
11876 
11877  LEX(type);
11878  }
11879  }
11880  }
11881  case PM_LEX_LIST: {
11882  if (parser->next_start != NULL) {
11883  parser->current.end = parser->next_start;
11884  parser->next_start = NULL;
11885  }
11886 
11887  // First we'll set the beginning of the token.
11888  parser->current.start = parser->current.end;
11889 
11890  // If there's any whitespace at the start of the list, then we're
11891  // going to trim it off the beginning and create a new token.
11892  size_t whitespace;
11893 
11894  if (parser->heredoc_end) {
11895  whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11896  if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11897  whitespace += 1;
11898  }
11899  } else {
11900  whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11901  }
11902 
11903  if (whitespace > 0) {
11904  parser->current.end += whitespace;
11905  if (peek_offset(parser, -1) == '\n') {
11906  // mutates next_start
11907  parser_flush_heredoc_end(parser);
11908  }
11909  LEX(PM_TOKEN_WORDS_SEP);
11910  }
11911 
11912  // We'll check if we're at the end of the file. If we are, then we
11913  // need to return the EOF token.
11914  if (parser->current.end >= parser->end) {
11915  LEX(PM_TOKEN_EOF);
11916  }
11917 
11918  // Here we'll get a list of the places where strpbrk should break,
11919  // and then find the first one.
11920  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11921  const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11922  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11923 
11924  // If we haven't found an escape yet, then this buffer will be
11925  // unallocated since we can refer directly to the source string.
11926  pm_token_buffer_t token_buffer = { 0 };
11927 
11928  while (breakpoint != NULL) {
11929  // If we hit whitespace, then we must have received content by
11930  // now, so we can return an element of the list.
11931  if (pm_char_is_whitespace(*breakpoint)) {
11932  parser->current.end = breakpoint;
11933  pm_token_buffer_flush(parser, &token_buffer);
11935  }
11936 
11937  // If we hit the terminator, we need to check which token to
11938  // return.
11939  if (*breakpoint == lex_mode->as.list.terminator) {
11940  // If this terminator doesn't actually close the list, then
11941  // we need to continue on past it.
11942  if (lex_mode->as.list.nesting > 0) {
11943  parser->current.end = breakpoint + 1;
11944  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11945  lex_mode->as.list.nesting--;
11946  continue;
11947  }
11948 
11949  // If we've hit the terminator and we've already skipped
11950  // past content, then we can return a list node.
11951  if (breakpoint > parser->current.start) {
11952  parser->current.end = breakpoint;
11953  pm_token_buffer_flush(parser, &token_buffer);
11955  }
11956 
11957  // Otherwise, switch back to the default state and return
11958  // the end of the list.
11959  parser->current.end = breakpoint + 1;
11960  lex_mode_pop(parser);
11961  lex_state_set(parser, PM_LEX_STATE_END);
11962  LEX(PM_TOKEN_STRING_END);
11963  }
11964 
11965  // If we hit a null byte, skip directly past it.
11966  if (*breakpoint == '\0') {
11967  breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11968  continue;
11969  }
11970 
11971  // If we hit escapes, then we need to treat the next token
11972  // literally. In this case we'll skip past the next character
11973  // and find the next breakpoint.
11974  if (*breakpoint == '\\') {
11975  parser->current.end = breakpoint + 1;
11976 
11977  // If we've hit the end of the file, then break out of the
11978  // loop by setting the breakpoint to NULL.
11979  if (parser->current.end == parser->end) {
11980  breakpoint = NULL;
11981  continue;
11982  }
11983 
11984  pm_token_buffer_escape(parser, &token_buffer);
11985  uint8_t peeked = peek(parser);
11986 
11987  switch (peeked) {
11988  case ' ':
11989  case '\f':
11990  case '\t':
11991  case '\v':
11992  case '\\':
11993  pm_token_buffer_push_byte(&token_buffer, peeked);
11994  parser->current.end++;
11995  break;
11996  case '\r':
11997  parser->current.end++;
11998  if (peek(parser) != '\n') {
11999  pm_token_buffer_push_byte(&token_buffer, '\r');
12000  break;
12001  }
12002  /* fallthrough */
12003  case '\n':
12004  pm_token_buffer_push_byte(&token_buffer, '\n');
12005 
12006  if (parser->heredoc_end) {
12007  // ... if we are on the same line as a heredoc,
12008  // flush the heredoc and continue parsing after
12009  // heredoc_end.
12010  parser_flush_heredoc_end(parser);
12011  pm_token_buffer_copy(parser, &token_buffer);
12013  } else {
12014  // ... else track the newline.
12015  pm_newline_list_append(&parser->newline_list, parser->current.end);
12016  }
12017 
12018  parser->current.end++;
12019  break;
12020  default:
12021  if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
12022  pm_token_buffer_push_byte(&token_buffer, peeked);
12023  parser->current.end++;
12024  } else if (lex_mode->as.list.interpolation) {
12025  escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12026  } else {
12027  pm_token_buffer_push_byte(&token_buffer, '\\');
12028  pm_token_buffer_push_escaped(&token_buffer, parser);
12029  }
12030 
12031  break;
12032  }
12033 
12034  token_buffer.cursor = parser->current.end;
12035  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12036  continue;
12037  }
12038 
12039  // If we hit a #, then we will attempt to lex interpolation.
12040  if (*breakpoint == '#') {
12041  pm_token_type_t type = lex_interpolation(parser, breakpoint);
12042 
12043  if (type == PM_TOKEN_NOT_PROVIDED) {
12044  // If we haven't returned at this point then we had something
12045  // that looked like an interpolated class or instance variable
12046  // like "#@" but wasn't actually. In this case we'll just skip
12047  // to the next breakpoint.
12048  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12049  continue;
12050  }
12051 
12052  if (type == PM_TOKEN_STRING_CONTENT) {
12053  pm_token_buffer_flush(parser, &token_buffer);
12054  }
12055 
12056  LEX(type);
12057  }
12058 
12059  // If we've hit the incrementor, then we need to skip past it
12060  // and find the next breakpoint.
12061  assert(*breakpoint == lex_mode->as.list.incrementor);
12062  parser->current.end = breakpoint + 1;
12063  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12064  lex_mode->as.list.nesting++;
12065  continue;
12066  }
12067 
12068  if (parser->current.end > parser->current.start) {
12069  pm_token_buffer_flush(parser, &token_buffer);
12071  }
12072 
12073  // If we were unable to find a breakpoint, then this token hits the
12074  // end of the file.
12075  parser->current.end = parser->end;
12076  pm_token_buffer_flush(parser, &token_buffer);
12078  }
12079  case PM_LEX_REGEXP: {
12080  // First, we'll set the start of this token to be the current end.
12081  if (parser->next_start == NULL) {
12082  parser->current.start = parser->current.end;
12083  } else {
12084  parser->current.start = parser->next_start;
12085  parser->current.end = parser->next_start;
12086  parser->next_start = NULL;
12087  }
12088 
12089  // We'll check if we're at the end of the file. If we are, then we
12090  // need to return the EOF token.
12091  if (parser->current.end >= parser->end) {
12092  LEX(PM_TOKEN_EOF);
12093  }
12094 
12095  // Get a reference to the current mode.
12096  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12097 
12098  // These are the places where we need to split up the content of the
12099  // regular expression. We'll use strpbrk to find the first of these
12100  // characters.
12101  const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
12102  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12103  pm_regexp_token_buffer_t token_buffer = { 0 };
12104 
12105  while (breakpoint != NULL) {
12106  // If we hit the terminator, we need to determine what kind of
12107  // token to return.
12108  if (*breakpoint == lex_mode->as.regexp.terminator) {
12109  if (lex_mode->as.regexp.nesting > 0) {
12110  parser->current.end = breakpoint + 1;
12111  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12112  lex_mode->as.regexp.nesting--;
12113  continue;
12114  }
12115 
12116  // Here we've hit the terminator. If we have already consumed
12117  // content then we need to return that content as string content
12118  // first.
12119  if (breakpoint > parser->current.start) {
12120  parser->current.end = breakpoint;
12121  pm_regexp_token_buffer_flush(parser, &token_buffer);
12123  }
12124 
12125  // Check here if we need to track the newline.
12126  size_t eol_length = match_eol_at(parser, breakpoint);
12127  if (eol_length) {
12128  parser->current.end = breakpoint + eol_length;
12129  pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12130  } else {
12131  parser->current.end = breakpoint + 1;
12132  }
12133 
12134  // Since we've hit the terminator of the regular expression,
12135  // we now need to parse the options.
12136  parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
12137 
12138  lex_mode_pop(parser);
12139  lex_state_set(parser, PM_LEX_STATE_END);
12140  LEX(PM_TOKEN_REGEXP_END);
12141  }
12142 
12143  // If we've hit the incrementor, then we need to skip past it
12144  // and find the next breakpoint.
12145  if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
12146  parser->current.end = breakpoint + 1;
12147  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12148  lex_mode->as.regexp.nesting++;
12149  continue;
12150  }
12151 
12152  switch (*breakpoint) {
12153  case '\0':
12154  // If we hit a null byte, skip directly past it.
12155  parser->current.end = breakpoint + 1;
12156  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12157  break;
12158  case '\r':
12159  if (peek_at(parser, breakpoint + 1) != '\n') {
12160  parser->current.end = breakpoint + 1;
12161  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12162  break;
12163  }
12164 
12165  breakpoint++;
12166  parser->current.end = breakpoint;
12167  pm_regexp_token_buffer_escape(parser, &token_buffer);
12168  token_buffer.base.cursor = breakpoint;
12169 
12170  /* fallthrough */
12171  case '\n':
12172  // If we've hit a newline, then we need to track that in
12173  // the list of newlines.
12174  if (parser->heredoc_end == NULL) {
12175  pm_newline_list_append(&parser->newline_list, breakpoint);
12176  parser->current.end = breakpoint + 1;
12177  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12178  break;
12179  }
12180 
12181  parser->current.end = breakpoint + 1;
12182  parser_flush_heredoc_end(parser);
12183  pm_regexp_token_buffer_flush(parser, &token_buffer);
12185  case '\\': {
12186  // If we hit escapes, then we need to treat the next
12187  // token literally. In this case we'll skip past the
12188  // next character and find the next breakpoint.
12189  parser->current.end = breakpoint + 1;
12190 
12191  // If we've hit the end of the file, then break out of
12192  // the loop by setting the breakpoint to NULL.
12193  if (parser->current.end == parser->end) {
12194  breakpoint = NULL;
12195  break;
12196  }
12197 
12198  pm_regexp_token_buffer_escape(parser, &token_buffer);
12199  uint8_t peeked = peek(parser);
12200 
12201  switch (peeked) {
12202  case '\r':
12203  parser->current.end++;
12204  if (peek(parser) != '\n') {
12205  if (lex_mode->as.regexp.terminator != '\r') {
12206  pm_token_buffer_push_byte(&token_buffer.base, '\\');
12207  }
12208  pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
12209  pm_token_buffer_push_byte(&token_buffer.base, '\r');
12210  break;
12211  }
12212  /* fallthrough */
12213  case '\n':
12214  if (parser->heredoc_end) {
12215  // ... if we are on the same line as a heredoc,
12216  // flush the heredoc and continue parsing after
12217  // heredoc_end.
12218  parser_flush_heredoc_end(parser);
12219  pm_regexp_token_buffer_copy(parser, &token_buffer);
12221  } else {
12222  // ... else track the newline.
12223  pm_newline_list_append(&parser->newline_list, parser->current.end);
12224  }
12225 
12226  parser->current.end++;
12227  break;
12228  case 'c':
12229  case 'C':
12230  case 'M':
12231  case 'u':
12232  case 'x':
12233  escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
12234  break;
12235  default:
12236  if (lex_mode->as.regexp.terminator == peeked) {
12237  // Some characters when they are used as the
12238  // terminator also receive an escape. They are
12239  // enumerated here.
12240  switch (peeked) {
12241  case '$': case ')': case '*': case '+':
12242  case '.': case '>': case '?': case ']':
12243  case '^': case '|': case '}':
12244  pm_token_buffer_push_byte(&token_buffer.base, '\\');
12245  break;
12246  default:
12247  break;
12248  }
12249 
12250  pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
12251  pm_token_buffer_push_byte(&token_buffer.base, peeked);
12252  parser->current.end++;
12253  break;
12254  }
12255 
12256  if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
12257  pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
12258  break;
12259  }
12260 
12261  token_buffer.base.cursor = parser->current.end;
12262  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12263  break;
12264  }
12265  case '#': {
12266  // If we hit a #, then we will attempt to lex
12267  // interpolation.
12268  pm_token_type_t type = lex_interpolation(parser, breakpoint);
12269 
12270  if (type == PM_TOKEN_NOT_PROVIDED) {
12271  // If we haven't returned at this point then we had
12272  // something that looked like an interpolated class or
12273  // instance variable like "#@" but wasn't actually. In
12274  // this case we'll just skip to the next breakpoint.
12275  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12276  break;
12277  }
12278 
12279  if (type == PM_TOKEN_STRING_CONTENT) {
12280  pm_regexp_token_buffer_flush(parser, &token_buffer);
12281  }
12282 
12283  LEX(type);
12284  }
12285  default:
12286  assert(false && "unreachable");
12287  break;
12288  }
12289  }
12290 
12291  if (parser->current.end > parser->current.start) {
12292  pm_regexp_token_buffer_flush(parser, &token_buffer);
12294  }
12295 
12296  // If we were unable to find a breakpoint, then this token hits the
12297  // end of the file.
12298  parser->current.end = parser->end;
12299  pm_regexp_token_buffer_flush(parser, &token_buffer);
12301  }
12302  case PM_LEX_STRING: {
12303  // First, we'll set to start of this token to be the current end.
12304  if (parser->next_start == NULL) {
12305  parser->current.start = parser->current.end;
12306  } else {
12307  parser->current.start = parser->next_start;
12308  parser->current.end = parser->next_start;
12309  parser->next_start = NULL;
12310  }
12311 
12312  // We'll check if we're at the end of the file. If we are, then we need to
12313  // return the EOF token.
12314  if (parser->current.end >= parser->end) {
12315  LEX(PM_TOKEN_EOF);
12316  }
12317 
12318  // These are the places where we need to split up the content of the
12319  // string. We'll use strpbrk to find the first of these characters.
12320  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12321  const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
12322  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12323 
12324  // If we haven't found an escape yet, then this buffer will be
12325  // unallocated since we can refer directly to the source string.
12326  pm_token_buffer_t token_buffer = { 0 };
12327 
12328  while (breakpoint != NULL) {
12329  // If we hit the incrementor, then we'll increment then nesting and
12330  // continue lexing.
12331  if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
12332  lex_mode->as.string.nesting++;
12333  parser->current.end = breakpoint + 1;
12334  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12335  continue;
12336  }
12337 
12338  // Note that we have to check the terminator here first because we could
12339  // potentially be parsing a % string that has a # character as the
12340  // terminator.
12341  if (*breakpoint == lex_mode->as.string.terminator) {
12342  // If this terminator doesn't actually close the string, then we need
12343  // to continue on past it.
12344  if (lex_mode->as.string.nesting > 0) {
12345  parser->current.end = breakpoint + 1;
12346  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12347  lex_mode->as.string.nesting--;
12348  continue;
12349  }
12350 
12351  // Here we've hit the terminator. If we have already consumed content
12352  // then we need to return that content as string content first.
12353  if (breakpoint > parser->current.start) {
12354  parser->current.end = breakpoint;
12355  pm_token_buffer_flush(parser, &token_buffer);
12357  }
12358 
12359  // Otherwise we need to switch back to the parent lex mode and
12360  // return the end of the string.
12361  size_t eol_length = match_eol_at(parser, breakpoint);
12362  if (eol_length) {
12363  parser->current.end = breakpoint + eol_length;
12364  pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12365  } else {
12366  parser->current.end = breakpoint + 1;
12367  }
12368 
12369  if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
12370  parser->current.end++;
12371  lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
12372  lex_mode_pop(parser);
12373  LEX(PM_TOKEN_LABEL_END);
12374  }
12375 
12376  lex_state_set(parser, PM_LEX_STATE_END);
12377  lex_mode_pop(parser);
12378  LEX(PM_TOKEN_STRING_END);
12379  }
12380 
12381  switch (*breakpoint) {
12382  case '\0':
12383  // Skip directly past the null character.
12384  parser->current.end = breakpoint + 1;
12385  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12386  break;
12387  case '\r':
12388  if (peek_at(parser, breakpoint + 1) != '\n') {
12389  parser->current.end = breakpoint + 1;
12390  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12391  break;
12392  }
12393 
12394  // If we hit a \r\n sequence, then we need to treat it
12395  // as a newline.
12396  breakpoint++;
12397  parser->current.end = breakpoint;
12398  pm_token_buffer_escape(parser, &token_buffer);
12399  token_buffer.cursor = breakpoint;
12400 
12401  /* fallthrough */
12402  case '\n':
12403  // When we hit a newline, we need to flush any potential
12404  // heredocs. Note that this has to happen after we check
12405  // for the terminator in case the terminator is a
12406  // newline character.
12407  if (parser->heredoc_end == NULL) {
12408  pm_newline_list_append(&parser->newline_list, breakpoint);
12409  parser->current.end = breakpoint + 1;
12410  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12411  break;
12412  }
12413 
12414  parser->current.end = breakpoint + 1;
12415  parser_flush_heredoc_end(parser);
12416  pm_token_buffer_flush(parser, &token_buffer);
12418  case '\\': {
12419  // Here we hit escapes.
12420  parser->current.end = breakpoint + 1;
12421 
12422  // If we've hit the end of the file, then break out of
12423  // the loop by setting the breakpoint to NULL.
12424  if (parser->current.end == parser->end) {
12425  breakpoint = NULL;
12426  continue;
12427  }
12428 
12429  pm_token_buffer_escape(parser, &token_buffer);
12430  uint8_t peeked = peek(parser);
12431 
12432  switch (peeked) {
12433  case '\\':
12434  pm_token_buffer_push_byte(&token_buffer, '\\');
12435  parser->current.end++;
12436  break;
12437  case '\r':
12438  parser->current.end++;
12439  if (peek(parser) != '\n') {
12440  if (!lex_mode->as.string.interpolation) {
12441  pm_token_buffer_push_byte(&token_buffer, '\\');
12442  }
12443  pm_token_buffer_push_byte(&token_buffer, '\r');
12444  break;
12445  }
12446  /* fallthrough */
12447  case '\n':
12448  if (!lex_mode->as.string.interpolation) {
12449  pm_token_buffer_push_byte(&token_buffer, '\\');
12450  pm_token_buffer_push_byte(&token_buffer, '\n');
12451  }
12452 
12453  if (parser->heredoc_end) {
12454  // ... if we are on the same line as a heredoc,
12455  // flush the heredoc and continue parsing after
12456  // heredoc_end.
12457  parser_flush_heredoc_end(parser);
12458  pm_token_buffer_copy(parser, &token_buffer);
12460  } else {
12461  // ... else track the newline.
12462  pm_newline_list_append(&parser->newline_list, parser->current.end);
12463  }
12464 
12465  parser->current.end++;
12466  break;
12467  default:
12468  if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12469  pm_token_buffer_push_byte(&token_buffer, peeked);
12470  parser->current.end++;
12471  } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12472  pm_token_buffer_push_byte(&token_buffer, peeked);
12473  parser->current.end++;
12474  } else if (lex_mode->as.string.interpolation) {
12475  escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12476  } else {
12477  pm_token_buffer_push_byte(&token_buffer, '\\');
12478  pm_token_buffer_push_escaped(&token_buffer, parser);
12479  }
12480 
12481  break;
12482  }
12483 
12484  token_buffer.cursor = parser->current.end;
12485  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12486  break;
12487  }
12488  case '#': {
12489  pm_token_type_t type = lex_interpolation(parser, breakpoint);
12490 
12491  if (type == PM_TOKEN_NOT_PROVIDED) {
12492  // If we haven't returned at this point then we had something that
12493  // looked like an interpolated class or instance variable like "#@"
12494  // but wasn't actually. In this case we'll just skip to the next
12495  // breakpoint.
12496  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12497  break;
12498  }
12499 
12500  if (type == PM_TOKEN_STRING_CONTENT) {
12501  pm_token_buffer_flush(parser, &token_buffer);
12502  }
12503 
12504  LEX(type);
12505  }
12506  default:
12507  assert(false && "unreachable");
12508  }
12509  }
12510 
12511  if (parser->current.end > parser->current.start) {
12512  pm_token_buffer_flush(parser, &token_buffer);
12514  }
12515 
12516  // If we've hit the end of the string, then this is an unterminated
12517  // string. In that case we'll return a string content token.
12518  parser->current.end = parser->end;
12519  pm_token_buffer_flush(parser, &token_buffer);
12521  }
12522  case PM_LEX_HEREDOC: {
12523  // First, we'll set to start of this token.
12524  if (parser->next_start == NULL) {
12525  parser->current.start = parser->current.end;
12526  } else {
12527  parser->current.start = parser->next_start;
12528  parser->current.end = parser->next_start;
12529  parser->heredoc_end = NULL;
12530  parser->next_start = NULL;
12531  }
12532 
12533  // Now let's grab the information about the identifier off of the
12534  // current lex mode.
12535  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12536  pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12537 
12538  bool line_continuation = lex_mode->as.heredoc.line_continuation;
12539  lex_mode->as.heredoc.line_continuation = false;
12540 
12541  // We'll check if we're at the end of the file. If we are, then we
12542  // will add an error (because we weren't able to find the
12543  // terminator) but still continue parsing so that content after the
12544  // declaration of the heredoc can be parsed.
12545  if (parser->current.end >= parser->end) {
12546  pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12547  parser->next_start = lex_mode->as.heredoc.next_start;
12548  parser->heredoc_end = parser->current.end;
12549  lex_state_set(parser, PM_LEX_STATE_END);
12550  lex_mode_pop(parser);
12551  LEX(PM_TOKEN_HEREDOC_END);
12552  }
12553 
12554  const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12555  size_t ident_length = heredoc_lex_mode->ident_length;
12556 
12557  // If we are immediately following a newline and we have hit the
12558  // terminator, then we need to return the ending of the heredoc.
12559  if (current_token_starts_line(parser)) {
12560  const uint8_t *start = parser->current.start;
12561 
12562  if (!line_continuation && (start + ident_length <= parser->end)) {
12563  const uint8_t *newline = next_newline(start, parser->end - start);
12564  const uint8_t *ident_end = newline;
12565  const uint8_t *terminator_end = newline;
12566 
12567  if (newline == NULL) {
12568  terminator_end = parser->end;
12569  ident_end = parser->end;
12570  } else {
12571  terminator_end++;
12572  if (newline[-1] == '\r') {
12573  ident_end--; // Remove \r
12574  }
12575  }
12576 
12577  const uint8_t *terminator_start = ident_end - ident_length;
12578  const uint8_t *cursor = start;
12579 
12580  if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12581  while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12582  cursor++;
12583  }
12584  }
12585 
12586  if (
12587  (cursor == terminator_start) &&
12588  (memcmp(terminator_start, ident_start, ident_length) == 0)
12589  ) {
12590  if (newline != NULL) {
12591  pm_newline_list_append(&parser->newline_list, newline);
12592  }
12593 
12594  parser->current.end = terminator_end;
12595  if (*lex_mode->as.heredoc.next_start == '\\') {
12596  parser->next_start = NULL;
12597  } else {
12598  parser->next_start = lex_mode->as.heredoc.next_start;
12599  parser->heredoc_end = parser->current.end;
12600  }
12601 
12602  lex_state_set(parser, PM_LEX_STATE_END);
12603  lex_mode_pop(parser);
12604  LEX(PM_TOKEN_HEREDOC_END);
12605  }
12606  }
12607 
12608  size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12609  if (
12610  heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12611  lex_mode->as.heredoc.common_whitespace != NULL &&
12612  (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12613  peek_at(parser, start) != '\n'
12614  ) {
12615  *lex_mode->as.heredoc.common_whitespace = whitespace;
12616  }
12617  }
12618 
12619  // Otherwise we'll be parsing string content. These are the places
12620  // where we need to split up the content of the heredoc. We'll use
12621  // strpbrk to find the first of these characters.
12622  uint8_t breakpoints[] = "\r\n\\#";
12623 
12624  pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12625  if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12626  breakpoints[3] = '\0';
12627  }
12628 
12629  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12630  pm_token_buffer_t token_buffer = { 0 };
12631  bool was_line_continuation = false;
12632 
12633  while (breakpoint != NULL) {
12634  switch (*breakpoint) {
12635  case '\0':
12636  // Skip directly past the null character.
12637  parser->current.end = breakpoint + 1;
12638  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12639  break;
12640  case '\r':
12641  parser->current.end = breakpoint + 1;
12642 
12643  if (peek_at(parser, breakpoint + 1) != '\n') {
12644  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12645  break;
12646  }
12647 
12648  // If we hit a \r\n sequence, then we want to replace it
12649  // with a single \n character in the final string.
12650  breakpoint++;
12651  pm_token_buffer_escape(parser, &token_buffer);
12652  token_buffer.cursor = breakpoint;
12653 
12654  /* fallthrough */
12655  case '\n': {
12656  if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12657  parser_flush_heredoc_end(parser);
12658  parser->current.end = breakpoint + 1;
12659  pm_token_buffer_flush(parser, &token_buffer);
12661  }
12662 
12663  pm_newline_list_append(&parser->newline_list, breakpoint);
12664 
12665  // If we have a - or ~ heredoc, then we can match after
12666  // some leading whitespace.
12667  const uint8_t *start = breakpoint + 1;
12668 
12669  if (!was_line_continuation && (start + ident_length <= parser->end)) {
12670  // We want to match the terminator starting from the end of the line in case
12671  // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12672  const uint8_t *newline = next_newline(start, parser->end - start);
12673 
12674  if (newline == NULL) {
12675  newline = parser->end;
12676  } else if (newline[-1] == '\r') {
12677  newline--; // Remove \r
12678  }
12679 
12680  // Start of a possible terminator.
12681  const uint8_t *terminator_start = newline - ident_length;
12682 
12683  // Cursor to check for the leading whitespace. We skip the
12684  // leading whitespace if we have a - or ~ heredoc.
12685  const uint8_t *cursor = start;
12686 
12687  if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12688  while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12689  cursor++;
12690  }
12691  }
12692 
12693  if (
12694  cursor == terminator_start &&
12695  (memcmp(terminator_start, ident_start, ident_length) == 0)
12696  ) {
12697  parser->current.end = breakpoint + 1;
12698  pm_token_buffer_flush(parser, &token_buffer);
12700  }
12701  }
12702 
12703  size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12704 
12705  // If we have hit a newline that is followed by a valid
12706  // terminator, then we need to return the content of the
12707  // heredoc here as string content. Then, the next time a
12708  // token is lexed, it will match again and return the
12709  // end of the heredoc.
12710  if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12711  if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12712  *lex_mode->as.heredoc.common_whitespace = whitespace;
12713  }
12714 
12715  parser->current.end = breakpoint + 1;
12716  pm_token_buffer_flush(parser, &token_buffer);
12718  }
12719 
12720  // Otherwise we hit a newline and it wasn't followed by
12721  // a terminator, so we can continue parsing.
12722  parser->current.end = breakpoint + 1;
12723  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12724  break;
12725  }
12726  case '\\': {
12727  // If we hit an escape, then we need to skip past
12728  // however many characters the escape takes up. However
12729  // it's important that if \n or \r\n are escaped, we
12730  // stop looping before the newline and not after the
12731  // newline so that we can still potentially find the
12732  // terminator of the heredoc.
12733  parser->current.end = breakpoint + 1;
12734 
12735  // If we've hit the end of the file, then break out of
12736  // the loop by setting the breakpoint to NULL.
12737  if (parser->current.end == parser->end) {
12738  breakpoint = NULL;
12739  continue;
12740  }
12741 
12742  pm_token_buffer_escape(parser, &token_buffer);
12743  uint8_t peeked = peek(parser);
12744 
12745  if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12746  switch (peeked) {
12747  case '\r':
12748  parser->current.end++;
12749  if (peek(parser) != '\n') {
12750  pm_token_buffer_push_byte(&token_buffer, '\\');
12751  pm_token_buffer_push_byte(&token_buffer, '\r');
12752  break;
12753  }
12754  /* fallthrough */
12755  case '\n':
12756  pm_token_buffer_push_byte(&token_buffer, '\\');
12757  pm_token_buffer_push_byte(&token_buffer, '\n');
12758  token_buffer.cursor = parser->current.end + 1;
12759  breakpoint = parser->current.end;
12760  continue;
12761  default:
12762  pm_token_buffer_push_byte(&token_buffer, '\\');
12763  pm_token_buffer_push_escaped(&token_buffer, parser);
12764  break;
12765  }
12766  } else {
12767  switch (peeked) {
12768  case '\r':
12769  parser->current.end++;
12770  if (peek(parser) != '\n') {
12771  pm_token_buffer_push_byte(&token_buffer, '\r');
12772  break;
12773  }
12774  /* fallthrough */
12775  case '\n':
12776  // If we are in a tilde here, we should
12777  // break out of the loop and return the
12778  // string content.
12779  if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12780  const uint8_t *end = parser->current.end;
12781  pm_newline_list_append(&parser->newline_list, end);
12782 
12783  // Here we want the buffer to only
12784  // include up to the backslash.
12785  parser->current.end = breakpoint;
12786  pm_token_buffer_flush(parser, &token_buffer);
12787 
12788  // Now we can advance the end of the
12789  // token past the newline.
12790  parser->current.end = end + 1;
12791  lex_mode->as.heredoc.line_continuation = true;
12793  }
12794 
12795  was_line_continuation = true;
12796  token_buffer.cursor = parser->current.end + 1;
12797  breakpoint = parser->current.end;
12798  continue;
12799  default:
12800  escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12801  break;
12802  }
12803  }
12804 
12805  token_buffer.cursor = parser->current.end;
12806  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12807  break;
12808  }
12809  case '#': {
12810  pm_token_type_t type = lex_interpolation(parser, breakpoint);
12811 
12812  if (type == PM_TOKEN_NOT_PROVIDED) {
12813  // If we haven't returned at this point then we had
12814  // something that looked like an interpolated class
12815  // or instance variable like "#@" but wasn't
12816  // actually. In this case we'll just skip to the
12817  // next breakpoint.
12818  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12819  break;
12820  }
12821 
12822  if (type == PM_TOKEN_STRING_CONTENT) {
12823  pm_token_buffer_flush(parser, &token_buffer);
12824  }
12825 
12826  LEX(type);
12827  }
12828  default:
12829  assert(false && "unreachable");
12830  }
12831 
12832  was_line_continuation = false;
12833  }
12834 
12835  if (parser->current.end > parser->current.start) {
12836  parser->current.end = parser->end;
12837  pm_token_buffer_flush(parser, &token_buffer);
12839  }
12840 
12841  // If we've hit the end of the string, then this is an unterminated
12842  // heredoc. In that case we'll return a string content token.
12843  parser->current.end = parser->end;
12844  pm_token_buffer_flush(parser, &token_buffer);
12846  }
12847  }
12848 
12849  assert(false && "unreachable");
12850 }
12851 
12852 #undef LEX
12853 
12854 /******************************************************************************/
12855 /* Parse functions */
12856 /******************************************************************************/
12857 
/**
 * Binding power (precedence) levels that can be assigned to a token.
 *
 * The levels are listed in ascending numeric order; larger values appear to
 * denote tighter binding (statement-level constructs are lowest, `::` and `.`
 * are highest). The trailing comment on each level lists the operators or
 * keywords it is meant for.
 *
 * Levels are spaced two apart. The associativity macros defined after this
 * enum (LEFT_ASSOCIATIVE, NON_ASSOCIATIVE) use `precedence + 1` as a right
 * binding power, and that odd value falls in the gap between two levels.
 */
typedef enum {
    PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
    PM_BINDING_POWER_STATEMENT = 2,
    PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
    PM_BINDING_POWER_MODIFIER = 6, // if unless until while
    PM_BINDING_POWER_COMPOSITION = 8, // and or
    PM_BINDING_POWER_NOT = 10, // not
    PM_BINDING_POWER_MATCH = 12, // => in
    PM_BINDING_POWER_DEFINED = 14, // defined?
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, // =
    PM_BINDING_POWER_ASSIGNMENT = 18, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
    PM_BINDING_POWER_TERNARY = 20, // ?:
    PM_BINDING_POWER_RANGE = 22, // .. ...
    PM_BINDING_POWER_LOGICAL_OR = 24, // ||
    PM_BINDING_POWER_LOGICAL_AND = 26, // &&
    PM_BINDING_POWER_EQUALITY = 28, // <=> == === != =~ !~
    PM_BINDING_POWER_COMPARISON = 30, // > >= < <=
    PM_BINDING_POWER_BITWISE_OR = 32, // | ^
    PM_BINDING_POWER_BITWISE_AND = 34, // &
    PM_BINDING_POWER_SHIFT = 36, // << >>
    PM_BINDING_POWER_TERM = 38, // + -
    PM_BINDING_POWER_FACTOR = 40, // * / %
    PM_BINDING_POWER_UMINUS = 42, // -@
    PM_BINDING_POWER_EXPONENT = 44, // **
    PM_BINDING_POWER_UNARY = 46, // ! ~ +@
    PM_BINDING_POWER_INDEX = 48, // [] []=
    PM_BINDING_POWER_CALL = 50, // :: .
    PM_BINDING_POWER_MAX = 52
} pm_binding_power_t;
12895 
12900 typedef struct {
12902  pm_binding_power_t left;
12903 
12905  pm_binding_power_t right;
12906 
12908  bool binary;
12909 
12914  bool nonassoc;
12916 
// Brace initializers for pm_binding_powers_t entries. The values are given in
// field order: { left, right, binary, nonassoc }.
//
// The `precedence` argument is parenthesized at every use so that an argument
// containing a lower-precedence operator (for example `a ? b : c`) still
// expands to the intended `(precedence) + 1`.

// Assignment operators: left power UNARY, right power ASSIGNMENT.
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }

// Binary operator whose right power is one above its left power.
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }

// Binary operator whose left and right powers are equal.
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }

// Same powers as LEFT_ASSOCIATIVE, additionally flagged as non-associative.
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }

// Equal left and right powers, and not flagged as a binary operator.
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12922 
12923 pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12924  // rescue
12925  [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
12926 
12927  // if unless until while
12928  [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12929  [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12930  [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12931  [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12932 
12933  // and or
12934  [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12935  [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12936 
12937  // => in
12938  [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12939  [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12940 
12941  // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12942  [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12943  [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12944  [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12945  [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12946  [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12947  [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12948  [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12949  [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12950  [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12951  [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12952  [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12953  [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12954  [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12955  [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12956 
12957  // ?:
12958  [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
12959 
12960  // .. ...
12961  [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12962  [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12963  [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12964  [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12965 
12966  // ||
12967  [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
12968 
12969  // &&
12970  [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
12971 
12972  // != !~ == === =~ <=>
12973  [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12974  [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12975  [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12976  [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12977  [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12978  [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12979 
12980  // > >= < <=
12981  [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12982  [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12983  [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12984  [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12985 
12986  // ^ |
12987  [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12988  [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12989 
12990  // &
12991  [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
12992 
12993  // >> <<
12994  [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12995  [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12996 
12997  // - +
12998  [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12999  [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
13000 
13001  // % / *
13002  [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13003  [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13004  [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
13005  [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
13006 
13007  // -@
13008  [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
13009  [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
13010 
13011  // **
13012  [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
13013  [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13014 
13015  // ! ~ +@
13016  [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13017  [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13018  [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13019 
13020  // [
13021  [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
13022 
13023  // :: . &.
13024  [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13025  [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13026  [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
13027 };
13028 
13029 #undef BINDING_POWER_ASSIGNMENT
13030 #undef LEFT_ASSOCIATIVE
13031 #undef RIGHT_ASSOCIATIVE
13032 #undef RIGHT_ASSOCIATIVE_UNARY
13033 
13037 static inline bool
13038 match1(const pm_parser_t *parser, pm_token_type_t type) {
13039  return parser->current.type == type;
13040 }
13041 
13045 static inline bool
13046 match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13047  return match1(parser, type1) || match1(parser, type2);
13048 }
13049 
13053 static inline bool
13054 match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13055  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
13056 }
13057 
13061 static inline bool
13062 match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
13063  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13064 }
13065 
13069 static inline bool
13070 match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
13071  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
13072 }
13073 
13077 static inline bool
13078 match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
13079  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
13080 }
13081 
13085 static inline bool
13086 match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
13087  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13088 }
13089 
13096 static bool
13097 accept1(pm_parser_t *parser, pm_token_type_t type) {
13098  if (match1(parser, type)) {
13099  parser_lex(parser);
13100  return true;
13101  }
13102  return false;
13103 }
13104 
13109 static inline bool
13110 accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13111  if (match2(parser, type1, type2)) {
13112  parser_lex(parser);
13113  return true;
13114  }
13115  return false;
13116 }
13117 
13122 static inline bool
13123 accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13124  if (match3(parser, type1, type2, type3)) {
13125  parser_lex(parser);
13126  return true;
13127  }
13128  return false;
13129 }
13130 
13142 static void
13143 expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
13144  if (accept1(parser, type)) return;
13145 
13146  const uint8_t *location = parser->previous.end;
13147  pm_parser_err(parser, location, location, diag_id);
13148 
13149  parser->previous.start = location;
13150  parser->previous.type = PM_TOKEN_MISSING;
13151 }
13152 
13157 static void
13158 expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
13159  if (accept2(parser, type1, type2)) return;
13160 
13161  const uint8_t *location = parser->previous.end;
13162  pm_parser_err(parser, location, location, diag_id);
13163 
13164  parser->previous.start = location;
13165  parser->previous.type = PM_TOKEN_MISSING;
13166 }
13167 
13171 static void
13172 expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
13173  if (accept3(parser, type1, type2, type3)) return;
13174 
13175  const uint8_t *location = parser->previous.end;
13176  pm_parser_err(parser, location, location, diag_id);
13177 
13178  parser->previous.start = location;
13179  parser->previous.type = PM_TOKEN_MISSING;
13180 }
13181 
13186 static void
13187 expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13188  if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13189  parser_lex(parser);
13190  } else {
13191  pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13192  parser->previous.start = parser->previous.end;
13193  parser->previous.type = PM_TOKEN_MISSING;
13194  }
13195 }
13196 
// Forward declaration: helpers in this part of the file (for example
// parse_value_expression and parse_targets) call parse_expression, whose
// definition does not appear in this part of the file.
13197 static pm_node_t *
13198 parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
13199 
13204 static pm_node_t *
13205 parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
13206  pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
13207  pm_assert_value_expression(parser, node);
13208  return node;
13209 }
13210 
// Returns whether a token of the given type is treated as possibly starting an
// expression. Apart from the explicitly listed cases, the answer comes from
// the binding powers table: a token whose left binding power is
// PM_BINDING_POWER_UNSET is reported as beginning an expression.
//
// NOTE(review): the listing numbers embedded in this copy skip values inside
// the case lists below (for example 13231 -> 13233 and 13237 -> 13239), so
// some case labels may be missing from this copy. Compare against the upstream
// file before relying on the exact set of tokens.
13229 static inline bool
13230 token_begins_expression_p(pm_token_type_t type) {
13231  switch (type) {
13233  case PM_TOKEN_KEYWORD_IN:
13234  // We need to special case this because it is a binary operator that
13235  // should not be marked as beginning an expression.
13236  return false;
13237  case PM_TOKEN_BRACE_RIGHT:
13239  case PM_TOKEN_COLON:
13240  case PM_TOKEN_COMMA:
13241  case PM_TOKEN_EMBEXPR_END:
13242  case PM_TOKEN_EOF:
13243  case PM_TOKEN_LAMBDA_BEGIN:
13244  case PM_TOKEN_KEYWORD_DO:
13246  case PM_TOKEN_KEYWORD_END:
13247  case PM_TOKEN_KEYWORD_ELSE:
13250  case PM_TOKEN_KEYWORD_THEN:
13252  case PM_TOKEN_KEYWORD_WHEN:
13253  case PM_TOKEN_NEWLINE:
13255  case PM_TOKEN_SEMICOLON:
13256  // The reason we need this short-circuit is because we're using the
13257  // binding powers table to tell us if the subsequent token could
13258  // potentially be the start of an expression. If there _is_ a binding
13259  // power for one of these tokens, then we should remove it from this list
13260  // and let it be handled by the default case below.
13261  assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
13262  return false;
13263  case PM_TOKEN_UAMPERSAND:
13264  // This is a special case because this unary operator cannot appear
13265  // as a general operator, it only appears in certain circumstances.
13266  return false;
13267  case PM_TOKEN_UCOLON_COLON:
13268  case PM_TOKEN_UMINUS:
13269  case PM_TOKEN_UMINUS_NUM:
13270  case PM_TOKEN_UPLUS:
13271  case PM_TOKEN_BANG:
13272  case PM_TOKEN_TILDE:
13273  case PM_TOKEN_UDOT_DOT:
13274  case PM_TOKEN_UDOT_DOT_DOT:
13275  // These unary tokens actually do have binding power associated with them
13276  // so that we can correctly place them into the precedence order. But we
13277  // want them to be marked as beginning an expression, so we need to
13278  // special case them here.
13279  return true;
13280  default:
13281  return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
13282  }
13283 }
13284 
13289 static pm_node_t *
13290 parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
13291  if (accept1(parser, PM_TOKEN_USTAR)) {
13292  pm_token_t operator = parser->previous;
13293  pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13294  return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
13295  }
13296 
13297  return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
13298 }
13299 
13304 static void
13305 parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
13306  // The method name needs to change. If we previously had
13307  // foo, we now need foo=. In this case we'll allocate a new
13308  // owned string, copy the previous method name in, and
13309  // append an =.
13310  pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
13311  size_t length = constant->length;
13312  uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
13313  if (name == NULL) return;
13314 
13315  memcpy(name, constant->start, length);
13316  name[length] = '=';
13317 
13318  // Now switch the name to the new string.
13319  // This silences clang analyzer warning about leak of memory pointed by `name`.
13320  // NOLINTNEXTLINE(clang-analyzer-*)
13321  *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
13322 }
13323 
13330 static pm_node_t *
13331 parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13332  switch (PM_NODE_TYPE(target)) {
13333  case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13334  case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13335  case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13336  case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13337  case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13338  case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13339  case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13340  default: break;
13341  }
13342 
13343  pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13344  pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13345 
13346  pm_node_destroy(parser, target);
13347  return (pm_node_t *) result;
13348 }
13349 
13355 static void
13356 parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13357  pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13358 
13359  for (size_t index = 0; index < implicit_parameters->size; index++) {
13360  if (implicit_parameters->nodes[index] == node) {
13361  // If the node is not the last one in the list, we need to shift the
13362  // remaining nodes down to fill the gap. This is extremely unlikely
13363  // to happen.
13364  if (index != implicit_parameters->size - 1) {
13365  memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13366  }
13367 
13368  implicit_parameters->size--;
13369  break;
13370  }
13371  }
13372 }
13373 
// Convert a node that was parsed in expression position into a node that can
// be used as a write target, dispatching on the node's type.
//
// - `multiple` is checked for call targets: a safe-navigation call reports
//   PM_ERR_UNEXPECTED_SAFE_NAVIGATION when it is true. It is forwarded
//   unchanged when recursing into a splat's expression.
// - `splat_parent` is true when recursing into the expression of a splat; a
//   multi target node in that position reports PM_ERR_WRITE_TARGET_UNEXPECTED.
//
// Node types that are not handled report PM_ERR_WRITE_TARGET_UNEXPECTED and
// are returned unchanged.
//
// NOTE(review): the listing numbers embedded in this copy skip values inside
// this function (for example 13386 -> 13388, 13397 -> 13401, 13419 -> 13422),
// several statements appear without a preceding case label, and the braces do
// not balance. Lines appear to be missing from this copy; compare against the
// upstream file before relying on it.
13382 static pm_node_t *
13383 parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13384  switch (PM_NODE_TYPE(target)) {
13385  case PM_MISSING_NODE:
13386  return target;
13388  case PM_FALSE_NODE:
13389  case PM_SOURCE_FILE_NODE:
13390  case PM_SOURCE_LINE_NODE:
13391  case PM_NIL_NODE:
13392  case PM_SELF_NODE:
13393  case PM_TRUE_NODE: {
13394  // In these special cases, we have specific error messages and we
13395  // will replace them with local variable writes.
13396  return parse_unwriteable_target(parser, target);
13397  }
13401  return target;
13402  case PM_CONSTANT_PATH_NODE:
13403  if (context_def_p(parser)) {
13404  pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13405  }
13406 
13407  assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
13409 
13410  return target;
13411  case PM_CONSTANT_READ_NODE:
13412  if (context_def_p(parser)) {
13413  pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13414  }
13415 
  // The read node is retagged in place; the assert checks that the two node
  // structs have the same size.
13416  assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13417  target->type = PM_CONSTANT_TARGET_NODE;
13418 
13419  return target;
13422  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13423  return target;
13427  return target;
13429  if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13430  PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13431  parse_target_implicit_parameter(parser, target);
13432  }
13433 
13434  const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13435  uint32_t name = cast->name;
13436  uint32_t depth = cast->depth;
13437  pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13438 
13441 
13442  return target;
13443  }
13445  pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13446  pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13447 
13448  parse_target_implicit_parameter(parser, target);
13449  pm_node_destroy(parser, target);
13450 
13451  return node;
13452  }
13456  return target;
13457  case PM_MULTI_TARGET_NODE:
13458  if (splat_parent) {
13459  // Multi target is not accepted in all positions. If this is one
13460  // of them, then we need to add an error.
13461  pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13462  }
13463 
13464  return target;
13465  case PM_SPLAT_NODE: {
13466  pm_splat_node_t *splat = (pm_splat_node_t *) target;
13467 
13468  if (splat->expression != NULL) {
13469  splat->expression = parse_target(parser, splat->expression, multiple, true);
13470  }
13471 
13472  return (pm_node_t *) splat;
13473  }
13474  case PM_CALL_NODE: {
13475  pm_call_node_t *call = (pm_call_node_t *) target;
13476 
13477  // If we have no arguments to the call node and we need this to be a
13478  // target then this is either a method call or a local variable
13479  // write.
13480  if (
13481  (call->message_loc.start != NULL) &&
13482  (call->message_loc.end[-1] != '!') &&
13483  (call->message_loc.end[-1] != '?') &&
13484  (call->opening_loc.start == NULL) &&
13485  (call->arguments == NULL) &&
13486  (call->block == NULL)
13487  ) {
13488  if (call->receiver == NULL) {
13489  // When we get here, we have a local variable write, because it
13490  // was previously marked as a method call but now we have an =.
13491  // This looks like:
13492  //
13493  // foo = 1
13494  //
13495  // When it was parsed in the prefix position, foo was seen as a
13496  // method call with no receiver and no arguments. Now we have an
13497  // =, so we know it's a local variable write.
13498  const pm_location_t message_loc = call->message_loc;
13499 
13500  pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
13501  pm_node_destroy(parser, target);
13502 
13503  return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
13504  }
13505 
  // A call with a receiver whose message starts with an underscore or an
  // alphanumeric character becomes a call target whose name has = appended.
13506  if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13507  if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13508  pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13509  }
13510 
13511  parse_write_name(parser, &call->name);
13512  return (pm_node_t *) pm_call_target_node_create(parser, call);
13513  }
13514  }
13515 
13516  // If there is no call operator and the message is "[]" then this is
13517  // an aref expression, and we can transform it into an aset
13518  // expression.
13519  if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13520  return (pm_node_t *) pm_index_target_node_create(parser, call);
13521  }
13522  }
13523  /* fallthrough */
13524  default:
13525  // In this case we have a node that we don't know how to convert
13526  // into a target. We need to treat it as an error. For now, we'll
13527  // mark it as an error and just skip right past it.
13528  pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13529  return target;
13530  }
13531 }
13532 
13537 static pm_node_t *
13538 parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13539  pm_node_t *result = parse_target(parser, target, multiple, false);
13540 
13541  // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13542  // parens after the targets.
13543  if (
13544  !match1(parser, PM_TOKEN_EQUAL) &&
13545  !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13546  !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13547  ) {
13548  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13549  }
13550 
13551  return result;
13552 }
13553 
13558 static pm_node_t *
13559 parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13560  pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13561 
13562  if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13563  return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
13564  }
13565 
13566  return write;
13567 }
13568 
// Convert a target node plus the parsed right-hand side `value` into the
// matching write node, dispatching on the target's type. `operator` is the
// assignment operator token and is passed on to the created write nodes.
//
// For call targets, the value is appended to the call's arguments and the call
// is flagged as an attribute write. Targets that cannot be converted report
// PM_ERR_WRITE_TARGET_UNEXPECTED on the operator token and are returned
// unchanged.
//
// NOTE(review): the listing numbers embedded in this copy skip values inside
// this function (for example 13577 -> 13579, 13601 -> 13604, 13610 -> 13613),
// several statements appear without a preceding case label, and `local_read`
// is used without a visible declaration. Lines appear to be missing from this
// copy; compare against the upstream file before relying on it.
13572 static pm_node_t *
13573 parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13574  switch (PM_NODE_TYPE(target)) {
13575  case PM_MISSING_NODE:
13576  pm_node_destroy(parser, value);
13577  return target;
13579  pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13580  pm_node_destroy(parser, target);
13581  return (pm_node_t *) node;
13582  }
13583  case PM_CONSTANT_PATH_NODE: {
13584  pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13585 
13586  if (context_def_p(parser)) {
13587  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13588  }
13589 
13590  return parse_shareable_constant_write(parser, node);
13591  }
13592  case PM_CONSTANT_READ_NODE: {
13593  pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13594 
13595  if (context_def_p(parser)) {
13596  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13597  }
13598 
13599  pm_node_destroy(parser, target);
13600  return parse_shareable_constant_write(parser, node);
13601  }
13604  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13605  /* fallthrough */
13607  pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13608  pm_node_destroy(parser, target);
13609  return (pm_node_t *) node;
13610  }
13613 
13614  pm_constant_id_t name = local_read->name;
13615  pm_location_t name_loc = target->location;
13616 
13617  uint32_t depth = local_read->depth;
13618  pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13619 
13620  if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13621  pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13622  PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13623  parse_target_implicit_parameter(parser, target);
13624  }
13625 
  // name, depth and name_loc were copied out above, so the target can be
  // destroyed before the write node is created.
13626  pm_locals_unread(&scope->locals, name);
13627  pm_node_destroy(parser, target);
13628 
13629  return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13630  }
13632  pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13633  pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13634 
13635  parse_target_implicit_parameter(parser, target);
13636  pm_node_destroy(parser, target);
13637 
13638  return node;
13639  }
13641  pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13642  pm_node_destroy(parser, target);
13643  return write_node;
13644  }
13645  case PM_MULTI_TARGET_NODE:
13646  return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
13647  case PM_SPLAT_NODE: {
13648  pm_splat_node_t *splat = (pm_splat_node_t *) target;
13649 
13650  if (splat->expression != NULL) {
13651  splat->expression = parse_write(parser, splat->expression, operator, value);
13652  }
13653 
  // A lone splat target is wrapped in a new multi target node so that the
  // result is a multi write.
13654  pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13655  pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
13656 
13657  return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
13658  }
13659  case PM_CALL_NODE: {
13660  pm_call_node_t *call = (pm_call_node_t *) target;
13661 
13662  // If we have no arguments to the call node and we need this to be a
13663  // target then this is either a method call or a local variable
13664  // write.
13665  if (
13666  (call->message_loc.start != NULL) &&
13667  (call->message_loc.end[-1] != '!') &&
13668  (call->message_loc.end[-1] != '?') &&
13669  (call->opening_loc.start == NULL) &&
13670  (call->arguments == NULL) &&
13671  (call->block == NULL)
13672  ) {
13673  if (call->receiver == NULL) {
13674  // When we get here, we have a local variable write, because it
13675  // was previously marked as a method call but now we have an =.
13676  // This looks like:
13677  //
13678  // foo = 1
13679  //
13680  // When it was parsed in the prefix position, foo was seen as a
13681  // method call with no receiver and no arguments. Now we have an
13682  // =, so we know it's a local variable write.
13683  const pm_location_t message = call->message_loc;
13684 
13685  pm_parser_local_add_location(parser, message.start, message.end, 0);
13686  pm_node_destroy(parser, target);
13687 
13688  pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
13689  target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
13690 
13691  pm_refute_numbered_parameter(parser, message.start, message.end);
13692  return target;
13693  }
13694 
13695  if (char_is_identifier_start(parser, call->message_loc.start)) {
13696  // When we get here, we have a method call, because it was
13697  // previously marked as a method call but now we have an =. This
13698  // looks like:
13699  //
13700  // foo.bar = 1
13701  //
13702  // When it was parsed in the prefix position, foo.bar was seen as a
13703  // method call with no arguments. Now we have an =, so we know it's
13704  // a method call with an argument. In this case we will create the
13705  // arguments node, parse the argument, and add it to the list.
13706  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13707  call->arguments = arguments;
13708 
13709  pm_arguments_node_arguments_append(arguments, value);
13710  call->base.location.end = arguments->base.location.end;
13711 
13712  parse_write_name(parser, &call->name);
13713  pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13714 
13715  return (pm_node_t *) call;
13716  }
13717  }
13718 
13719  // If there is no call operator and the message is "[]" then this is
13720  // an aref expression, and we can transform it into an aset
13721  // expression.
13722  if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13723  if (call->arguments == NULL) {
13724  call->arguments = pm_arguments_node_create(parser);
13725  }
13726 
13727  pm_arguments_node_arguments_append(call->arguments, value);
13728  target->location.end = value->location.end;
13729 
13730  // Replace the name with "[]=".
13731  call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13732  pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13733 
13734  return target;
13735  }
13736 
13737  // If there are arguments on the call node, then it can't be a method
13738  // call ending with = or a local variable write, so it must be a
13739  // syntax error. In this case we'll fall through to our default
13740  // handling. We need to free the value that we parsed because there
13741  // is no way for us to attach it to the tree at this point.
13742  pm_node_destroy(parser, value);
13743  }
13744  /* fallthrough */
13745  default:
13746  // In this case we have a node that we don't know how to convert into a
13747  // target. We need to treat it as an error. For now, we'll mark it as an
13748  // error and just skip right past it.
13749  pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13750  return target;
13751  }
13752 }
13753 
13760 static pm_node_t *
13761 parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13762  switch (PM_NODE_TYPE(target)) {
13763  case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13764  case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13765  case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13766  case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13767  case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13768  case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13769  case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13770  default: break;
13771  }
13772 
13773  pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13774  pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13775 
13776  pm_node_destroy(parser, target);
13777  return (pm_node_t *) result;
13778 }
13779 
13790 static pm_node_t *
13791 parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13792  bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13793 
13794  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13795  pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13796 
13797  while (accept1(parser, PM_TOKEN_COMMA)) {
13798  if (accept1(parser, PM_TOKEN_USTAR)) {
13799  // Here we have a splat operator. It can have a name or be
13800  // anonymous. It can be the final target or be in the middle if
13801  // there haven't been any others yet.
13802  if (has_rest) {
13803  pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13804  }
13805 
13806  pm_token_t star_operator = parser->previous;
13807  pm_node_t *name = NULL;
13808 
13809  if (token_begins_expression_p(parser->current.type)) {
13810  name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13811  name = parse_target(parser, name, true, true);
13812  }
13813 
13814  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13815  pm_multi_target_node_targets_append(parser, result, splat);
13816  has_rest = true;
13817  } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13818  context_push(parser, PM_CONTEXT_MULTI_TARGET);
13819  pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13820  target = parse_target(parser, target, true, false);
13821 
13822  pm_multi_target_node_targets_append(parser, result, target);
13823  context_pop(parser);
13824  } else if (token_begins_expression_p(parser->current.type)) {
13825  pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13826  target = parse_target(parser, target, true, false);
13827 
13828  pm_multi_target_node_targets_append(parser, result, target);
13829  } else if (!match1(parser, PM_TOKEN_EOF)) {
13830  // If we get here, then we have a trailing , in a multi target node.
13831  // We'll add an implicit rest node to represent this.
13832  pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13833  pm_multi_target_node_targets_append(parser, result, rest);
13834  break;
13835  }
13836  }
13837 
13838  return (pm_node_t *) result;
13839 }
13840 
13845 static pm_node_t *
13846 parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13847  pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13848  accept1(parser, PM_TOKEN_NEWLINE);
13849 
13850  // Ensure that we have either an = or a ) after the targets.
13851  if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13852  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13853  }
13854 
13855  return result;
13856 }
13857 
13861 static pm_statements_node_t *
13862 parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13863  // First, skip past any optional terminators that might be at the beginning
13864  // of the statements.
13865  while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13866 
13867  // If we have a terminator, then we can just return NULL.
13868  if (context_terminator(context, &parser->current)) return NULL;
13869 
13870  pm_statements_node_t *statements = pm_statements_node_create(parser);
13871 
13872  // At this point we know we have at least one statement, and that it
13873  // immediately follows the current token.
13874  context_push(parser, context);
13875 
13876  while (true) {
13877  pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13878  pm_statements_node_body_append(parser, statements, node, true);
13879 
13880  // If we're recovering from a syntax error, then we need to stop parsing
13881  // the statements now.
13882  if (parser->recovering) {
13883  // If this is the level of context where the recovery has happened,
13884  // then we can mark the parser as done recovering.
13885  if (context_terminator(context, &parser->current)) parser->recovering = false;
13886  break;
13887  }
13888 
13889  // If we have a terminator, then we will parse all consecutive
13890  // terminators and then continue parsing the statements list.
13891  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13892  // If we have a terminator, then we will continue parsing the
13893  // statements list.
13894  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13895  if (context_terminator(context, &parser->current)) break;
13896 
13897  // Now we can continue parsing the list of statements.
13898  continue;
13899  }
13900 
13901  // At this point we have a list of statements that are not terminated by
13902  // a newline or semicolon. At this point we need to check if we're at
13903  // the end of the statements list. If we are, then we should break out
13904  // of the loop.
13905  if (context_terminator(context, &parser->current)) break;
13906 
13907  // At this point, we have a syntax error, because the statement was not
13908  // terminated by a newline or semicolon, and we're not at the end of the
13909  // statements list. Ideally we should scan forward to determine if we
13910  // should insert a missing terminator or break out of parsing the
13911  // statements list at this point.
13912  //
13913  // We don't have that yet, so instead we'll do a more naive approach. If
13914  // we were unable to parse an expression, then we will skip past this
13915  // token and continue parsing the statements list. Otherwise we'll add
13916  // an error and continue parsing the statements list.
13917  if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13918  parser_lex(parser);
13919 
13920  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13921  if (context_terminator(context, &parser->current)) break;
13922  } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13923  // This is an inlined version of accept1 because the error that we
13924  // want to add has varargs. If this happens again, we should
13925  // probably extract a helper function.
13926  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
13927  parser->previous.start = parser->previous.end;
13928  parser->previous.type = PM_TOKEN_MISSING;
13929  }
13930  }
13931 
13932  context_pop(parser);
13933  bool last_value = true;
13934  switch (context) {
13936  case PM_CONTEXT_DEF_ENSURE:
13937  last_value = false;
13938  break;
13939  default:
13940  break;
13941  }
13942  pm_void_statements_check(parser, statements, last_value);
13943 
13944  return statements;
13945 }
13946 
/**
 * Record a hash key in the given set of static literals and, if
 * pm_static_literals_add returns a non-NULL node for it, build the arguments
 * of a PM_WARN_DUPLICATED_HASH_KEY diagnostic spanning that returned node.
 *
 * NOTE(review): the embedded listing numbers jump from 13958 to 13960 and from
 * 13965 to 13967, so the head of the diagnostic call and its final argument
 * are not visible here. Confirm both against the upstream file.
 */
13951 static void
13952 pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13953  const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13954 
13955  if (duplicated != NULL) {
// Render the node returned above into a temporary buffer so that its text
// can be passed to the diagnostic as a length/pointer pair.
13956  pm_buffer_t buffer = { 0 };
13957  pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13958 
// Arguments of the warning: target list, source range of `duplicated`, the
// diagnostic id, then the rendered text.
13960  &parser->warning_list,
13961  duplicated->location.start,
13962  duplicated->location.end,
13963  PM_WARN_DUPLICATED_HASH_KEY,
13964  (int) pm_buffer_length(&buffer),
13965  pm_buffer_value(&buffer),
13967  );
13968 
// The buffer was only needed to format the warning.
13969  pm_buffer_free(&buffer);
13970  }
13971 }
13972 
/**
 * Record the condition of a `when` clause in the given set of static literals
 * and, if pm_static_literals_add returns a non-NULL node for it, build the
 * arguments of a PM_WARN_DUPLICATED_WHEN_CLAUSE diagnostic spanning `node`.
 *
 * NOTE(review): the embedded listing numbers jump from 13981 to 13983 and from
 * 13986 to 13989, so the head of the diagnostic call and its trailing
 * arguments are not visible here. Confirm them against the upstream file.
 */
13977 static void
13978 pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13979  pm_node_t *previous;
13980 
// `previous` receives the node returned by the set, if any.
13981  if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
// Arguments of the warning: target list, source range of the new node, and
// the diagnostic id.
13983  &parser->warning_list,
13984  node->location.start,
13985  node->location.end,
13986  PM_WARN_DUPLICATED_WHEN_CLAUSE,
13989  );
13990  }
13991 }
13992 
13996 static bool
13997 parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
13999  bool contains_keyword_splat = false;
14000 
14001  while (true) {
14002  pm_node_t *element;
14003 
14004  switch (parser->current.type) {
14005  case PM_TOKEN_USTAR_STAR: {
14006  parser_lex(parser);
14007  pm_token_t operator = parser->previous;
14008  pm_node_t *value = NULL;
14009 
14010  if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
14011  // If we're about to parse a nested hash that is being
14012  // pushed into this hash directly with **, then we want the
14013  // inner hash to share the static literals with the outer
14014  // hash.
14015  parser->current_hash_keys = literals;
14016  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14017  } else if (token_begins_expression_p(parser->current.type)) {
14018  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14019  } else {
14020  pm_parser_scope_forwarding_keywords_check(parser, &operator);
14021  }
14022 
14023  element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
14024  contains_keyword_splat = true;
14025  break;
14026  }
14027  case PM_TOKEN_LABEL: {
14028  pm_token_t label = parser->current;
14029  parser_lex(parser);
14030 
14031  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
14032  pm_hash_key_static_literals_add(parser, literals, key);
14033 
14034  pm_token_t operator = not_provided(parser);
14035  pm_node_t *value = NULL;
14036 
14037  if (token_begins_expression_p(parser->current.type)) {
14038  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
14039  } else {
14040  if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
14041  pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
14042  value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
14043  } else {
14044  int depth = -1;
14045  pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
14046 
14047  if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
14048  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
14049  } else {
14050  depth = pm_parser_local_depth(parser, &identifier);
14051  }
14052 
14053  if (depth == -1) {
14054  value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
14055  } else {
14056  value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
14057  }
14058  }
14059 
14060  value->location.end++;
14061  value = (pm_node_t *) pm_implicit_node_create(parser, value);
14062  }
14063 
14064  element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14065  break;
14066  }
14067  default: {
14068  pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
14069 
14070  // Hash keys that are strings are automatically frozen. We will
14071  // mark that here.
14072  if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
14073  pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
14074  }
14075 
14076  pm_hash_key_static_literals_add(parser, literals, key);
14077 
14078  pm_token_t operator;
14079  if (pm_symbol_node_label_p(key)) {
14080  operator = not_provided(parser);
14081  } else {
14082  expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
14083  operator = parser->previous;
14084  }
14085 
14086  pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14087  element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14088  break;
14089  }
14090  }
14091 
14092  if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
14093  pm_hash_node_elements_append((pm_hash_node_t *) node, element);
14094  } else {
14095  pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
14096  }
14097 
14098  // If there's no comma after the element, then we're done.
14099  if (!accept1(parser, PM_TOKEN_COMMA)) break;
14100 
14101  // If the next element starts with a label or a **, then we know we have
14102  // another element in the hash, so we'll continue parsing.
14103  if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
14104 
14105  // Otherwise we need to check if the subsequent token begins an expression.
14106  // If it does, then we'll continue parsing.
14107  if (token_begins_expression_p(parser->current.type)) continue;
14108 
14109  // Otherwise by default we will exit out of this loop.
14110  break;
14111  }
14112 
14113  return contains_keyword_splat;
14114 }
14115 
14119 static inline void
14120 parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
14121  if (arguments->arguments == NULL) {
14122  arguments->arguments = pm_arguments_node_create(parser);
14123  }
14124 
14125  pm_arguments_node_arguments_append(arguments->arguments, argument);
14126 }
14127 
/**
 * Parse a list of call arguments into the given arguments structure.
 *
 * Arguments are parsed one per loop iteration until EOF, until an argument
 * fails to parse (or the parser is recovering), until no comma follows an
 * argument, or until the terminator is seen after a comma. Bare keyword
 * hashes, block arguments (&), splats (*), and argument forwarding (...) each
 * have their own case; everything else goes through the default case.
 *
 * @param parser The parser.
 * @param arguments The structure receiving positional arguments and the block.
 * @param accepts_forwarding Whether a leading ... is handled by the forwarding
 *     case rather than falling through to the default case.
 * @param terminator The token type that closes this argument list;
 *     PM_TOKEN_EOF is compared against to decide whether newlines may be
 *     accepted between arguments.
 * @param depth The current expression nesting depth.
 *
 * NOTE(review): the embedded listing numbers skip 14175 and 14209 inside this
 * function; see the notes at those positions.
 */
14131 static void
14132 parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14133  pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14134 
14135  // First we need to check if the next token is one that could be the start
14136  // of an argument. If it's not, then we can just return.
14137  if (
14138  match2(parser, terminator, PM_TOKEN_EOF) ||
14139  (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
14140  context_terminator(parser->current_context->context, &parser->current)
14141  ) {
14142  return;
14143  }
14144 
// State carried across loop iterations; each flag is set once the
// corresponding kind of argument has been parsed.
14145  bool parsed_first_argument = false;
14146  bool parsed_bare_hash = false;
14147  bool parsed_block_argument = false;
14148  bool parsed_forwarding_arguments = false;
14149 
14150  while (!match1(parser, PM_TOKEN_EOF)) {
// Anything after a block argument or after ... is reported, but parsing
// continues.
14151  if (parsed_block_argument) {
14152  pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
14153  }
14154  if (parsed_forwarding_arguments) {
14155  pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
14156  }
14157 
14158  pm_node_t *argument = NULL;
14159 
14160  switch (parser->current.type) {
// A ** or a label starts a bare keyword hash.
14161  case PM_TOKEN_USTAR_STAR:
14162  case PM_TOKEN_LABEL: {
14163  if (parsed_bare_hash) {
14164  pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
14165  }
14166 
14167  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14168  argument = (pm_node_t *) hash;
14169 
14170  pm_static_literals_t hash_keys = { 0 };
14171  bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
14172 
14173  parse_arguments_append(parser, arguments, argument);
14174 
// NOTE(review): `flags` is used below without a visible declaration and the
// listing skips number 14175; the declaration line appears to be missing
// from this listing. Confirm against the upstream file.
14176  if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14177  pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14178 
14179  pm_static_literals_free(&hash_keys);
14180  parsed_bare_hash = true;
14181 
14182  break;
14183  }
// A block argument: &expression, or a bare & checked against the enclosing
// scope's block forwarding.
14184  case PM_TOKEN_UAMPERSAND: {
14185  parser_lex(parser);
14186  pm_token_t operator = parser->previous;
14187  pm_node_t *expression = NULL;
14188 
14189  if (token_begins_expression_p(parser->current.type)) {
14190  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14191  } else {
14192  pm_parser_scope_forwarding_block_check(parser, &operator);
14193  }
14194 
14195  argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
// Only the first block argument is stored in the block slot; any later one
// is appended to the positional arguments instead.
14196  if (parsed_block_argument) {
14197  parse_arguments_append(parser, arguments, argument);
14198  } else {
14199  arguments->block = argument;
14200  }
14201 
14202  parsed_block_argument = true;
14203  break;
14204  }
// A splat: *expression, or a bare * checked against the enclosing scope's
// positional forwarding.
14205  case PM_TOKEN_USTAR: {
14206  parser_lex(parser);
14207  pm_token_t operator = parser->previous;
14208 
// NOTE(review): the listing skips number 14209 and the `} else {` below has
// no visible matching `if`; the condition selecting the bare-splat branch
// appears to be missing from this listing. Confirm against the upstream file.
14210  pm_parser_scope_forwarding_positionals_check(parser, &operator);
14211  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14212  if (parsed_bare_hash) {
14213  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14214  }
14215  } else {
14216  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14217 
14218  if (parsed_bare_hash) {
14219  pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14220  }
14221 
14222  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
14223  }
14224 
14225  parse_arguments_append(parser, arguments, argument);
14226  break;
14227  }
// A leading ... is either argument forwarding or the start of a beginless
// range, depending on what follows it.
14228  case PM_TOKEN_UDOT_DOT_DOT: {
14229  if (accepts_forwarding) {
14230  parser_lex(parser);
14231 
14232  if (token_begins_expression_p(parser->current.type)) {
14233  // If the token begins an expression then this ... was
14234  // not actually argument forwarding but was instead a
14235  // range.
14236  pm_token_t operator = parser->previous;
14237  pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
14238 
14239  // If we parse a range, we need to validate that we
14240  // didn't accidentally violate the nonassoc rules of the
14241  // ... operator.
14242  if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
14243  pm_range_node_t *range = (pm_range_node_t *) right;
14244  pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
14245  }
14246 
// `argument` is now non-NULL, so after falling through the default case
// below skips parsing a new expression and reuses this range.
14247  argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
14248  } else {
14249  pm_parser_scope_forwarding_all_check(parser, &parser->previous);
14250  if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
14251  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
14252  }
14253 
14254  argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
14255  parse_arguments_append(parser, arguments, argument);
14256  pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
14257  arguments->has_forwarding = true;
14258  parsed_forwarding_arguments = true;
14259  break;
14260  }
14261  }
14262  }
14263  /* fallthrough */
14264  default: {
14265  if (argument == NULL) {
14266  argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14267  }
14268 
14269  bool contains_keywords = false;
14270  bool contains_keyword_splat = false;
14271 
// If the expression just parsed is a label symbol or is followed by =>, it
// is the first key of a bare keyword hash.
14272  if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14273  if (parsed_bare_hash) {
14274  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
14275  }
14276 
14277  pm_token_t operator;
14278  if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
14279  operator = parser->previous;
14280  } else {
14281  operator = not_provided(parser);
14282  }
14283 
14284  pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
14285  contains_keywords = true;
14286 
14287  // Create the set of static literals for this hash.
14288  pm_static_literals_t hash_keys = { 0 };
14289  pm_hash_key_static_literals_add(parser, &hash_keys, argument);
14290 
14291  // Finish parsing the one we are part way through.
14292  pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14293  argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
14294 
14295  pm_keyword_hash_node_elements_append(bare_hash, argument);
14296  argument = (pm_node_t *) bare_hash;
14297 
14298  // Then parse more if we have a comma
14299  if (accept1(parser, PM_TOKEN_COMMA) && (
14300  token_begins_expression_p(parser->current.type) ||
14301  match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
14302  )) {
14303  contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
14304  }
14305 
14306  pm_static_literals_free(&hash_keys);
14307  parsed_bare_hash = true;
14308  }
14309 
14310  parse_arguments_append(parser, arguments, argument);
14311 
// Record on the arguments node whether keywords and/or a keyword splat
// were seen in this argument.
14312  pm_node_flags_t flags = 0;
14313  if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14314  if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14315  pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14316 
14317  break;
14318  }
14319  }
14320 
14321  parsed_first_argument = true;
14322 
14323  // If parsing the argument failed, we need to stop parsing arguments.
14324  if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14325 
14326  // If the terminator of these arguments is not EOF, then we have a
14327  // specific token we're looking for. In that case we can accept a
14328  // newline here because it is not functioning as a statement terminator.
14329  bool accepted_newline = false;
14330  if (terminator != PM_TOKEN_EOF) {
14331  accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14332  }
14333 
14334  if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14335  // If we previously were on a comma and we just parsed a bare hash,
14336  // then we want to continue parsing arguments. This is because the
14337  // comma was grabbed up by the hash parser.
14338  } else if (accept1(parser, PM_TOKEN_COMMA)) {
14339  // If there was a comma, then we need to check if we also accepted a
14340  // newline. If we did, then this is a syntax error.
14341  if (accepted_newline) {
14342  pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14343  }
14344  } else {
14345  // If there is no comma at the end of the argument list then we're
14346  // done parsing arguments and can break out of this loop.
14347  break;
14348  }
14349 
14350  // If we hit the terminator, then that means we have a trailing comma so
14351  // we can accept that output as well.
14352  if (match1(parser, terminator)) break;
14353  }
14354 }
14355 
14366 static pm_multi_target_node_t *
14367 parse_required_destructured_parameter(pm_parser_t *parser) {
14368  expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14369 
14370  pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14371  pm_multi_target_node_opening_set(node, &parser->previous);
14372 
14373  do {
14374  pm_node_t *param;
14375 
14376  // If we get here then we have a trailing comma, which isn't allowed in
14377  // the grammar. In other places, multi targets _do_ allow trailing
14378  // commas, so here we'll assume this is a mistake of the user not
14379  // knowing it's not allowed here.
14380  if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14381  param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14382  pm_multi_target_node_targets_append(parser, node, param);
14383  pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14384  break;
14385  }
14386 
14387  if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14388  param = (pm_node_t *) parse_required_destructured_parameter(parser);
14389  } else if (accept1(parser, PM_TOKEN_USTAR)) {
14390  pm_token_t star = parser->previous;
14391  pm_node_t *value = NULL;
14392 
14393  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14394  pm_token_t name = parser->previous;
14395  value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14396  if (pm_parser_parameter_name_check(parser, &name)) {
14397  pm_node_flag_set_repeated_parameter(value);
14398  }
14399  pm_parser_local_add_token(parser, &name, 1);
14400  }
14401 
14402  param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
14403  } else {
14404  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14405  pm_token_t name = parser->previous;
14406 
14407  param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14408  if (pm_parser_parameter_name_check(parser, &name)) {
14409  pm_node_flag_set_repeated_parameter(param);
14410  }
14411  pm_parser_local_add_token(parser, &name, 1);
14412  }
14413 
14414  pm_multi_target_node_targets_append(parser, node, param);
14415  } while (accept1(parser, PM_TOKEN_COMMA));
14416 
14417  accept1(parser, PM_TOKEN_NEWLINE);
14418  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14419  pm_multi_target_node_closing_set(node, &parser->previous);
14420 
14421  return node;
14422 }
14423 
/**
 * The ordering states used while validating the order of parameters in a
 * parameter list. Parsing starts at PM_PARAMETERS_ORDER_NONE, and the state is
 * only ever lowered as parameters are seen; a token mapping to a higher state
 * than the current one is an ordering error.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,
    PM_PARAMETERS_ORDER_KEYWORDS = 3,
    PM_PARAMETERS_ORDER_REST = 4,
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,
    PM_PARAMETERS_ORDER_OPTIONAL = 6,
    PM_PARAMETERS_ORDER_NAMED = 7,

    /** The state before any parameter has been seen. */
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
14439 
14443 static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14444  [0] = PM_PARAMETERS_NO_CHANGE,
14445  [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14446  [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14447  [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14448  [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14449  [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14450  [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14451  [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14452  [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14453  [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14454  [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14455  [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14456 };
14457 
14465 static bool
14466 update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14467  pm_parameters_order_t state = parameters_ordering[token->type];
14468  if (state == PM_PARAMETERS_NO_CHANGE) return true;
14469 
14470  // If we see another ordered argument after a optional argument
14471  // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14472  if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14473  *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14474  return true;
14475  } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14476  return true;
14477  }
14478 
14479  if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14480  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14481  return false;
14482  } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14483  pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14484  return false;
14485  } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14486  // We know what transition we failed on, so we can provide a better error here.
14487  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14488  return false;
14489  }
14490 
14491  if (state < *current) *current = state;
14492  return true;
14493 }
14494 
14498 static pm_parameters_node_t *
14499 parse_parameters(
14500  pm_parser_t *parser,
14501  pm_binding_power_t binding_power,
14502  bool uses_parentheses,
14503  bool allows_trailing_comma,
14504  bool allows_forwarding_parameters,
14505  bool accepts_blocks_in_defaults,
14506  uint16_t depth
14507 ) {
14508  pm_do_loop_stack_push(parser, false);
14509 
14510  pm_parameters_node_t *params = pm_parameters_node_create(parser);
14511  pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14512 
14513  while (true) {
14514  bool parsing = true;
14515 
14516  switch (parser->current.type) {
14518  update_parameter_state(parser, &parser->current, &order);
14519  pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
14520 
14521  if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14522  pm_parameters_node_requireds_append(params, param);
14523  } else {
14524  pm_parameters_node_posts_append(params, param);
14525  }
14526  break;
14527  }
14528  case PM_TOKEN_UAMPERSAND:
14529  case PM_TOKEN_AMPERSAND: {
14530  update_parameter_state(parser, &parser->current, &order);
14531  parser_lex(parser);
14532 
14533  pm_token_t operator = parser->previous;
14534  pm_token_t name;
14535 
14536  bool repeated = false;
14537  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14538  name = parser->previous;
14539  repeated = pm_parser_parameter_name_check(parser, &name);
14540  pm_parser_local_add_token(parser, &name, 1);
14541  } else {
14542  name = not_provided(parser);
14543  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14544  }
14545 
14546  pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
14547  if (repeated) {
14548  pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14549  }
14550  if (params->block == NULL) {
14551  pm_parameters_node_block_set(params, param);
14552  } else {
14553  pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
14554  pm_parameters_node_posts_append(params, (pm_node_t *) param);
14555  }
14556 
14557  break;
14558  }
14559  case PM_TOKEN_UDOT_DOT_DOT: {
14560  if (!allows_forwarding_parameters) {
14561  pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14562  }
14563 
14564  bool succeeded = update_parameter_state(parser, &parser->current, &order);
14565  parser_lex(parser);
14566 
14567  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14568  pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14569 
14570  if (params->keyword_rest != NULL) {
14571  // If we already have a keyword rest parameter, then we replace it with the
14572  // forwarding parameter and move the keyword rest parameter to the posts list.
14573  pm_node_t *keyword_rest = params->keyword_rest;
14574  pm_parameters_node_posts_append(params, keyword_rest);
14575  if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14576  params->keyword_rest = NULL;
14577  }
14578 
14579  pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14580  break;
14581  }
14583  case PM_TOKEN_IDENTIFIER:
14584  case PM_TOKEN_CONSTANT:
14587  case PM_TOKEN_METHOD_NAME: {
14588  parser_lex(parser);
14589  switch (parser->previous.type) {
14590  case PM_TOKEN_CONSTANT:
14591  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14592  break;
14594  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14595  break;
14597  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14598  break;
14600  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14601  break;
14602  case PM_TOKEN_METHOD_NAME:
14603  pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14604  break;
14605  default: break;
14606  }
14607 
14608  if (parser->current.type == PM_TOKEN_EQUAL) {
14609  update_parameter_state(parser, &parser->current, &order);
14610  } else {
14611  update_parameter_state(parser, &parser->previous, &order);
14612  }
14613 
14614  pm_token_t name = parser->previous;
14615  bool repeated = pm_parser_parameter_name_check(parser, &name);
14616  pm_parser_local_add_token(parser, &name, 1);
14617 
14618  if (match1(parser, PM_TOKEN_EQUAL)) {
14619  pm_token_t operator = parser->current;
14620  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14621  parser_lex(parser);
14622 
14623  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14624  uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14625 
14626  if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14627  pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14628  if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14629 
14630  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14631 
14632  if (repeated) {
14633  pm_node_flag_set_repeated_parameter((pm_node_t *) param);
14634  }
14635  pm_parameters_node_optionals_append(params, param);
14636 
14637  // If the value of the parameter increased the number of
14638  // reads of that parameter, then we need to warn that we
14639  // have a circular definition.
14640  if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14641  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14642  }
14643 
14644  context_pop(parser);
14645 
14646  // If parsing the value of the parameter resulted in error recovery,
14647  // then we can put a missing node in its place and stop parsing the
14648  // parameters entirely now.
14649  if (parser->recovering) {
14650  parsing = false;
14651  break;
14652  }
14653  } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14654  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14655  if (repeated) {
14656  pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14657  }
14658  pm_parameters_node_requireds_append(params, (pm_node_t *) param);
14659  } else {
14660  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14661  if (repeated) {
14662  pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14663  }
14664  pm_parameters_node_posts_append(params, (pm_node_t *) param);
14665  }
14666 
14667  break;
14668  }
14669  case PM_TOKEN_LABEL: {
14670  if (!uses_parentheses) parser->in_keyword_arg = true;
14671  update_parameter_state(parser, &parser->current, &order);
14672 
14673  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14674  parser_lex(parser);
14675 
14676  pm_token_t name = parser->previous;
14677  pm_token_t local = name;
14678  local.end -= 1;
14679 
14680  if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14681  pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14682  } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14683  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14684  }
14685 
14686  bool repeated = pm_parser_parameter_name_check(parser, &local);
14687  pm_parser_local_add_token(parser, &local, 1);
14688 
14689  switch (parser->current.type) {
14690  case PM_TOKEN_COMMA:
14692  case PM_TOKEN_PIPE: {
14693  context_pop(parser);
14694 
14695  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14696  if (repeated) {
14697  pm_node_flag_set_repeated_parameter(param);
14698  }
14699 
14700  pm_parameters_node_keywords_append(params, param);
14701  break;
14702  }
14703  case PM_TOKEN_SEMICOLON:
14704  case PM_TOKEN_NEWLINE: {
14705  context_pop(parser);
14706 
14707  if (uses_parentheses) {
14708  parsing = false;
14709  break;
14710  }
14711 
14712  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14713  if (repeated) {
14714  pm_node_flag_set_repeated_parameter(param);
14715  }
14716 
14717  pm_parameters_node_keywords_append(params, param);
14718  break;
14719  }
14720  default: {
14721  pm_node_t *param;
14722 
14723  if (token_begins_expression_p(parser->current.type)) {
14724  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14725  uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14726 
14727  if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14728  pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14729  if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14730 
14731  if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14732  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14733  }
14734 
14735  param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
14736  }
14737  else {
14738  param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14739  }
14740 
14741  if (repeated) {
14742  pm_node_flag_set_repeated_parameter(param);
14743  }
14744 
14745  context_pop(parser);
14746  pm_parameters_node_keywords_append(params, param);
14747 
14748  // If parsing the value of the parameter resulted in error recovery,
14749  // then we can put a missing node in its place and stop parsing the
14750  // parameters entirely now.
14751  if (parser->recovering) {
14752  parsing = false;
14753  break;
14754  }
14755  }
14756  }
14757 
14758  parser->in_keyword_arg = false;
14759  break;
14760  }
14761  case PM_TOKEN_USTAR:
14762  case PM_TOKEN_STAR: {
14763  update_parameter_state(parser, &parser->current, &order);
14764  parser_lex(parser);
14765 
14766  pm_token_t operator = parser->previous;
14767  pm_token_t name;
14768  bool repeated = false;
14769 
14770  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14771  name = parser->previous;
14772  repeated = pm_parser_parameter_name_check(parser, &name);
14773  pm_parser_local_add_token(parser, &name, 1);
14774  } else {
14775  name = not_provided(parser);
14776  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14777  }
14778 
14779  pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
14780  if (repeated) {
14781  pm_node_flag_set_repeated_parameter(param);
14782  }
14783 
14784  if (params->rest == NULL) {
14785  pm_parameters_node_rest_set(params, param);
14786  } else {
14787  pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14788  pm_parameters_node_posts_append(params, param);
14789  }
14790 
14791  break;
14792  }
14793  case PM_TOKEN_STAR_STAR:
14794  case PM_TOKEN_USTAR_STAR: {
14795  pm_parameters_order_t previous_order = order;
14796  update_parameter_state(parser, &parser->current, &order);
14797  parser_lex(parser);
14798 
14799  pm_token_t operator = parser->previous;
14800  pm_node_t *param;
14801 
14802  if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14803  if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14804  pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14805  }
14806 
14807  param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
14808  } else {
14809  pm_token_t name;
14810 
14811  bool repeated = false;
14812  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14813  name = parser->previous;
14814  repeated = pm_parser_parameter_name_check(parser, &name);
14815  pm_parser_local_add_token(parser, &name, 1);
14816  } else {
14817  name = not_provided(parser);
14818  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14819  }
14820 
14821  param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
14822  if (repeated) {
14823  pm_node_flag_set_repeated_parameter(param);
14824  }
14825  }
14826 
14827  if (params->keyword_rest == NULL) {
14828  pm_parameters_node_keyword_rest_set(params, param);
14829  } else {
14830  pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14831  pm_parameters_node_posts_append(params, param);
14832  }
14833 
14834  break;
14835  }
14836  default:
14837  if (parser->previous.type == PM_TOKEN_COMMA) {
14838  if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14839  // If we get here, then we have a trailing comma in a
14840  // block parameter list.
14841  pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14842 
14843  if (params->rest == NULL) {
14844  pm_parameters_node_rest_set(params, param);
14845  } else {
14846  pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
14847  pm_parameters_node_posts_append(params, (pm_node_t *) param);
14848  }
14849  } else {
14850  pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14851  }
14852  }
14853 
14854  parsing = false;
14855  break;
14856  }
14857 
14858  // If we hit some kind of issue while parsing the parameter, this would
14859  // have been set to false. In that case, we need to break out of the
14860  // loop.
14861  if (!parsing) break;
14862 
14863  bool accepted_newline = false;
14864  if (uses_parentheses) {
14865  accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14866  }
14867 
14868  if (accept1(parser, PM_TOKEN_COMMA)) {
14869  // If there was a comma, but we also accepted a newline, then this
14870  // is a syntax error.
14871  if (accepted_newline) {
14872  pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14873  }
14874  } else {
14875  // If there was no comma, then we're done parsing parameters.
14876  break;
14877  }
14878  }
14879 
14880  pm_do_loop_stack_pop(parser);
14881 
14882  // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14883  if (params->base.location.start == params->base.location.end) {
14884  pm_node_destroy(parser, (pm_node_t *) params);
14885  return NULL;
14886  }
14887 
14888  return params;
14889 }
14890 
14895 static size_t
14896 token_newline_index(const pm_parser_t *parser) {
14897  if (parser->heredoc_end == NULL) {
14898  // This is the common case. In this case we can look at the previously
14899  // recorded newline in the newline list and subtract from the current
14900  // offset.
14901  return parser->newline_list.size - 1;
14902  } else {
14903  // This is unlikely. This is the case that we have already parsed the
14904  // start of a heredoc, so we cannot rely on looking at the previous
14905  // offset of the newline list, and instead must go through the whole
14906  // process of a binary search for the line number.
14907  return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
14908  }
14909 }
14910 
14915 static int64_t
14916 token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14917  const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
14918  const uint8_t *end = token->start;
14919 
14920  // Skip over the BOM if it is present.
14921  if (
14922  newline_index == 0 &&
14923  parser->start[0] == 0xef &&
14924  parser->start[1] == 0xbb &&
14925  parser->start[2] == 0xbf
14926  ) cursor += 3;
14927 
14928  int64_t column = 0;
14929  for (; cursor < end; cursor++) {
14930  switch (*cursor) {
14931  case '\t':
14932  column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14933  break;
14934  case ' ':
14935  column++;
14936  break;
14937  default:
14938  column++;
14939  if (break_on_non_space) return -1;
14940  break;
14941  }
14942  }
14943 
14944  return column;
14945 }
14946 
14951 static void
14952 parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14953  // If these warnings are disabled (unlikely), then we can just return.
14954  if (!parser->warn_mismatched_indentation) return;
14955 
14956  // If the tokens are on the same line, we do not warn.
14957  size_t closing_newline_index = token_newline_index(parser);
14958  if (opening_newline_index == closing_newline_index) return;
14959 
14960  // If the opening token has anything other than spaces or tabs before it,
14961  // then we do not warn. This is unless we are matching up an `if`/`end` pair
14962  // and the `if` immediately follows an `else` keyword.
14963  int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14964  if (!if_after_else && (opening_column == -1)) return;
14965 
14966  // Get a reference to the closing token off the current parser. This assumes
14967  // that the caller has placed this in the correct position.
14968  pm_token_t *closing_token = &parser->current;
14969 
14970  // If the tokens are at the same indentation, we do not warn.
14971  int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14972  if ((closing_column == -1) || (opening_column == closing_column)) return;
14973 
14974  // If the closing column is greater than the opening column and we are
14975  // allowing indentation, then we do not warn.
14976  if (allow_indent && (closing_column > opening_column)) return;
14977 
14978  // Otherwise, add a warning.
14979  PM_PARSER_WARN_FORMAT(
14980  parser,
14981  closing_token->start,
14982  closing_token->end,
14983  PM_WARN_INDENTATION_MISMATCH,
14984  (int) (closing_token->end - closing_token->start),
14985  (const char *) closing_token->start,
14986  (int) (opening_token->end - opening_token->start),
14987  (const char *) opening_token->start,
14988  ((int32_t) opening_newline_index) + parser->start_line
14989  );
14990 }
14991 
/**
 * The kind of construct whose body is being checked for rescue, else, and
 * ensure clauses. It selects which parsing context is pushed while the
 * statements of each clause are parsed.
 */
typedef enum {
    /** An explicit begin block. */
    PM_RESCUES_BEGIN = 1,

    /** The body of a block. */
    PM_RESCUES_BLOCK,

    /** The body of a class. */
    PM_RESCUES_CLASS,

    /** The body of a method definition. */
    PM_RESCUES_DEF,

    /** The body of a lambda. */
    PM_RESCUES_LAMBDA,

    /** The body of a module. */
    PM_RESCUES_MODULE,

    /** The body of a singleton class. */
    PM_RESCUES_SCLASS
} pm_rescues_type_t;
15001 
15006 static inline void
15007 parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
15008  pm_rescue_node_t *current = NULL;
15009 
15010  while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
15011  if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15012  parser_lex(parser);
15013 
15014  pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
15015 
15016  switch (parser->current.type) {
15017  case PM_TOKEN_EQUAL_GREATER: {
15018  // Here we have an immediate => after the rescue keyword, in which case
15019  // we're going to have an empty list of exceptions to rescue (which
15020  // implies StandardError).
15021  parser_lex(parser);
15022  pm_rescue_node_operator_set(rescue, &parser->previous);
15023 
15024  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15025  reference = parse_target(parser, reference, false, false);
15026 
15027  pm_rescue_node_reference_set(rescue, reference);
15028  break;
15029  }
15030  case PM_TOKEN_NEWLINE:
15031  case PM_TOKEN_SEMICOLON:
15032  case PM_TOKEN_KEYWORD_THEN:
15033  // Here we have a terminator for the rescue keyword, in which case we're
15034  // going to just continue on.
15035  break;
15036  default: {
15037  if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
15038  // Here we have something that could be an exception expression, so
15039  // we'll attempt to parse it here and any others delimited by commas.
15040 
15041  do {
15042  pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
15043  pm_rescue_node_exceptions_append(rescue, expression);
15044 
15045  // If we hit a newline, then this is the end of the rescue expression. We
15046  // can continue on to parse the statements.
15047  if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
15048 
15049  // If we hit a `=>` then we're going to parse the exception variable. Once
15050  // we've done that, we'll break out of the loop and parse the statements.
15051  if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
15052  pm_rescue_node_operator_set(rescue, &parser->previous);
15053 
15054  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15055  reference = parse_target(parser, reference, false, false);
15056 
15057  pm_rescue_node_reference_set(rescue, reference);
15058  break;
15059  }
15060  } while (accept1(parser, PM_TOKEN_COMMA));
15061  }
15062  }
15063  }
15064 
15065  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15066  accept1(parser, PM_TOKEN_KEYWORD_THEN);
15067  } else {
15068  expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
15069  }
15070 
15072  pm_accepts_block_stack_push(parser, true);
15073  pm_context_t context;
15074 
15075  switch (type) {
15076  case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
15077  case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
15078  case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
15079  case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
15080  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
15081  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
15082  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
15083  default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15084  }
15085 
15086  pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15087  if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
15088 
15089  pm_accepts_block_stack_pop(parser);
15090  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15091  }
15092 
15093  if (current == NULL) {
15094  pm_begin_node_rescue_clause_set(parent_node, rescue);
15095  } else {
15096  pm_rescue_node_subsequent_set(current, rescue);
15097  }
15098 
15099  current = rescue;
15100  }
15101 
15102  // The end node locations on rescue nodes will not be set correctly
15103  // since we won't know the end until we've found all subsequent
15104  // clauses. This sets the end location on all rescues once we know it.
15105  if (current != NULL) {
15106  const uint8_t *end_to_set = current->base.location.end;
15107  pm_rescue_node_t *clause = parent_node->rescue_clause;
15108 
15109  while (clause != NULL) {
15110  clause->base.location.end = end_to_set;
15111  clause = clause->subsequent;
15112  }
15113  }
15114 
15115  pm_token_t else_keyword;
15116  if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15117  if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15118  opening_newline_index = token_newline_index(parser);
15119 
15120  else_keyword = parser->current;
15121  opening = &else_keyword;
15122 
15123  parser_lex(parser);
15124  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15125 
15126  pm_statements_node_t *else_statements = NULL;
15127  if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
15128  pm_accepts_block_stack_push(parser, true);
15129  pm_context_t context;
15130 
15131  switch (type) {
15132  case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
15133  case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
15134  case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
15135  case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
15136  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15137  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15138  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15139  default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15140  }
15141 
15142  else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15143  pm_accepts_block_stack_pop(parser);
15144 
15145  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15146  }
15147 
15148  pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
15149  pm_begin_node_else_clause_set(parent_node, else_clause);
15150 
15151  // If we don't have a `current` rescue node, then this is a dangling
15152  // else, and it's an error.
15153  if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
15154  }
15155 
15156  if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
15157  if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15158  pm_token_t ensure_keyword = parser->current;
15159 
15160  parser_lex(parser);
15161  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15162 
15163  pm_statements_node_t *ensure_statements = NULL;
15164  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15165  pm_accepts_block_stack_push(parser, true);
15166  pm_context_t context;
15167 
15168  switch (type) {
15169  case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
15170  case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
15171  case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
15172  case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
15173  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
15174  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
15175  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
15176  default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15177  }
15178 
15179  ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15180  pm_accepts_block_stack_pop(parser);
15181 
15182  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15183  }
15184 
15185  pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
15186  pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
15187  }
15188 
15189  if (match1(parser, PM_TOKEN_KEYWORD_END)) {
15190  if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15191  pm_begin_node_end_keyword_set(parent_node, &parser->current);
15192  } else {
15193  pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15194  pm_begin_node_end_keyword_set(parent_node, &end_keyword);
15195  }
15196 }
15197 
15202 static pm_begin_node_t *
15203 parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
15204  pm_token_t begin_keyword = not_provided(parser);
15205  pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
15206 
15207  parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
15208  node->base.location.start = start;
15209 
15210  return node;
15211 }
15212 
15217 parse_block_parameters(
15218  pm_parser_t *parser,
15219  bool allows_trailing_comma,
15220  const pm_token_t *opening,
15221  bool is_lambda_literal,
15222  bool accepts_blocks_in_defaults,
15223  uint16_t depth
15224 ) {
15225  pm_parameters_node_t *parameters = NULL;
15226  if (!match1(parser, PM_TOKEN_SEMICOLON)) {
15227  parameters = parse_parameters(
15228  parser,
15229  is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
15230  false,
15231  allows_trailing_comma,
15232  false,
15233  accepts_blocks_in_defaults,
15234  (uint16_t) (depth + 1)
15235  );
15236  }
15237 
15238  pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
15239  if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
15240  accept1(parser, PM_TOKEN_NEWLINE);
15241 
15242  if (accept1(parser, PM_TOKEN_SEMICOLON)) {
15243  do {
15244  switch (parser->current.type) {
15245  case PM_TOKEN_CONSTANT:
15246  pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
15247  parser_lex(parser);
15248  break;
15250  pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
15251  parser_lex(parser);
15252  break;
15254  pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
15255  parser_lex(parser);
15256  break;
15258  pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
15259  parser_lex(parser);
15260  break;
15261  default:
15262  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
15263  break;
15264  }
15265 
15266  bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
15267  pm_parser_local_add_token(parser, &parser->previous, 1);
15268 
15269  pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
15270  if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
15271 
15272  pm_block_parameters_node_append_local(block_parameters, local);
15273  } while (accept1(parser, PM_TOKEN_COMMA));
15274  }
15275  }
15276 
15277  return block_parameters;
15278 }
15279 
15284 static bool
15285 outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15286  for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15287  if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
15288  }
15289 
15290  return false;
15291 }
15292 
/**
 * The names of the numbered parameters, `_1` through `_9`, stored in order so
 * that the name for numbered parameter N is found at index N - 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
};
15301 
15307 static pm_node_t *
15308 parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
15309  pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
15310 
15311  // If we have ordinary parameters, then we will return them as the set of
15312  // parameters.
15313  if (parameters != NULL) {
15314  // If we also have implicit parameters, then this is an error.
15315  if (implicit_parameters->size > 0) {
15316  pm_node_t *node = implicit_parameters->nodes[0];
15317 
15319  pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15320  } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15321  pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
15322  } else {
15323  assert(false && "unreachable");
15324  }
15325  }
15326 
15327  return parameters;
15328  }
15329 
15330  // If we don't have any implicit parameters, then the set of parameters is
15331  // NULL.
15332  if (implicit_parameters->size == 0) {
15333  return NULL;
15334  }
15335 
15336  // If we don't have ordinary parameters, then we now must validate our set
15337  // of implicit parameters. We can only have numbered parameters or it, but
15338  // they cannot be mixed.
15339  uint8_t numbered_parameter = 0;
15340  bool it_parameter = false;
15341 
15342  for (size_t index = 0; index < implicit_parameters->size; index++) {
15343  pm_node_t *node = implicit_parameters->nodes[index];
15344 
15346  if (it_parameter) {
15347  pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15348  } else if (outer_scope_using_numbered_parameters_p(parser)) {
15349  pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15350  } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15351  pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15352  } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
15353  numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
15354  } else {
15355  assert(false && "unreachable");
15356  }
15357  } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15358  if (numbered_parameter > 0) {
15359  pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15360  } else {
15361  it_parameter = true;
15362  }
15363  }
15364  }
15365 
15366  if (numbered_parameter > 0) {
15367  // Go through the parent scopes and mark them as being disallowed from
15368  // using numbered parameters because this inner scope is using them.
15369  for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15370  scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15371  }
15372 
15373  const pm_location_t location = { .start = opening->start, .end = closing->end };
15374  return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
15375  }
15376 
15377  if (it_parameter) {
15378  return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
15379  }
15380 
15381  return NULL;
15382 }
15383 
15387 static pm_block_node_t *
15388 parse_block(pm_parser_t *parser, uint16_t depth) {
15389  pm_token_t opening = parser->previous;
15390  accept1(parser, PM_TOKEN_NEWLINE);
15391 
15392  pm_accepts_block_stack_push(parser, true);
15393  pm_parser_scope_push(parser, false);
15394 
15395  pm_block_parameters_node_t *block_parameters = NULL;
15396 
15397  if (accept1(parser, PM_TOKEN_PIPE)) {
15398  pm_token_t block_parameters_opening = parser->previous;
15399  if (match1(parser, PM_TOKEN_PIPE)) {
15400  block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15401  parser->command_start = true;
15402  parser_lex(parser);
15403  } else {
15404  block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15405  accept1(parser, PM_TOKEN_NEWLINE);
15406  parser->command_start = true;
15407  expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15408  }
15409 
15410  pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15411  }
15412 
15413  accept1(parser, PM_TOKEN_NEWLINE);
15414  pm_node_t *statements = NULL;
15415 
15416  if (opening.type == PM_TOKEN_BRACE_LEFT) {
15417  if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15418  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
15419  }
15420 
15421  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
15422  } else {
15423  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15425  pm_accepts_block_stack_push(parser, true);
15426  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
15427  pm_accepts_block_stack_pop(parser);
15428  }
15429 
15430  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15431  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15432  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
15433  }
15434  }
15435 
15436  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
15437  }
15438 
15439  pm_constant_id_list_t locals;
15440  pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15441  pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
15442 
15443  pm_parser_scope_pop(parser);
15444  pm_accepts_block_stack_pop(parser);
15445 
15446  return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15447 }
15448 
15454 static bool
15455 parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
15456  bool found = false;
15457 
15458  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15459  found |= true;
15460  arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15461 
15462  if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15463  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15464  } else {
15465  pm_accepts_block_stack_push(parser, true);
15466  parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
15467 
15468  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15469  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
15470  parser->previous.start = parser->previous.end;
15471  parser->previous.type = PM_TOKEN_MISSING;
15472  }
15473 
15474  pm_accepts_block_stack_pop(parser);
15475  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15476  }
15477  } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15478  found |= true;
15479  pm_accepts_block_stack_push(parser, false);
15480 
15481  // If we get here, then the subsequent token cannot be used as an infix
15482  // operator. In this case we assume the subsequent token is part of an
15483  // argument to this method call.
15484  parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
15485 
15486  // If we have done with the arguments and still not consumed the comma,
15487  // then we have a trailing comma where we need to check whether it is
15488  // allowed or not.
15489  if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15490  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
15491  }
15492 
15493  pm_accepts_block_stack_pop(parser);
15494  }
15495 
15496  // If we're at the end of the arguments, we can now check if there is a block
15497  // node that starts with a {. If there is, then we can parse it and add it to
15498  // the arguments.
15499  if (accepts_block) {
15500  pm_block_node_t *block = NULL;
15501 
15502  if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15503  found |= true;
15504  block = parse_block(parser, (uint16_t) (depth + 1));
15505  pm_arguments_validate_block(parser, arguments, block);
15506  } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15507  found |= true;
15508  block = parse_block(parser, (uint16_t) (depth + 1));
15509  }
15510 
15511  if (block != NULL) {
15512  if (arguments->block == NULL && !arguments->has_forwarding) {
15513  arguments->block = (pm_node_t *) block;
15514  } else {
15515  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15516 
15517  if (arguments->block != NULL) {
15518  if (arguments->arguments == NULL) {
15519  arguments->arguments = pm_arguments_node_create(parser);
15520  }
15521  pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
15522  }
15523  arguments->block = (pm_node_t *) block;
15524  }
15525  }
15526  }
15527 
15528  return found;
15529 }
15530 
15535 static void
15536 parse_return(pm_parser_t *parser, pm_node_t *node) {
15537  bool in_sclass = false;
15538  for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15539  switch (context_node->context) {
15540  case PM_CONTEXT_BEGIN_ELSE:
15543  case PM_CONTEXT_BEGIN:
15544  case PM_CONTEXT_CASE_IN:
15545  case PM_CONTEXT_CASE_WHEN:
15547  case PM_CONTEXT_DEFINED:
15548  case PM_CONTEXT_ELSE:
15549  case PM_CONTEXT_ELSIF:
15550  case PM_CONTEXT_EMBEXPR:
15551  case PM_CONTEXT_FOR_INDEX:
15552  case PM_CONTEXT_FOR:
15553  case PM_CONTEXT_IF:
15555  case PM_CONTEXT_MAIN:
15557  case PM_CONTEXT_PARENS:
15558  case PM_CONTEXT_POSTEXE:
15559  case PM_CONTEXT_PREDICATE:
15560  case PM_CONTEXT_PREEXE:
15562  case PM_CONTEXT_TERNARY:
15563  case PM_CONTEXT_UNLESS:
15564  case PM_CONTEXT_UNTIL:
15565  case PM_CONTEXT_WHILE:
15566  // Keep iterating up the lists of contexts, because returns can
15567  // see through these.
15568  continue;
15572  case PM_CONTEXT_SCLASS:
15573  in_sclass = true;
15574  continue;
15575  case PM_CONTEXT_CLASS_ELSE:
15578  case PM_CONTEXT_CLASS:
15582  case PM_CONTEXT_MODULE:
15583  // These contexts are invalid for a return.
15584  pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15585  return;
15587  case PM_CONTEXT_BLOCK_ELSE:
15591  case PM_CONTEXT_DEF_ELSE:
15592  case PM_CONTEXT_DEF_ENSURE:
15593  case PM_CONTEXT_DEF_PARAMS:
15594  case PM_CONTEXT_DEF_RESCUE:
15595  case PM_CONTEXT_DEF:
15601  // These contexts are valid for a return, and we should not
15602  // continue to loop.
15603  return;
15604  case PM_CONTEXT_NONE:
15605  // This case should never happen.
15606  assert(false && "unreachable");
15607  break;
15608  }
15609  }
15610  if (in_sclass) {
15611  pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15612  }
15613 }
15614 
15619 static void
15620 parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15621  for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15622  switch (context_node->context) {
15625  case PM_CONTEXT_BLOCK_ELSE:
15628  case PM_CONTEXT_DEFINED:
15629  case PM_CONTEXT_FOR:
15636  case PM_CONTEXT_POSTEXE:
15637  case PM_CONTEXT_UNTIL:
15638  case PM_CONTEXT_WHILE:
15639  // These are the good cases. We're allowed to have a block exit
15640  // in these contexts.
15641  return;
15642  case PM_CONTEXT_DEF:
15643  case PM_CONTEXT_DEF_PARAMS:
15644  case PM_CONTEXT_DEF_ELSE:
15645  case PM_CONTEXT_DEF_ENSURE:
15646  case PM_CONTEXT_DEF_RESCUE:
15647  case PM_CONTEXT_MAIN:
15648  case PM_CONTEXT_PREEXE:
15649  case PM_CONTEXT_SCLASS:
15653  // These are the bad cases. We're not allowed to have a block
15654  // exit in these contexts.
15655  //
15656  // If we get here, then we're about to mark this block exit
15657  // as invalid. However, it could later _become_ valid if we
15658  // find a trailing while/until on the expression. In this
15659  // case instead of adding the error here, we'll add the
15660  // block exit to the list of exits for the expression, and
15661  // the node parsing will handle validating it instead.
15662  assert(parser->current_block_exits != NULL);
15664  return;
15665  case PM_CONTEXT_BEGIN_ELSE:
15668  case PM_CONTEXT_BEGIN:
15669  case PM_CONTEXT_CASE_IN:
15670  case PM_CONTEXT_CASE_WHEN:
15671  case PM_CONTEXT_CLASS_ELSE:
15674  case PM_CONTEXT_CLASS:
15676  case PM_CONTEXT_ELSE:
15677  case PM_CONTEXT_ELSIF:
15678  case PM_CONTEXT_EMBEXPR:
15679  case PM_CONTEXT_FOR_INDEX:
15680  case PM_CONTEXT_IF:
15684  case PM_CONTEXT_MODULE:
15686  case PM_CONTEXT_PARENS:
15687  case PM_CONTEXT_PREDICATE:
15689  case PM_CONTEXT_TERNARY:
15690  case PM_CONTEXT_UNLESS:
15691  // In these contexts we should continue walking up the list of
15692  // contexts.
15693  break;
15694  case PM_CONTEXT_NONE:
15695  // This case should never happen.
15696  assert(false && "unreachable");
15697  break;
15698  }
15699  }
15700 }
15701 
15706 static pm_node_list_t *
15707 push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15708  pm_node_list_t *previous_block_exits = parser->current_block_exits;
15709  parser->current_block_exits = current_block_exits;
15710  return previous_block_exits;
15711 }
15712 
15718 static void
15719 flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15720  pm_node_t *block_exit;
15721  PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15722  const char *type;
15723 
15724  switch (PM_NODE_TYPE(block_exit)) {
15725  case PM_BREAK_NODE: type = "break"; break;
15726  case PM_NEXT_NODE: type = "next"; break;
15727  case PM_REDO_NODE: type = "redo"; break;
15728  default: assert(false && "unreachable"); type = ""; break;
15729  }
15730 
15731  PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15732  }
15733 
15734  parser->current_block_exits = previous_block_exits;
15735 }
15736 
15741 static void
15742 pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15744  // If we matched a trailing while/until, then all of the block exits in
15745  // the contained list are valid. In this case we do not need to do
15746  // anything.
15747  parser->current_block_exits = previous_block_exits;
15748  } else if (previous_block_exits != NULL) {
15749  // If we did not matching a trailing while/until, then all of the block
15750  // exits contained in the list are invalid for this specific context.
15751  // However, they could still become valid in a higher level context if
15752  // there is another list above this one. In this case we'll push all of
15753  // the block exits up to the previous list.
15754  pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15755  parser->current_block_exits = previous_block_exits;
15756  } else {
15757  // If we did not match a trailing while/until and this was the last
15758  // chance to do so, then all of the block exits in the list are invalid
15759  // and we need to add an error for each of them.
15760  flush_block_exits(parser, previous_block_exits);
15761  }
15762 }
15763 
15764 static inline pm_node_t *
15765 parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15766  context_push(parser, PM_CONTEXT_PREDICATE);
15767  pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15768  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15769 
15770  // Predicates are closed by a term, a "then", or a term and then a "then".
15771  bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15772 
15773  if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15774  predicate_closed = true;
15775  *then_keyword = parser->previous;
15776  }
15777 
15778  if (!predicate_closed) {
15779  pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15780  }
15781 
15782  context_pop(parser);
15783  return predicate;
15784 }
15785 
15786 static inline pm_node_t *
15787 parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15788  pm_node_list_t current_block_exits = { 0 };
15789  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15790 
15791  pm_token_t keyword = parser->previous;
15792  pm_token_t then_keyword = not_provided(parser);
15793 
15794  pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15795  pm_statements_node_t *statements = NULL;
15796 
15798  pm_accepts_block_stack_push(parser, true);
15799  statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15800  pm_accepts_block_stack_pop(parser);
15801  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15802  }
15803 
15804  pm_token_t end_keyword = not_provided(parser);
15805  pm_node_t *parent = NULL;
15806 
15807  switch (context) {
15808  case PM_CONTEXT_IF:
15809  parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15810  break;
15811  case PM_CONTEXT_UNLESS:
15812  parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
15813  break;
15814  default:
15815  assert(false && "unreachable");
15816  break;
15817  }
15818 
15819  pm_node_t *current = parent;
15820 
15821  // Parse any number of elsif clauses. This will form a linked list of if
15822  // nodes pointing to each other from the top.
15823  if (context == PM_CONTEXT_IF) {
15824  while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15825  if (parser_end_of_line_p(parser)) {
15826  PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15827  }
15828 
15829  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15830  pm_token_t elsif_keyword = parser->current;
15831  parser_lex(parser);
15832 
15833  pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15834  pm_accepts_block_stack_push(parser, true);
15835 
15836  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15837  pm_accepts_block_stack_pop(parser);
15838  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15839 
15840  pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15841  ((pm_if_node_t *) current)->subsequent = elsif;
15842  current = elsif;
15843  }
15844  }
15845 
15846  if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15847  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15848  opening_newline_index = token_newline_index(parser);
15849 
15850  parser_lex(parser);
15851  pm_token_t else_keyword = parser->previous;
15852 
15853  pm_accepts_block_stack_push(parser, true);
15854  pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15855  pm_accepts_block_stack_pop(parser);
15856 
15857  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15858  parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15859  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
15860 
15861  pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15862 
15863  switch (context) {
15864  case PM_CONTEXT_IF:
15865  ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
15866  break;
15867  case PM_CONTEXT_UNLESS:
15868  ((pm_unless_node_t *) parent)->else_clause = else_node;
15869  break;
15870  default:
15871  assert(false && "unreachable");
15872  break;
15873  }
15874  } else {
15875  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15876  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
15877  }
15878 
15879  // Set the appropriate end location for all of the nodes in the subtree.
15880  switch (context) {
15881  case PM_CONTEXT_IF: {
15882  pm_node_t *current = parent;
15883  bool recursing = true;
15884 
15885  while (recursing) {
15886  switch (PM_NODE_TYPE(current)) {
15887  case PM_IF_NODE:
15888  pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15889  current = ((pm_if_node_t *) current)->subsequent;
15890  recursing = current != NULL;
15891  break;
15892  case PM_ELSE_NODE:
15893  pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
15894  recursing = false;
15895  break;
15896  default: {
15897  recursing = false;
15898  break;
15899  }
15900  }
15901  }
15902  break;
15903  }
15904  case PM_CONTEXT_UNLESS:
15905  pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
15906  break;
15907  default:
15908  assert(false && "unreachable");
15909  break;
15910  }
15911 
15912  pop_block_exits(parser, previous_block_exits);
15913  pm_node_list_free(&current_block_exits);
15914 
15915  return parent;
15916 }
15917 
/**
 * Expands to a run of case labels, minus the leading `case` and the trailing
 * colon, covering the PM_TOKEN_KEYWORD_* token types listed below. It is
 * meant to be written as `case PM_CASE_KEYWORD:` inside a switch over a token
 * type.
 */
15922 #define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
15923  case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
15924  case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \
15925  case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
15926  case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
15927  case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
15928  case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
15929  case PM_TOKEN_KEYWORD_OR: case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
15930  case PM_TOKEN_KEYWORD_RETURN: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: case PM_TOKEN_KEYWORD_THEN: \
15931  case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_UNDEF: case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: \
15932  case PM_TOKEN_KEYWORD_WHEN: case PM_TOKEN_KEYWORD_WHILE: case PM_TOKEN_KEYWORD_YIELD
15933 
/**
 * Expands to a run of case labels (same shape as PM_CASE_KEYWORD) covering
 * operator token types. In this file it is used where an operator may appear
 * as a method name, e.g. in symbols and in arguments to `undef` and `alias`.
 * Note that PM_TOKEN_BACKTICK and PM_TOKEN_UMINUS_NUM also appear in
 * PM_CASE_PRIMITIVE, so the two macros cannot share one switch.
 */
15938 #define PM_CASE_OPERATOR PM_TOKEN_AMPERSAND: case PM_TOKEN_BACKTICK: case PM_TOKEN_BANG_EQUAL: \
15939  case PM_TOKEN_BANG_TILDE: case PM_TOKEN_BANG: case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
15940  case PM_TOKEN_BRACKET_LEFT_RIGHT: case PM_TOKEN_CARET: case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: \
15941  case PM_TOKEN_EQUAL_TILDE: case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: case PM_TOKEN_GREATER: \
15942  case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: case PM_TOKEN_LESS_LESS: case PM_TOKEN_LESS: \
15943  case PM_TOKEN_MINUS: case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: case PM_TOKEN_PLUS: case PM_TOKEN_SLASH: \
15944  case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: case PM_TOKEN_TILDE: case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: \
15945  case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_UPLUS: case PM_TOKEN_USTAR: case PM_TOKEN_USTAR_STAR
15946 
/**
 * Expands to a run of case labels (same shape as PM_CASE_KEYWORD) covering
 * numeric literal tokens, the tokens that open symbols, regular expressions,
 * strings, %-lists, and heredocs, plus a handful of literal-like keywords.
 * NOTE(review): presumably the tokens that can begin a primitive value (for
 * example in a pattern); confirm against the call sites.
 */
15952 #define PM_CASE_PRIMITIVE PM_TOKEN_INTEGER: case PM_TOKEN_INTEGER_IMAGINARY: case PM_TOKEN_INTEGER_RATIONAL: \
15953  case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: case PM_TOKEN_FLOAT: case PM_TOKEN_FLOAT_IMAGINARY: \
15954  case PM_TOKEN_FLOAT_RATIONAL: case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: case PM_TOKEN_SYMBOL_BEGIN: \
15955  case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: case PM_TOKEN_PERCENT_LOWER_X: case PM_TOKEN_PERCENT_LOWER_I: \
15956  case PM_TOKEN_PERCENT_LOWER_W: case PM_TOKEN_PERCENT_UPPER_I: case PM_TOKEN_PERCENT_UPPER_W: \
15957  case PM_TOKEN_STRING_BEGIN: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_TRUE: \
15958  case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
15959  case PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_MINUS_GREATER: case PM_TOKEN_HEREDOC_START: \
15960  case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_CHARACTER_LITERAL
15961 
/**
 * Expands to a run of case labels (same shape as PM_CASE_KEYWORD) covering
 * identifier, label, constant, and variable tokens together with the `&`,
 * `*`, `**`, and `...` prefix tokens.
 * NOTE(review): presumably the tokens that can begin a parameter; confirm
 * against the call sites.
 */
15966 #define PM_CASE_PARAMETER PM_TOKEN_UAMPERSAND: case PM_TOKEN_AMPERSAND: case PM_TOKEN_UDOT_DOT_DOT: \
15967  case PM_TOKEN_IDENTIFIER: case PM_TOKEN_LABEL: case PM_TOKEN_USTAR: case PM_TOKEN_STAR: case PM_TOKEN_STAR_STAR: \
15968  case PM_TOKEN_USTAR_STAR: case PM_TOKEN_CONSTANT: case PM_TOKEN_INSTANCE_VARIABLE: case PM_TOKEN_GLOBAL_VARIABLE: \
15969  case PM_TOKEN_CLASS_VARIABLE
15970 
/**
 * Expands to a run of case labels over node types rather than token types:
 * the variable and constant read nodes, constant paths, and multi-target
 * nodes listed below.
 * NOTE(review): presumably the nodes that can be turned into assignment
 * targets; confirm against the call sites.
 */
15975 #define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
15976  case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
15977  case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
15978  case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
15979 
15980 // Assert here that the flags are the same so that we can safely switch the type
15981 // of the node without having to move the flags.
15982 PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
15983 
15988 static inline pm_node_flags_t
15989 parse_unescaped_encoding(const pm_parser_t *parser) {
15990  if (parser->explicit_encoding != NULL) {
15991  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
15992  // If the there's an explicit encoding and it's using a UTF-8 escape
15993  // sequence, then mark the string as UTF-8.
15995  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
15996  // If there's a non-UTF-8 escape sequence being used, then the
15997  // string uses the source encoding, unless the source is marked as
15998  // US-ASCII. In that case the string is forced as ASCII-8BIT in
15999  // order to keep the string valid.
16001  }
16002  }
16003  return 0;
16004 }
16005 
16010 static pm_node_t *
16011 parse_string_part(pm_parser_t *parser, uint16_t depth) {
16012  switch (parser->current.type) {
16013  // Here the lexer has returned to us plain string content. In this case
16014  // we'll create a string node that has no opening or closing and return that
16015  // as the part. These kinds of parts look like:
16016  //
16017  // "aaa #{bbb} #@ccc ddd"
16018  // ^^^^ ^ ^^^^
16019  case PM_TOKEN_STRING_CONTENT: {
16020  pm_token_t opening = not_provided(parser);
16021  pm_token_t closing = not_provided(parser);
16022 
16023  pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
16024  pm_node_flag_set(node, parse_unescaped_encoding(parser));
16025 
16026  parser_lex(parser);
16027  return node;
16028  }
16029  // Here the lexer has returned the beginning of an embedded expression. In
16030  // that case we'll parse the inner statements and return that as the part.
16031  // These kinds of parts look like:
16032  //
16033  // "aaa #{bbb} #@ccc ddd"
16034  // ^^^^^^
16035  case PM_TOKEN_EMBEXPR_BEGIN: {
16036  // Ruby disallows seeing encoding around interpolation in strings,
16037  // even though it is known at parse time.
16038  parser->explicit_encoding = NULL;
16039 
16040  pm_lex_state_t state = parser->lex_state;
16041  int brace_nesting = parser->brace_nesting;
16042 
16043  parser->brace_nesting = 0;
16044  lex_state_set(parser, PM_LEX_STATE_BEG);
16045  parser_lex(parser);
16046 
16047  pm_token_t opening = parser->previous;
16048  pm_statements_node_t *statements = NULL;
16049 
16050  if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
16051  pm_accepts_block_stack_push(parser, true);
16052  statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
16053  pm_accepts_block_stack_pop(parser);
16054  }
16055 
16056  parser->brace_nesting = brace_nesting;
16057  lex_state_set(parser, state);
16058 
16059  expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
16060  pm_token_t closing = parser->previous;
16061 
16062  // If this set of embedded statements only contains a single
16063  // statement, then Ruby does not consider it as a possible statement
16064  // that could emit a line event.
16065  if (statements != NULL && statements->body.size == 1) {
16066  pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
16067  }
16068 
16069  return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
16070  }
16071 
16072  // Here the lexer has returned the beginning of an embedded variable.
16073  // In that case we'll parse the variable and create an appropriate node
16074  // for it and then return that node. These kinds of parts look like:
16075  //
16076  // "aaa #{bbb} #@ccc ddd"
16077  // ^^^^^
16078  case PM_TOKEN_EMBVAR: {
16079  // Ruby disallows seeing encoding around interpolation in strings,
16080  // even though it is known at parse time.
16081  parser->explicit_encoding = NULL;
16082 
16083  lex_state_set(parser, PM_LEX_STATE_BEG);
16084  parser_lex(parser);
16085 
16086  pm_token_t operator = parser->previous;
16087  pm_node_t *variable;
16088 
16089  switch (parser->current.type) {
16090  // In this case a back reference is being interpolated. We'll
16091  // create a global variable read node.
16093  parser_lex(parser);
16094  variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16095  break;
16096  // In this case an nth reference is being interpolated. We'll
16097  // create a global variable read node.
16099  parser_lex(parser);
16100  variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16101  break;
16102  // In this case a global variable is being interpolated. We'll
16103  // create a global variable read node.
16105  parser_lex(parser);
16106  variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16107  break;
16108  // In this case an instance variable is being interpolated.
16109  // We'll create an instance variable read node.
16111  parser_lex(parser);
16112  variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
16113  break;
16114  // In this case a class variable is being interpolated. We'll
16115  // create a class variable read node.
16117  parser_lex(parser);
16118  variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
16119  break;
16120  // We can hit here if we got an invalid token. In that case
16121  // we'll not attempt to lex this token and instead just return a
16122  // missing node.
16123  default:
16124  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
16125  variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16126  break;
16127  }
16128 
16129  return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
16130  }
16131  default:
16132  parser_lex(parser);
16133  pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
16134  return NULL;
16135  }
16136 }
16137 
16143 static const uint8_t *
16144 parse_operator_symbol_name(const pm_token_t *name) {
16145  switch (name->type) {
16146  case PM_TOKEN_TILDE:
16147  case PM_TOKEN_BANG:
16148  if (name->end[-1] == '@') return name->end - 1;
16149  /* fallthrough */
16150  default:
16151  return name->end;
16152  }
16153 }
16154 
16155 static pm_node_t *
16156 parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
16157  pm_token_t closing = not_provided(parser);
16158  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
16159 
16160  const uint8_t *end = parse_operator_symbol_name(&parser->current);
16161 
16162  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16163  parser_lex(parser);
16164 
16165  pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
16166  pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
16167 
16168  return (pm_node_t *) symbol;
16169 }
16170 
16176 static pm_node_t *
16177 parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
16178  const pm_token_t opening = parser->previous;
16179 
16180  if (lex_mode->mode != PM_LEX_STRING) {
16181  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16182 
16183  switch (parser->current.type) {
16184  case PM_CASE_OPERATOR:
16185  return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
16186  case PM_TOKEN_IDENTIFIER:
16187  case PM_TOKEN_CONSTANT:
16189  case PM_TOKEN_METHOD_NAME:
16194  case PM_CASE_KEYWORD:
16195  parser_lex(parser);
16196  break;
16197  default:
16198  expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
16199  break;
16200  }
16201 
16202  pm_token_t closing = not_provided(parser);
16203  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16204 
16205  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16206  pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16207 
16208  return (pm_node_t *) symbol;
16209  }
16210 
16211  if (lex_mode->as.string.interpolation) {
16212  // If we have the end of the symbol, then we can return an empty symbol.
16213  if (match1(parser, PM_TOKEN_STRING_END)) {
16214  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16215  parser_lex(parser);
16216 
16217  pm_token_t content = not_provided(parser);
16218  pm_token_t closing = parser->previous;
16219  return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
16220  }
16221 
16222  // Now we can parse the first part of the symbol.
16223  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
16224 
16225  // If we got a string part, then it's possible that we could transform
16226  // what looks like an interpolated symbol into a regular symbol.
16227  if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16228  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16229  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16230 
16231  return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
16232  }
16233 
16234  pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16235  if (part) pm_interpolated_symbol_node_append(symbol, part);
16236 
16237  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16238  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16239  pm_interpolated_symbol_node_append(symbol, part);
16240  }
16241  }
16242 
16243  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16244  if (match1(parser, PM_TOKEN_EOF)) {
16245  pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16246  } else {
16247  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16248  }
16249 
16250  pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16251  return (pm_node_t *) symbol;
16252  }
16253 
16254  pm_token_t content;
16255  pm_string_t unescaped;
16256 
16257  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16258  content = parser->current;
16259  unescaped = parser->current_string;
16260  parser_lex(parser);
16261 
16262  // If we have two string contents in a row, then the content of this
16263  // symbol is split because of heredoc contents. This looks like:
16264  //
16265  // <<A; :'a
16266  // A
16267  // b'
16268  //
16269  // In this case, the best way we have to represent this is as an
16270  // interpolated string node, so that's what we'll do here.
16271  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16272  pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16273  pm_token_t bounds = not_provided(parser);
16274 
16275  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
16276  pm_interpolated_symbol_node_append(symbol, part);
16277 
16278  part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
16279  pm_interpolated_symbol_node_append(symbol, part);
16280 
16281  if (next_state != PM_LEX_STATE_NONE) {
16282  lex_state_set(parser, next_state);
16283  }
16284 
16285  parser_lex(parser);
16286  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16287 
16288  pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16289  return (pm_node_t *) symbol;
16290  }
16291  } else {
16292  content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
16293  pm_string_shared_init(&unescaped, content.start, content.end);
16294  }
16295 
16296  if (next_state != PM_LEX_STATE_NONE) {
16297  lex_state_set(parser, next_state);
16298  }
16299 
16300  if (match1(parser, PM_TOKEN_EOF)) {
16301  pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
16302  } else {
16303  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16304  }
16305 
16306  return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
16307 }
16308 
16313 static inline pm_node_t *
16314 parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
16315  switch (parser->current.type) {
16316  case PM_CASE_OPERATOR: {
16317  const pm_token_t opening = not_provided(parser);
16318  return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
16319  }
16320  case PM_CASE_KEYWORD:
16321  case PM_TOKEN_CONSTANT:
16322  case PM_TOKEN_IDENTIFIER:
16323  case PM_TOKEN_METHOD_NAME: {
16324  parser_lex(parser);
16325 
16326  pm_token_t opening = not_provided(parser);
16327  pm_token_t closing = not_provided(parser);
16328  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16329 
16330  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16331  pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16332 
16333  return (pm_node_t *) symbol;
16334  }
16335  case PM_TOKEN_SYMBOL_BEGIN: {
16336  pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16337  parser_lex(parser);
16338 
16339  return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16340  }
16341  default:
16342  pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16343  return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16344  }
16345 }
16346 
16353 static inline pm_node_t *
16354 parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16355  switch (parser->current.type) {
16356  case PM_CASE_OPERATOR: {
16357  const pm_token_t opening = not_provided(parser);
16358  return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16359  }
16360  case PM_CASE_KEYWORD:
16361  case PM_TOKEN_CONSTANT:
16362  case PM_TOKEN_IDENTIFIER:
16363  case PM_TOKEN_METHOD_NAME: {
16364  if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16365  parser_lex(parser);
16366 
16367  pm_token_t opening = not_provided(parser);
16368  pm_token_t closing = not_provided(parser);
16369  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16370 
16371  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16372  pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16373 
16374  return (pm_node_t *) symbol;
16375  }
16376  case PM_TOKEN_SYMBOL_BEGIN: {
16377  pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16378  parser_lex(parser);
16379 
16380  return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16381  }
16383  parser_lex(parser);
16384  return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16386  parser_lex(parser);
16387  return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16389  parser_lex(parser);
16390  return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16391  default:
16392  pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16393  return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16394  }
16395 }
16396 
16401 static pm_node_t *
16402 parse_variable(pm_parser_t *parser) {
16403  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16404  int depth;
16405 
16406  if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
16407  return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16408  }
16409 
16410  pm_scope_t *current_scope = parser->current_scope;
16411  if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16412  if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
16413  // When you use a numbered parameter, it implies the existence of
16414  // all of the locals that exist before it. For example, referencing
16415  // _2 means that _1 must exist. Therefore here we loop through all
16416  // of the possibilities and add them into the constant pool.
16417  uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16418  for (uint8_t number = 1; number <= maximum; number++) {
16419  pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16420  }
16421 
16422  if (!match1(parser, PM_TOKEN_EQUAL)) {
16423  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16424  }
16425 
16426  pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
16427  pm_node_list_append(&current_scope->implicit_parameters, node);
16428 
16429  return node;
16430  } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16431  pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
16432  pm_node_list_append(&current_scope->implicit_parameters, node);
16433 
16434  return node;
16435  }
16436  }
16437 
16438  return NULL;
16439 }
16440 
16444 static pm_node_t *
16445 parse_variable_call(pm_parser_t *parser) {
16446  pm_node_flags_t flags = 0;
16447 
16448  if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16449  pm_node_t *node = parse_variable(parser);
16450  if (node != NULL) return node;
16452  }
16453 
16454  pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16455  pm_node_flag_set((pm_node_t *)node, flags);
16456 
16457  return (pm_node_t *) node;
16458 }
16459 
16465 static inline pm_token_t
16466 parse_method_definition_name(pm_parser_t *parser) {
16467  switch (parser->current.type) {
16468  case PM_CASE_KEYWORD:
16469  case PM_TOKEN_CONSTANT:
16470  case PM_TOKEN_METHOD_NAME:
16471  parser_lex(parser);
16472  return parser->previous;
16473  case PM_TOKEN_IDENTIFIER:
16474  pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
16475  parser_lex(parser);
16476  return parser->previous;
16477  case PM_CASE_OPERATOR:
16478  lex_state_set(parser, PM_LEX_STATE_ENDFN);
16479  parser_lex(parser);
16480  return parser->previous;
16481  default:
16482  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
16483  return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
16484  }
16485 }
16486 
16487 static void
16488 parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
16489  // Get a reference to the string struct that is being held by the string
16490  // node. This is the value we're going to actually manipulate.
16491  pm_string_ensure_owned(string);
16492 
16493  // Now get the bounds of the existing string. We'll use this as a
16494  // destination to move bytes into. We'll also use it for bounds checking
16495  // since we don't require that these strings be null terminated.
16496  size_t dest_length = pm_string_length(string);
16497  const uint8_t *source_cursor = (uint8_t *) string->source;
16498  const uint8_t *source_end = source_cursor + dest_length;
16499 
16500  // We're going to move bytes backward in the string when we get leading
16501  // whitespace, so we'll maintain a pointer to the current position in the
16502  // string that we're writing to.
16503  size_t trimmed_whitespace = 0;
16504 
16505  // While we haven't reached the amount of common whitespace that we need to
16506  // trim and we haven't reached the end of the string, we'll keep trimming
16507  // whitespace. Trimming in this context means skipping over these bytes such
16508  // that they aren't copied into the new string.
16509  while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16510  if (*source_cursor == '\t') {
16511  trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16512  if (trimmed_whitespace > common_whitespace) break;
16513  } else {
16514  trimmed_whitespace++;
16515  }
16516 
16517  source_cursor++;
16518  dest_length--;
16519  }
16520 
16521  memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
16522  string->length = dest_length;
16523 }
16524 
16528 static void
16529 parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16530  // The next node should be dedented if it's the first node in the list or if
16531  // it follows a string node.
16532  bool dedent_next = true;
16533 
16534  // Iterate over all nodes, and trim whitespace accordingly. We're going to
16535  // keep around two indices: a read and a write. If we end up trimming all of
16536  // the whitespace from a node, then we'll drop it from the list entirely.
16537  size_t write_index = 0;
16538 
16539  pm_node_t *node;
16540  PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16541  // We're not manipulating child nodes that aren't strings. In this case
16542  // we'll skip past it and indicate that the subsequent node should not
16543  // be dedented.
16544  if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16545  nodes->nodes[write_index++] = node;
16546  dedent_next = false;
16547  continue;
16548  }
16549 
16550  pm_string_node_t *string_node = ((pm_string_node_t *) node);
16551  if (dedent_next) {
16552  parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
16553  }
16554 
16555  if (string_node->unescaped.length == 0) {
16556  pm_node_destroy(parser, node);
16557  } else {
16558  nodes->nodes[write_index++] = node;
16559  }
16560 
16561  // We always dedent the next node if it follows a string node.
16562  dedent_next = true;
16563  }
16564 
16565  nodes->size = write_index;
16566 }
16567 
16571 static pm_token_t
16572 parse_strings_empty_content(const uint8_t *location) {
16573  return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16574 }
16575 
16579 static inline pm_node_t *
16580 parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16581  assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16582  bool concating = false;
16583 
16584  while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16585  pm_node_t *node = NULL;
16586 
16587  // Here we have found a string literal. We'll parse it and add it to
16588  // the list of strings.
16589  const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16590  assert(lex_mode->mode == PM_LEX_STRING);
16591  bool lex_interpolation = lex_mode->as.string.interpolation;
16592  bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16593 
16594  pm_token_t opening = parser->current;
16595  parser_lex(parser);
16596 
16597  if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16598  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16599  // If we get here, then we have an end immediately after a
16600  // start. In that case we'll create an empty content token and
16601  // return an uninterpolated string.
16602  pm_token_t content = parse_strings_empty_content(parser->previous.start);
16603  pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16604 
16605  pm_string_shared_init(&string->unescaped, content.start, content.end);
16606  node = (pm_node_t *) string;
16607  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16608  // If we get here, then we have an end of a label immediately
16609  // after a start. In that case we'll create an empty symbol
16610  // node.
16611  pm_token_t content = parse_strings_empty_content(parser->previous.start);
16612  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16613 
16614  pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16615  node = (pm_node_t *) symbol;
16616 
16617  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16618  } else if (!lex_interpolation) {
16619  // If we don't accept interpolation then we expect the string to
16620  // start with a single string content node.
16621  pm_string_t unescaped;
16622  pm_token_t content;
16623 
16624  if (match1(parser, PM_TOKEN_EOF)) {
16625  unescaped = PM_STRING_EMPTY;
16626  content = not_provided(parser);
16627  } else {
16628  unescaped = parser->current_string;
16629  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16630  content = parser->previous;
16631  }
16632 
16633  // It is unfortunately possible to have multiple string content
16634  // nodes in a row in the case that there's heredoc content in
16635  // the middle of the string, like this cursed example:
16636  //
16637  // <<-END+'b
16638  // a
16639  // END
16640  // c'+'d'
16641  //
16642  // In that case we need to switch to an interpolated string to
16643  // be able to contain all of the parts.
16644  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16645  pm_node_list_t parts = { 0 };
16646 
16647  pm_token_t delimiters = not_provided(parser);
16648  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16649  pm_node_list_append(&parts, part);
16650 
16651  do {
16652  part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16653  pm_node_list_append(&parts, part);
16654  parser_lex(parser);
16655  } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16656 
16657  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16658  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16659 
16660  pm_node_list_free(&parts);
16661  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16662  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16663  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16664  } else if (match1(parser, PM_TOKEN_EOF)) {
16665  pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16666  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16667  } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16668  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16669  } else {
16670  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16671  parser->previous.start = parser->previous.end;
16672  parser->previous.type = PM_TOKEN_MISSING;
16673  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16674  }
16675  } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16676  // In this case we've hit string content so we know the string
16677  // at least has something in it. We'll need to check if the
16678  // following token is the end (in which case we can return a
16679  // plain string) or if it's not then it has interpolation.
16680  pm_token_t content = parser->current;
16681  pm_string_t unescaped = parser->current_string;
16682  parser_lex(parser);
16683 
16684  if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16685  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16686  pm_node_flag_set(node, parse_unescaped_encoding(parser));
16687 
16688  // Kind of odd behavior, but basically if we have an
16689  // unterminated string and it ends in a newline, we back up one
16690  // character so that the error message is on the last line of
16691  // content in the string.
16692  if (!accept1(parser, PM_TOKEN_STRING_END)) {
16693  const uint8_t *location = parser->previous.end;
16694  if (location > parser->start && location[-1] == '\n') location--;
16695  pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16696 
16697  parser->previous.start = parser->previous.end;
16698  parser->previous.type = PM_TOKEN_MISSING;
16699  }
16700  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16701  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16702  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16703  } else {
16704  // If we get here, then we have interpolation so we'll need
16705  // to create a string or symbol node with interpolation.
16706  pm_node_list_t parts = { 0 };
16707  pm_token_t string_opening = not_provided(parser);
16708  pm_token_t string_closing = not_provided(parser);
16709 
16710  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16711  pm_node_flag_set(part, parse_unescaped_encoding(parser));
16712  pm_node_list_append(&parts, part);
16713 
16714  while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16715  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16716  pm_node_list_append(&parts, part);
16717  }
16718  }
16719 
16720  if (accept1(parser, PM_TOKEN_LABEL_END)) {
16721  node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16722  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16723  } else if (match1(parser, PM_TOKEN_EOF)) {
16724  pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16725  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16726  } else {
16727  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16728  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16729  }
16730 
16731  pm_node_list_free(&parts);
16732  }
16733  } else {
16734  // If we get here, then the first part of the string is not plain
16735  // string content, in which case we need to parse the string as an
16736  // interpolated string.
16737  pm_node_list_t parts = { 0 };
16738  pm_node_t *part;
16739 
16740  while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16741  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16742  pm_node_list_append(&parts, part);
16743  }
16744  }
16745 
16746  if (accept1(parser, PM_TOKEN_LABEL_END)) {
16747  node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16748  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16749  } else if (match1(parser, PM_TOKEN_EOF)) {
16750  pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16751  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16752  } else {
16753  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16754  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16755  }
16756 
16757  pm_node_list_free(&parts);
16758  }
16759 
16760  if (current == NULL) {
16761  // If the node we just parsed is a symbol node, then we can't
16762  // concatenate it with anything else, so we can now return that
16763  // node.
16765  return node;
16766  }
16767 
16768  // If we don't already have a node, then it's fine and we can just
16769  // set the result to be the node we just parsed.
16770  current = node;
16771  } else {
16772  // Otherwise we need to check the type of the node we just parsed.
16773  // If it cannot be concatenated with the previous node, then we'll
16774  // need to add a syntax error.
16776  pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16777  }
16778 
16779  // If we haven't already created our container for concatenation,
16780  // we'll do that now.
16781  if (!concating) {
16782  concating = true;
16783  pm_token_t bounds = not_provided(parser);
16784 
16785  pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16786  pm_interpolated_string_node_append(container, current);
16787  current = (pm_node_t *) container;
16788  }
16789 
16790  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16791  }
16792  }
16793 
16794  return current;
16795 }
16796 
// Values passed as the `flags` argument of parse_pattern. Callers in this file
// either pass PM_PARSE_PATTERN_SINGLE alone or OR together
// PM_PARSE_PATTERN_TOP and PM_PARSE_PATTERN_MULTI.
// NOTE(review): presumably TOP marks a top-level pattern and MULTI allows a
// comma-separated list of patterns; confirm against parse_pattern itself.
16797 #define PM_PARSE_PATTERN_SINGLE 0
16798 #define PM_PARSE_PATTERN_TOP 1
16799 #define PM_PARSE_PATTERN_MULTI 2
16800 
// Forward declaration: the pattern helpers below call parse_pattern before it
// is defined.
16801 static pm_node_t *
16802 parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16803 
16809 static void
16810 parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16811  // Skip this capture if it starts with an underscore.
16812  if (*location->start == '_') return;
16813 
16814  if (pm_constant_id_list_includes(captures, capture)) {
16815  pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16816  } else {
16817  pm_constant_id_list_append(captures, capture);
16818  }
16819 }
16820 
16824 static pm_node_t *
16825 parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16826  // Now, if there are any :: operators that follow, parse them as constant
16827  // path nodes.
16828  while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16829  pm_token_t delimiter = parser->previous;
16830  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16831  node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
16832  }
16833 
16834  // If there is a [ or ( that follows, then this is part of a larger pattern
16835  // expression. We'll parse the inner pattern here, then modify the returned
16836  // inner pattern with our constant path attached.
16837  if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16838  return node;
16839  }
16840 
16841  pm_token_t opening;
16842  pm_token_t closing;
16843  pm_node_t *inner = NULL;
16844 
16845  if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16846  opening = parser->previous;
16847  accept1(parser, PM_TOKEN_NEWLINE);
16848 
16849  if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16850  inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16851  accept1(parser, PM_TOKEN_NEWLINE);
16852  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16853  }
16854 
16855  closing = parser->previous;
16856  } else {
16857  parser_lex(parser);
16858  opening = parser->previous;
16859  accept1(parser, PM_TOKEN_NEWLINE);
16860 
16861  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16862  inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16863  accept1(parser, PM_TOKEN_NEWLINE);
16864  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16865  }
16866 
16867  closing = parser->previous;
16868  }
16869 
16870  if (!inner) {
16871  // If there was no inner pattern, then we have something like Foo() or
16872  // Foo[]. In that case we'll create an array pattern with no requireds.
16873  return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16874  }
16875 
16876  // Now that we have the inner pattern, check to see if it's an array, find,
16877  // or hash pattern. If it is, then we'll attach our constant path to it if
16878  // it doesn't already have a constant. If it's not one of those node types
16879  // or it does have a constant, then we'll create an array pattern.
16880  switch (PM_NODE_TYPE(inner)) {
16881  case PM_ARRAY_PATTERN_NODE: {
16882  pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16883 
16884  if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16885  pattern_node->base.location.start = node->location.start;
16886  pattern_node->base.location.end = closing.end;
16887 
16888  pattern_node->constant = node;
16889  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16890  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16891 
16892  return (pm_node_t *) pattern_node;
16893  }
16894 
16895  break;
16896  }
16897  case PM_FIND_PATTERN_NODE: {
16898  pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16899 
16900  if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16901  pattern_node->base.location.start = node->location.start;
16902  pattern_node->base.location.end = closing.end;
16903 
16904  pattern_node->constant = node;
16905  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16906  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16907 
16908  return (pm_node_t *) pattern_node;
16909  }
16910 
16911  break;
16912  }
16913  case PM_HASH_PATTERN_NODE: {
16914  pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16915 
16916  if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16917  pattern_node->base.location.start = node->location.start;
16918  pattern_node->base.location.end = closing.end;
16919 
16920  pattern_node->constant = node;
16921  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16922  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16923 
16924  return (pm_node_t *) pattern_node;
16925  }
16926 
16927  break;
16928  }
16929  default:
16930  break;
16931  }
16932 
16933  // If we got here, then we didn't return one of the inner patterns by
16934  // attaching its constant. In this case we'll create an array pattern and
16935  // attach our constant to it.
16936  pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16937  pm_array_pattern_node_requireds_append(pattern_node, inner);
16938  return (pm_node_t *) pattern_node;
16939 }
16940 
16944 static pm_splat_node_t *
16945 parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16946  assert(parser->previous.type == PM_TOKEN_USTAR);
16947  pm_token_t operator = parser->previous;
16948  pm_node_t *name = NULL;
16949 
16950  // Rest patterns don't necessarily have a name associated with them. So we
16951  // will check for that here. If they do, then we'll add it to the local
16952  // table since this pattern will cause it to become a local variable.
16953  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16954  pm_token_t identifier = parser->previous;
16955  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
16956 
16957  int depth;
16958  if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16959  pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
16960  }
16961 
16962  parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
16963  name = (pm_node_t *) pm_local_variable_target_node_create(
16964  parser,
16965  &PM_LOCATION_TOKEN_VALUE(&identifier),
16966  constant_id,
16967  (uint32_t) (depth == -1 ? 0 : depth)
16968  );
16969  }
16970 
16971  // Finally we can return the created node.
16972  return pm_splat_node_create(parser, &operator, name);
16973 }
16974 
16978 static pm_node_t *
16979 parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16980  assert(parser->current.type == PM_TOKEN_USTAR_STAR);
16981  parser_lex(parser);
16982 
16983  pm_token_t operator = parser->previous;
16984  pm_node_t *value = NULL;
16985 
16986  if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16987  return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
16988  }
16989 
16990  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16991  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16992 
16993  int depth;
16994  if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16995  pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16996  }
16997 
16998  parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16999  value = (pm_node_t *) pm_local_variable_target_node_create(
17000  parser,
17001  &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17002  constant_id,
17003  (uint32_t) (depth == -1 ? 0 : depth)
17004  );
17005  }
17006 
17007  return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
17008 }
17009 
17014 static bool
17015 pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
17016  ptrdiff_t length = end - start;
17017  if (length == 0) return false;
17018 
17019  // First ensure that it starts with a valid identifier starting character.
17020  size_t width = char_is_identifier_start(parser, start);
17021  if (width == 0) return false;
17022 
17023  // Next, ensure that it's not an uppercase character.
17024  if (parser->encoding_changed) {
17025  if (parser->encoding->isupper_char(start, length)) return false;
17026  } else {
17027  if (pm_encoding_utf_8_isupper_char(start, length)) return false;
17028  }
17029 
17030  // Next, iterate through all of the bytes of the string to ensure that they
17031  // are all valid identifier characters.
17032  const uint8_t *cursor = start + width;
17033  while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
17034  return cursor == end;
17035 }
17036 
17041 static pm_node_t *
17042 parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
17043  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
17044 
17045  pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
17046  int depth = -1;
17047 
17048  if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
17049  depth = pm_parser_local_depth_constant_id(parser, constant_id);
17050  } else {
17051  pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
17052 
17053  if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
17054  PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
17055  }
17056  }
17057 
17058  if (depth == -1) {
17059  pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
17060  }
17061 
17062  parse_pattern_capture(parser, captures, constant_id, value_loc);
17063  pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17064  parser,
17065  value_loc,
17066  constant_id,
17067  (uint32_t) (depth == -1 ? 0 : depth)
17068  );
17069 
17070  return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
17071 }
17072 
17077 static void
17078 parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
17079  if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
17080  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
17081  }
17082 }
17083 
// Parse the remainder of a hash pattern whose first element has already been
// parsed by the caller and is passed in as first_node.
//
// parser     - the parser being driven
// captures   - capture list passed through to the nested pattern parsers
// first_node - the already-parsed first element: an assoc splat, a symbol, or
//              anything else (which is reported as an error)
// depth      - nesting depth, forwarded as depth + 1 to nested parse calls
//
// Returns a hash pattern node built from the collected assocs and the optional
// rest element. The temporary assoc list and the key set are released before
// returning.
//
// NOTE(review): the conditions that open a few of the branches below are not
// visible in this listing, so those branches are described only by what their
// bodies do.
17087 static pm_hash_pattern_node_t *
17088 parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
// assocs: elements collected so far; keys: keys seen so far (used to detect
// duplicates); rest: the rest element, once one has been parsed.
17089  pm_node_list_t assocs = { 0 };
17090  pm_static_literals_t keys = { 0 };
17091  pm_node_t *rest = NULL;
17092 
17093  switch (PM_NODE_TYPE(first_node)) {
17094  case PM_ASSOC_SPLAT_NODE:
// The pattern starts with a rest element, so there is no first assoc to add.
17096  rest = first_node;
17097  break;
17098  case PM_SYMBOL_NODE: {
// Only label symbols are valid keys. A symbol that is not a label skips this
// branch and falls through to the error handling in the default case.
17099  if (pm_symbol_node_label_p(first_node)) {
17100  parse_pattern_hash_key(parser, &keys, first_node);
17101  pm_node_t *value;
17102 
17104  // Otherwise, we will create an implicit local variable
17105  // target for the value.
17106  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
17107  } else {
17108  // Here we have a value for the first assoc in the list, so
17109  // we will parse it now.
17110  value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17111  }
17112 
// Hash pattern assocs have no operator token, so a placeholder is used.
17113  pm_token_t operator = not_provided(parser);
17114  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17115 
17116  pm_node_list_append(&assocs, assoc);
17117  break;
17118  }
17119  }
17120  /* fallthrough */
17121  default: {
17122  // If we get anything else, then this is an error. For this we'll
17123  // create a missing node for the value and create an assoc node for
17124  // the first node in the list.
17125  pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
17126  pm_parser_err_node(parser, first_node, diag_id);
17127 
17128  pm_token_t operator = not_provided(parser);
17129  pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
17130  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17131 
17132  pm_node_list_append(&assocs, assoc);
17133  break;
17134  }
17135  }
17136 
17137  // If there are any other assocs, then we'll parse them now.
17138  while (accept1(parser, PM_TOKEN_COMMA)) {
17139  // Here we need to break to support trailing commas.
17141  // Trailing commas are not allowed to follow a rest pattern.
17142  if (rest != NULL) {
17143  pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17144  }
17145 
17146  break;
17147  }
17148 
17149  if (match1(parser, PM_TOKEN_USTAR_STAR)) {
17150  pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
17151 
// The first rest element becomes the rest of the pattern. Any further one is
// an error, but it is still kept in the assoc list so that it stays in the
// resulting tree.
17152  if (rest == NULL) {
17153  rest = assoc;
17154  } else {
17155  pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17156  pm_node_list_append(&assocs, assoc);
17157  }
17158  } else {
17159  pm_node_t *key;
17160 
// A key is either written as a string (parsed through parse_strings) or as a
// plain label token.
17161  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17162  key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
17163 
17165  pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
17166  } else if (!pm_symbol_node_label_p(key)) {
17167  pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17168  }
17169  } else {
17170  expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17171  key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17172  }
17173 
17174  parse_pattern_hash_key(parser, &keys, key);
17175  pm_node_t *value = NULL;
17176 
// The value is either implicit (derived from the key) or an explicitly
// written single pattern.
17178  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
17179  } else {
17180  value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17181  }
17182 
17183  pm_token_t operator = not_provided(parser);
17184  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
17185 
// An assoc that appears after a rest element is an error, but it is still
// appended below.
17186  if (rest != NULL) {
17187  pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17188  }
17189 
17190  pm_node_list_append(&assocs, assoc);
17191  }
17192  }
17193 
// Build the node from the collected elements, then release the temporary list
// storage and the key set.
17194  pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
17195  xfree(assocs.nodes);
17196 
17197  pm_static_literals_free(&keys);
17198  return node;
17199 }
17200 
// Parse a single primitive pattern, dispatching on the type of the current
// token.
//
// parser   - the parser being driven
// captures - capture list passed through to the nested pattern parsers
// diag_id  - diagnostic reported when the current token cannot start a
//            pattern (also forwarded to parse_expression for literals)
// depth    - nesting depth, forwarded as depth + 1 to nested parse calls
//
// Returns the node for the parsed pattern. When nothing can be parsed an error
// is recorded and a missing node is returned instead.
//
// NOTE(review): several case labels of the switches below are not visible in
// this listing; the affected branches are described by what their bodies do.
17204 static pm_node_t *
17205 parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
17206  switch (parser->current.type) {
// A bare name becomes a local variable target; the local is declared first if
// the lookup below does not find it.
17207  case PM_TOKEN_IDENTIFIER:
17208  case PM_TOKEN_METHOD_NAME: {
17209  parser_lex(parser);
17210  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17211 
// NOTE(review): this local shadows the uint16_t depth parameter. Inside this
// branch "depth" is the result of the local lookup (-1 when not found), not
// the nesting depth.
17212  int depth;
17213  if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17214  pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17215  }
17216 
17217  parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17218  return (pm_node_t *) pm_local_variable_target_node_create(
17219  parser,
17220  &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17221  constant_id,
17222  (uint32_t) (depth == -1 ? 0 : depth)
17223  );
17224  }
// Bracketed pattern: everything up to the closing PM_TOKEN_BRACKET_RIGHT.
// NOTE(review): the case label for this branch is not visible in this listing.
17226  pm_token_t opening = parser->current;
17227  parser_lex(parser);
17228 
17229  if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17230  // If we have an empty array pattern, then we'll just return a new
17231  // array pattern node.
17232  return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
17233  }
17234 
17235  // Otherwise, we'll parse the inner pattern, then deal with it depending
17236  // on the type it returns.
17237  pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17238 
17239  accept1(parser, PM_TOKEN_NEWLINE);
17240  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17241  pm_token_t closing = parser->previous;
17242 
// An inner array or find pattern that does not yet have an opening location
// is reused as the result: the brackets are recorded on it and its location is
// widened to span them. Any other inner node is wrapped further below.
17243  switch (PM_NODE_TYPE(inner)) {
17244  case PM_ARRAY_PATTERN_NODE: {
17245  pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17246  if (pattern_node->opening_loc.start == NULL) {
17247  pattern_node->base.location.start = opening.start;
17248  pattern_node->base.location.end = closing.end;
17249 
17250  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17251  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17252 
17253  return (pm_node_t *) pattern_node;
17254  }
17255 
17256  break;
17257  }
17258  case PM_FIND_PATTERN_NODE: {
17259  pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17260  if (pattern_node->opening_loc.start == NULL) {
17261  pattern_node->base.location.start = opening.start;
17262  pattern_node->base.location.end = closing.end;
17263 
17264  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17265  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17266 
17267  return (pm_node_t *) pattern_node;
17268  }
17269 
17270  break;
17271  }
17272  default:
17273  break;
17274  }
17275 
// Wrap the inner node in a new array pattern as its only required element.
17276  pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
17277  pm_array_pattern_node_requireds_append(node, inner);
17278  return (pm_node_t *) node;
17279  }
17280  case PM_TOKEN_BRACE_LEFT: {
// The pattern_matching_newlines flag is switched off while the braces are
// parsed and restored before returning.
17281  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17282  parser->pattern_matching_newlines = false;
17283 
17284  pm_hash_pattern_node_t *node;
17285  pm_token_t opening = parser->current;
17286  parser_lex(parser);
17287 
17288  if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
17289  // If we have an empty hash pattern, then we'll just return a new hash
17290  // pattern node.
17291  node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
17292  } else {
17293  pm_node_t *first_node;
17294 
// Parse the first element here; parse_pattern_hash takes care of the rest.
17295  switch (parser->current.type) {
17296  case PM_TOKEN_LABEL:
17297  parser_lex(parser);
17298  first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17299  break;
17300  case PM_TOKEN_USTAR_STAR:
17301  first_node = parse_pattern_keyword_rest(parser, captures);
17302  break;
17303  case PM_TOKEN_STRING_BEGIN:
17304  first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17305  break;
17306  default: {
// Anything else cannot start a key: report it, skip the token and continue
// with a missing node in its place.
17307  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
17308  parser_lex(parser);
17309 
17310  first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
17311  break;
17312  }
17313  }
17314 
17315  node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17316 
17317  accept1(parser, PM_TOKEN_NEWLINE);
17318  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
17319  pm_token_t closing = parser->previous;
17320 
// Widen the node to include the braces and remember where they are.
17321  node->base.location.start = opening.start;
17322  node->base.location.end = closing.end;
17323 
17324  node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17325  node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17326  }
17327 
17328  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17329  return (pm_node_t *) node;
17330  }
17331  case PM_TOKEN_UDOT_DOT:
17332  case PM_TOKEN_UDOT_DOT_DOT: {
17333  pm_token_t operator = parser->current;
17334  parser_lex(parser);
17335 
17336  // Since we have a unary range operator, we need to parse the subsequent
17337  // expression as the right side of the range.
17338  switch (parser->current.type) {
17339  case PM_CASE_PRIMITIVE: {
17340  pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17341  return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17342  }
17343  default: {
// No usable right-hand side: report it and use a missing node located at the
// operator.
17344  pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17345  pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17346  return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17347  }
17348  }
17349  }
17350  case PM_CASE_PRIMITIVE: {
17351  pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
17352 
17353  // If we found a label, we need to immediately return to the caller.
17354  if (pm_symbol_node_label_p(node)) return node;
17355 
17356  // Now that we have a primitive, we need to check if it's part of a range.
17357  if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17358  pm_token_t operator = parser->previous;
17359 
17360  // Now that we have the operator, we need to check if this is followed
17361  // by another expression. If it is, then we will create a full range
17362  // node. Otherwise, we'll create an endless range.
17363  switch (parser->current.type) {
17364  case PM_CASE_PRIMITIVE: {
17365  pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17366  return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
17367  }
17368  default:
17369  return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
17370  }
17371  }
17372 
17373  return node;
17374  }
17375  case PM_TOKEN_CARET: {
17376  parser_lex(parser);
17377  pm_token_t operator = parser->previous;
17378 
17379  // At this point we have a pin operator. We need to check the subsequent
17380  // expression to determine if it's a variable or an expression.
17381  switch (parser->current.type) {
17382  case PM_TOKEN_IDENTIFIER: {
17383  parser_lex(parser);
17384  pm_node_t *variable = (pm_node_t *) parse_variable(parser);
17385 
// When parse_variable returns NULL, report the unknown local and pin a
// placeholder read node so that a node is still produced.
17386  if (variable == NULL) {
17387  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17388  variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
17389  }
17390 
17391  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17392  }
// Pinned instance variable.
// NOTE(review): the case label for this branch is not visible in this listing.
17394  parser_lex(parser);
17395  pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
17396 
17397  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17398  }
17399  case PM_TOKEN_CLASS_VARIABLE: {
17400  parser_lex(parser);
17401  pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
17402 
17403  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17404  }
17405  case PM_TOKEN_GLOBAL_VARIABLE: {
17406  parser_lex(parser);
17407  pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
17408 
17409  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17410  }
// Pinned numbered reference.
// NOTE(review): the case label for this branch is not visible in this listing.
17412  parser_lex(parser);
17413  pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
17414 
17415  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17416  }
17417  case PM_TOKEN_BACK_REFERENCE: {
17418  parser_lex(parser);
17419  pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
17420 
17421  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17422  }
// Pinned parenthesized expression. The pattern_matching_newlines flag is
// switched off only while the expression itself is parsed.
// NOTE(review): the case label for this branch is not visible in this listing.
17424  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17425  parser->pattern_matching_newlines = false;
17426 
17427  pm_token_t lparen = parser->current;
17428  parser_lex(parser);
17429 
17430  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17431  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17432 
17433  accept1(parser, PM_TOKEN_NEWLINE);
17434  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17435  return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
17436  }
17437  default: {
17438  // If we get here, then we have a pin operator followed by something
17439  // not understood. We'll create a missing node and return that.
17440  pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17441  pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17442  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17443  }
17444  }
17445  }
17446  case PM_TOKEN_UCOLON_COLON: {
// Constant path that starts with "::", so it has no parent (NULL).
17447  pm_token_t delimiter = parser->current;
17448  parser_lex(parser);
17449 
17450  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17451  pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17452 
17453  return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
17454  }
17455  case PM_TOKEN_CONSTANT: {
17456  pm_token_t constant = parser->current;
17457  parser_lex(parser);
17458 
17459  pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
17460  return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17461  }
17462  default:
// Not the start of any pattern: report diag_id on the current token and return
// a missing node covering it. The token itself is not consumed.
17463  pm_parser_err_current(parser, diag_id);
17464  return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17465  }
17466 }
17467 
// Parse a chain of primitive patterns joined by "|" into alternation pattern
// nodes, followed by any number of "=> name" captures.
//
// parser     - the parser being driven
// captures   - capture list passed through to the nested pattern parsers
// first_node - an already-parsed first operand, or NULL to parse it here
// diag_id    - diagnostic reported when the first operand cannot be parsed
// depth      - nesting depth, forwarded as depth + 1 to nested parse calls
//
// Returns the resulting pattern node.
//
// NOTE(review): some case labels of the switch below are not visible in this
// listing.
17472 static pm_node_t *
17473 parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17474  pm_node_t *node = first_node;
17475 
// The first iteration runs unconditionally when no first operand was supplied;
// after that each iteration requires a "|" token.
17476  while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
// Only meaningful when a "|" was just accepted; it is not used while node is
// still NULL.
17477  pm_token_t operator = parser->previous;
17478 
17479  switch (parser->current.type) {
17480  case PM_TOKEN_IDENTIFIER:
17482  case PM_TOKEN_BRACE_LEFT:
17483  case PM_TOKEN_CARET:
17484  case PM_TOKEN_CONSTANT:
17485  case PM_TOKEN_UCOLON_COLON:
17486  case PM_TOKEN_UDOT_DOT:
17487  case PM_TOKEN_UDOT_DOT_DOT:
17488  case PM_CASE_PRIMITIVE: {
17489  if (node == NULL) {
17490  node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17491  } else {
17492  pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17493  node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17494  }
17495 
17496  break;
17497  }
// Parenthesized pattern: parse a single pattern up to the closing parenthesis
// and wrap it in a parentheses node.
// NOTE(review): the case label(s) for this branch are not visible in this
// listing.
17500  pm_token_t opening = parser->current;
17501  parser_lex(parser);
17502 
17503  pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17504  accept1(parser, PM_TOKEN_NEWLINE);
17505  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17506  pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
17507 
17508  if (node == NULL) {
17509  node = right;
17510  } else {
17511  node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17512  }
17513 
17514  break;
17515  }
17516  default: {
// Nothing that can start a pattern: report diag_id and use a missing node as
// the operand. Assigning it to node also guarantees the loop cannot spin
// forever on a NULL node.
17517  pm_parser_err_current(parser, diag_id);
17518  pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17519 
17520  if (node == NULL) {
17521  node = right;
17522  } else {
17523  node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17524  }
17525 
17526  break;
17527  }
17528  }
17529  }
17530 
17531  // If we have an =>, then we are assigning this pattern to a variable.
17532  // In this case we should create an assignment node.
17533  while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17534  pm_token_t operator = parser->previous;
17535  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17536 
17537  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
// NOTE(review): this local shadows the uint16_t depth parameter. Here it holds
// the result of the local lookup (-1 when the name is not found).
17538  int depth;
17539 
17540  if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17541  pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17542  }
17543 
17544  parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17545  pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17546  parser,
17547  &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17548  constant_id,
17549  (uint32_t) (depth == -1 ? 0 : depth)
17550  );
17551 
// Each capture wraps the pattern built so far, so repeated "=>" nest from left
// to right.
17552  node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
17553  }
17554 
17555  return node;
17556 }
17557 
// Parse a pattern as used in pattern matching. Depending on what is found this
// produces a hash pattern, an array pattern, a find pattern, or the node
// returned by parse_pattern_primitives.
//
// parser   - the parser being driven
// captures - capture list passed through to the nested pattern parsers
// flags    - bit set of PM_PARSE_PATTERN_* values; PM_PARSE_PATTERN_TOP and
//            PM_PARSE_PATTERN_MULTI are tested below
// diag_id  - diagnostic forwarded to the primitive parsers
// depth    - nesting depth, forwarded as depth + 1 to nested parse calls
//
// NOTE(review): one condition line inside the comma loop is not visible in
// this listing.
17561 static pm_node_t *
17562 parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17563  pm_node_t *node = NULL;
17564 
// leading_rest: the pattern started with a splat. trailing_rest: a rest
// element was seen after the first comma.
17565  bool leading_rest = false;
17566  bool trailing_rest = false;
17567 
17568  switch (parser->current.type) {
17569  case PM_TOKEN_LABEL: {
// A label starts a hash pattern written without braces. That form is reported
// as an error unless PM_PARSE_PATTERN_TOP is set.
17570  parser_lex(parser);
17571  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17572  node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
17573 
17574  if (!(flags & PM_PARSE_PATTERN_TOP)) {
17575  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17576  }
17577 
17578  return node;
17579  }
17580  case PM_TOKEN_USTAR_STAR: {
// Same as above, but the hash pattern starts with a keyword rest.
17581  node = parse_pattern_keyword_rest(parser, captures);
17582  node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17583 
17584  if (!(flags & PM_PARSE_PATTERN_TOP)) {
17585  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17586  }
17587 
17588  return node;
17589  }
17590  case PM_TOKEN_STRING_BEGIN: {
17591  // We need special handling for string beginnings because they could
17592  // be dynamic symbols leading to hash patterns.
17593  node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17594 
17595  if (pm_symbol_node_label_p(node)) {
17596  node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17597 
17598  if (!(flags & PM_PARSE_PATTERN_TOP)) {
17599  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17600  }
17601 
17602  return node;
17603  }
17604 
// Not a label: continue with the string as the first operand of a possible
// alternation.
17605  node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17606  break;
17607  }
17608  case PM_TOKEN_USTAR: {
// A leading splat is only handled here when the TOP or MULTI flag is set;
// otherwise control falls through to the default case.
17609  if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17610  parser_lex(parser);
17611  node = (pm_node_t *) parse_pattern_rest(parser, captures);
17612  leading_rest = true;
17613  break;
17614  }
17615  }
17616  /* fallthrough */
17617  default:
17618  node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17619  break;
17620  }
17621 
17622  // If we got a dynamic label symbol, then we need to treat it like the
17623  // beginning of a hash pattern.
17624  if (pm_symbol_node_label_p(node)) {
17625  return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17626  }
17627 
17628  if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17629  // If we have a comma, then we are now parsing either an array pattern
17630  // or a find pattern. We need to parse all of the patterns, put them
17631  // into a big list, and then determine which type of node we have.
17632  pm_node_list_t nodes = { 0 };
17633  pm_node_list_append(&nodes, node);
17634 
17635  // Gather up all of the patterns into the list.
17636  while (accept1(parser, PM_TOKEN_COMMA)) {
17637  // Break early here in case we have a trailing comma.
// The trailing comma is recorded as an implicit rest node located at the comma
// token.
17639  node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17640  pm_node_list_append(&nodes, node);
17641  trailing_rest = true;
17642  break;
17643  }
17644 
17645  if (accept1(parser, PM_TOKEN_USTAR)) {
17646  node = (pm_node_t *) parse_pattern_rest(parser, captures);
17647 
17648  // If we have already parsed a splat pattern, then this is an
17649  // error. We will continue to parse the rest of the patterns,
17650  // but we will indicate it as an error.
17651  if (trailing_rest) {
17652  pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17653  }
17654 
17655  trailing_rest = true;
17656  } else {
17657  node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17658  }
17659 
17660  pm_node_list_append(&nodes, node);
17661  }
17662 
17663  // If the first pattern and the last pattern are rest patterns, then we
17664  // will call this a find pattern, regardless of how many rest patterns
17665  // are in between because we know we already added the appropriate
17666  // errors. Otherwise we will create an array pattern.
17667  if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17668  node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17669 
// With exactly two elements there is nothing between the two splats.
17670  if (nodes.size == 2) {
17671  pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17672  }
17673  } else {
17674  node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17675 
17676  if (leading_rest && trailing_rest) {
17677  pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17678  }
17679  }
17680 
// Only the temporary list storage is released here, after the nodes have been
// handed to the pattern node created above.
17681  xfree(nodes.nodes);
17682  } else if (leading_rest) {
17683  // Otherwise, if we parsed a single splat pattern, then we know we have
17684  // an array pattern, so we can go ahead and create that node.
17685  node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17686  }
17687 
17688  return node;
17689 }
17690 
17696 static inline void
17697 parse_negative_numeric(pm_node_t *node) {
17698  switch (PM_NODE_TYPE(node)) {
17699  case PM_INTEGER_NODE: {
17700  pm_integer_node_t *cast = (pm_integer_node_t *) node;
17701  cast->base.location.start--;
17702  cast->value.negative = true;
17703  break;
17704  }
17705  case PM_FLOAT_NODE: {
17706  pm_float_node_t *cast = (pm_float_node_t *) node;
17707  cast->base.location.start--;
17708  cast->value = -cast->value;
17709  break;
17710  }
17711  case PM_RATIONAL_NODE: {
17712  pm_rational_node_t *cast = (pm_rational_node_t *) node;
17713  cast->base.location.start--;
17714  cast->numerator.negative = true;
17715  break;
17716  }
17717  case PM_IMAGINARY_NODE:
17718  node->location.start--;
17719  parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17720  break;
17721  default:
17722  assert(false && "unreachable");
17723  break;
17724  }
17725 }
17726 
17732 static void
17733 pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17734  switch (diag_id) {
17735  case PM_ERR_HASH_KEY: {
17736  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17737  break;
17738  }
17739  case PM_ERR_HASH_VALUE:
17740  case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17741  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17742  break;
17743  }
17744  case PM_ERR_UNARY_RECEIVER: {
17745  const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17746  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17747  break;
17748  }
17749  case PM_ERR_UNARY_DISALLOWED:
17750  case PM_ERR_EXPECT_ARGUMENT: {
17751  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17752  break;
17753  }
17754  default:
17755  pm_parser_err_previous(parser, diag_id);
17756  break;
17757  }
17758 }
17759 
// Decide whether a retry at the given node is allowed by walking outward
// through the parser's context list, starting at parser->current_context.
//
// The walk ends at the first context that settles the question:
//   - a context in which retry is allowed: return without an error;
//   - a context that bounds the search (class, def, module, ...): report an
//     error on the node. Which error depends on whether an else or ensure
//     context was crossed on the way out.
// Contexts that do not settle the question are skipped. If the list is
// exhausted, nothing is reported.
//
// The CONTEXT_* macros are private to this function and are undefined again at
// its end. The #undef names must match the #define names exactly, otherwise
// the macros stay defined for the rest of the file.
//
// NOTE(review): several case labels of the switch below are not visible in
// this listing.
17763 static void
17764 parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17765 #define CONTEXT_NONE 0
17766 #define CONTEXT_THROUGH_ENSURE 1
17767 #define CONTEXT_THROUGH_ELSE 2
17768 
17769  pm_context_node_t *context_node = parser->current_context;
// Remembers the most recent else/ensure context crossed so far.
17770  int context = CONTEXT_NONE;
17771 
17772  while (context_node != NULL) {
17773  switch (context_node->context) {
17777  case PM_CONTEXT_DEF_RESCUE:
17781  case PM_CONTEXT_DEFINED:
17783  // These are the good cases. We're allowed to have a retry here.
17784  return;
17785  case PM_CONTEXT_CLASS:
17786  case PM_CONTEXT_DEF:
17787  case PM_CONTEXT_DEF_PARAMS:
17788  case PM_CONTEXT_MAIN:
17789  case PM_CONTEXT_MODULE:
17790  case PM_CONTEXT_PREEXE:
17791  case PM_CONTEXT_SCLASS:
17792  // These are the bad cases. We're not allowed to have a retry in
17793  // these contexts.
17794  if (context == CONTEXT_NONE) {
17795  pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17796  } else if (context == CONTEXT_THROUGH_ENSURE) {
17797  pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17798  } else if (context == CONTEXT_THROUGH_ELSE) {
17799  pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17800  }
17801  return;
17802  case PM_CONTEXT_BEGIN_ELSE:
17803  case PM_CONTEXT_BLOCK_ELSE:
17804  case PM_CONTEXT_CLASS_ELSE:
17805  case PM_CONTEXT_DEF_ELSE:
17809  // These are also bad cases, but with a more specific error
17810  // message indicating the else.
17811  context = CONTEXT_THROUGH_ELSE;
17812  break;
17816  case PM_CONTEXT_DEF_ENSURE:
17820  // These are also bad cases, but with a more specific error
17821  // message indicating the ensure.
17822  context = CONTEXT_THROUGH_ENSURE;
17823  break;
17824  case PM_CONTEXT_NONE:
17825  // This case should never happen.
17826  assert(false && "unreachable");
17827  break;
17828  case PM_CONTEXT_BEGIN:
17831  case PM_CONTEXT_CASE_IN:
17832  case PM_CONTEXT_CASE_WHEN:
17834  case PM_CONTEXT_ELSE:
17835  case PM_CONTEXT_ELSIF:
17836  case PM_CONTEXT_EMBEXPR:
17837  case PM_CONTEXT_FOR_INDEX:
17838  case PM_CONTEXT_FOR:
17839  case PM_CONTEXT_IF:
17844  case PM_CONTEXT_PARENS:
17845  case PM_CONTEXT_POSTEXE:
17846  case PM_CONTEXT_PREDICATE:
17847  case PM_CONTEXT_TERNARY:
17848  case PM_CONTEXT_UNLESS:
17849  case PM_CONTEXT_UNTIL:
17850  case PM_CONTEXT_WHILE:
17851  // In these contexts we should continue walking up the list of
17852  // contexts.
17853  break;
17854  }
17855 
17856  context_node = context_node->prev;
17857  }
17858 
17859 #undef CONTEXT_NONE
17860 #undef CONTEXT_THROUGH_ENSURE
17861 #undef CONTEXT_THROUGH_ELSE
17862 }
17863 
// Decide whether a yield at the given node is allowed by walking outward
// through the parser's context list, starting at parser->current_context.
//
// The walk ends at the first context that settles the question: inside a
// method definition (or a defined? check) the yield is accepted silently;
// inside a class, module, singleton class or the main script
// PM_ERR_INVALID_YIELD is reported on the node. All other contexts are
// skipped. If the list is exhausted, nothing is reported.
//
// NOTE(review): several case labels of the switch below are not visible in
// this listing.
17867 static void
17868 parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17869  pm_context_node_t *context_node = parser->current_context;
17870 
17871  while (context_node != NULL) {
17872  switch (context_node->context) {
17873  case PM_CONTEXT_DEF:
17874  case PM_CONTEXT_DEF_PARAMS:
17875  case PM_CONTEXT_DEFINED:
17876  case PM_CONTEXT_DEF_ENSURE:
17877  case PM_CONTEXT_DEF_RESCUE:
17878  case PM_CONTEXT_DEF_ELSE:
17879  // These are the good cases. We're allowed to have a yield in
17880  // these contexts.
17881  return;
17882  case PM_CONTEXT_CLASS:
17885  case PM_CONTEXT_CLASS_ELSE:
17886  case PM_CONTEXT_MAIN:
17887  case PM_CONTEXT_MODULE:
17891  case PM_CONTEXT_SCLASS:
17895  // These are the bad cases. We're not allowed to have a yield in
17896  // these contexts.
17897  pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17898  return;
17899  case PM_CONTEXT_NONE:
17900  // This case should never happen.
17901  assert(false && "unreachable");
17902  break;
17903  case PM_CONTEXT_BEGIN:
17904  case PM_CONTEXT_BEGIN_ELSE:
17909  case PM_CONTEXT_BLOCK_ELSE:
17912  case PM_CONTEXT_CASE_IN:
17913  case PM_CONTEXT_CASE_WHEN:
17915  case PM_CONTEXT_ELSE:
17916  case PM_CONTEXT_ELSIF:
17917  case PM_CONTEXT_EMBEXPR:
17918  case PM_CONTEXT_FOR_INDEX:
17919  case PM_CONTEXT_FOR:
17920  case PM_CONTEXT_IF:
17928  case PM_CONTEXT_PARENS:
17929  case PM_CONTEXT_POSTEXE:
17930  case PM_CONTEXT_PREDICATE:
17931  case PM_CONTEXT_PREEXE:
17933  case PM_CONTEXT_TERNARY:
17934  case PM_CONTEXT_UNLESS:
17935  case PM_CONTEXT_UNTIL:
17936  case PM_CONTEXT_WHILE:
17937  // In these contexts we should continue walking up the list of
17938  // contexts.
17939  break;
17940  }
17941 
17942  context_node = context_node->prev;
17943  }
17944 }
17945 
// Data used by parse_regular_expression_error when it reports a regular
// expression parse error (read there through callback_data).
// NOTE(review): the first field (the parser that callback_data->parser refers
// to) and the closing line of this typedef are not visible in this listing.
17950 typedef struct {
17953 
// Start of the location to report when the error cannot be placed precisely
// (see shared). Filled from the start of the regular expression node.
17955  const uint8_t *start;
17956 
// End of that same location. Filled from the end of the regular expression
// node.
17958  const uint8_t *end;
17959 
// True when the unescaped string being parsed has type PM_STRING_SHARED. In
// that case the start/end pointers passed to the error callback are used as
// the error location; otherwise the start/end stored above are used.
17966  bool shared;
17968 
// Error callback handed to pm_regexp_parse: records a
// PM_ERR_REGEXP_PARSE_ERROR diagnostic carrying the given message.
//
// start, end - span of the error as reported by the regular expression parser
// message    - text inserted into the diagnostic
// data       - opaque callback data; presumably the source of callback_data
//              below. NOTE(review): the line that defines callback_data is
//              not visible in this listing.
17973 static void
17974 parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17976  pm_location_t location;
17977 
// The reported span is used as the error location only when the shared flag is
// set. Otherwise the error is attached to the location stored in the callback
// data.
17978  if (callback_data->shared) {
17979  location = (pm_location_t) { .start = start, .end = end };
17980  } else {
17981  location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
17982  }
17983 
17984  PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
17985 }
17986 
// Run the regular expression parser over the unescaped content of a regular
// expression node so that any parse errors are reported on the parser through
// parse_regular_expression_error.
//
// parser - the parser that receives the diagnostics
// node   - the regular expression node whose unescaped source is checked
//
// NOTE(review): the line that declares error_data (whose initializer follows)
// is not visible in this listing.
17990 static void
17991 parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17992  const pm_string_t *unescaped = &node->unescaped;
// Fallback location is the whole node; shared records whether the unescaped
// string has type PM_STRING_SHARED.
17994  .parser = parser,
17995  .start = node->base.location.start,
17996  .end = node->base.location.end,
17997  .shared = unescaped->type == PM_STRING_SHARED
17998  };
17999 
// Arguments after the source and its length: whether the extended flag is set
// on the node, two NULL arguments, then the error callback and its data.
18000  pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
18001 }
18002 
18006 static inline pm_node_t *
18007 parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
18008  switch (parser->current.type) {
18010  parser_lex(parser);
18011 
18012  pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
18013  pm_accepts_block_stack_push(parser, true);
18014  bool parsed_bare_hash = false;
18015 
18016  while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
18017  bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
18018 
18019  // Handle the case where we don't have a comma and we have a
18020  // newline followed by a right bracket.
18021  if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18022  break;
18023  }
18024 
18025  // Ensure that we have a comma between elements in the array.
18026  if (array->elements.size > 0) {
18027  if (accept1(parser, PM_TOKEN_COMMA)) {
18028  // If there was a comma but we also accepts a newline,
18029  // then this is a syntax error.
18030  if (accepted_newline) {
18031  pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18032  }
18033  } else {
18034  // If there was no comma, then we need to add a syntax
18035  // error.
18036  const uint8_t *location = parser->previous.end;
18037  PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18038 
18039  parser->previous.start = location;
18040  parser->previous.type = PM_TOKEN_MISSING;
18041  }
18042  }
18043 
18044  // If we have a right bracket immediately following a comma,
18045  // this is allowed since it's a trailing comma. In this case we
18046  // can break out of the loop.
18047  if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
18048 
18049  pm_node_t *element;
18050 
18051  if (accept1(parser, PM_TOKEN_USTAR)) {
18052  pm_token_t operator = parser->previous;
18053  pm_node_t *expression = NULL;
18054 
18055  if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
18056  pm_parser_scope_forwarding_positionals_check(parser, &operator);
18057  } else {
18058  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18059  }
18060 
18061  element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
18062  } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
18063  if (parsed_bare_hash) {
18064  pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
18065  }
18066 
18067  element = (pm_node_t *) pm_keyword_hash_node_create(parser);
18068  pm_static_literals_t hash_keys = { 0 };
18069 
18071  parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18072  }
18073 
18074  pm_static_literals_free(&hash_keys);
18075  parsed_bare_hash = true;
18076  } else {
18077  element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
18078 
18079  if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
18080  if (parsed_bare_hash) {
18081  pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
18082  }
18083 
18084  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
18085  pm_static_literals_t hash_keys = { 0 };
18086  pm_hash_key_static_literals_add(parser, &hash_keys, element);
18087 
18088  pm_token_t operator;
18089  if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
18090  operator = parser->previous;
18091  } else {
18092  operator = not_provided(parser);
18093  }
18094 
18095  pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
18096  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
18097  pm_keyword_hash_node_elements_append(hash, assoc);
18098 
18099  element = (pm_node_t *) hash;
18100  if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18101  parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18102  }
18103 
18104  pm_static_literals_free(&hash_keys);
18105  parsed_bare_hash = true;
18106  }
18107  }
18108 
18109  pm_array_node_elements_append(array, element);
18110  if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
18111  }
18112 
18113  accept1(parser, PM_TOKEN_NEWLINE);
18114 
18115  if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18116  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
18117  parser->previous.start = parser->previous.end;
18118  parser->previous.type = PM_TOKEN_MISSING;
18119  }
18120 
18121  pm_array_node_close_set(array, &parser->previous);
18122  pm_accepts_block_stack_pop(parser);
18123 
18124  return (pm_node_t *) array;
18125  }
18128  pm_token_t opening = parser->current;
18129 
18130  pm_node_list_t current_block_exits = { 0 };
18131  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18132 
18133  parser_lex(parser);
18134  while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
18135 
18136  // If this is the end of the file or we match a right parenthesis, then
18137  // we have an empty parentheses node, and we can immediately return.
18138  if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18139  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18140 
18141  pop_block_exits(parser, previous_block_exits);
18142  pm_node_list_free(&current_block_exits);
18143 
18144  return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous);
18145  }
18146 
18147  // Otherwise, we're going to parse the first statement in the list
18148  // of statements within the parentheses.
18149  pm_accepts_block_stack_push(parser, true);
18150  context_push(parser, PM_CONTEXT_PARENS);
18151  pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18152  context_pop(parser);
18153 
18154  // Determine if this statement is followed by a terminator. In the
18155  // case of a single statement, this is fine. But in the case of
18156  // multiple statements it's required.
18157  bool terminator_found = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18158  if (terminator_found) {
18159  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18160  }
18161 
18162  // If we hit a right parenthesis, then we're done parsing the
18163  // parentheses node, and we can check which kind of node we should
18164  // return.
18165  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18167  lex_state_set(parser, PM_LEX_STATE_ENDARG);
18168  }
18169 
18170  parser_lex(parser);
18171  pm_accepts_block_stack_pop(parser);
18172 
18173  pop_block_exits(parser, previous_block_exits);
18174  pm_node_list_free(&current_block_exits);
18175 
18176  if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18177  // If we have a single statement and are ending on a right
18178  // parenthesis, then we need to check if this is possibly a
18179  // multiple target node.
18180  pm_multi_target_node_t *multi_target;
18181 
18182  if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
18183  multi_target = (pm_multi_target_node_t *) statement;
18184  } else {
18185  multi_target = pm_multi_target_node_create(parser);
18186  pm_multi_target_node_targets_append(parser, multi_target, statement);
18187  }
18188 
18189  pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18190  pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
18191 
18192  multi_target->lparen_loc = lparen_loc;
18193  multi_target->rparen_loc = rparen_loc;
18194  multi_target->base.location.start = lparen_loc.start;
18195  multi_target->base.location.end = rparen_loc.end;
18196 
18197  pm_node_t *result;
18198  if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18199  result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18200  accept1(parser, PM_TOKEN_NEWLINE);
18201  } else {
18202  result = (pm_node_t *) multi_target;
18203  }
18204 
18205  if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18206  // All set, this is explicitly allowed by the parent
18207  // context.
18208  } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18209  // All set, we're inside a for loop and we're parsing
18210  // multiple targets.
18211  } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18212  // Multi targets are not allowed when it's not a
18213  // statement level.
18214  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18215  } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18216  // Multi targets must be followed by an equal sign in
18217  // order to be valid (or a right parenthesis if they are
18218  // nested).
18219  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18220  }
18221 
18222  return result;
18223  }
18224 
18225  // If we have a single statement and are ending on a right parenthesis
18226  // and we didn't return a multiple assignment node, then we can return a
18227  // regular parentheses node now.
18228  pm_statements_node_t *statements = pm_statements_node_create(parser);
18229  pm_statements_node_body_append(parser, statements, statement, true);
18230 
18231  return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18232  }
18233 
18234  // If we have more than one statement in the set of parentheses,
18235  // then we are going to parse all of them as a list of statements.
18236  // We'll do that here.
18237  context_push(parser, PM_CONTEXT_PARENS);
18238  pm_statements_node_t *statements = pm_statements_node_create(parser);
18239  pm_statements_node_body_append(parser, statements, statement, true);
18240 
18241  // If we didn't find a terminator and we didn't find a right
18242  // parenthesis, then this is a syntax error.
18243  if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18244  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18245  }
18246 
18247  // Parse each statement within the parentheses.
18248  while (true) {
18249  pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18250  pm_statements_node_body_append(parser, statements, node, true);
18251 
18252  // If we're recovering from a syntax error, then we need to stop
18253  // parsing the statements now.
18254  if (parser->recovering) {
18255  // If this is the level of context where the recovery has
18256  // happened, then we can mark the parser as done recovering.
18257  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
18258  break;
18259  }
18260 
18261  // If we couldn't parse an expression at all, then we need to
18262  // bail out of the loop.
18263  if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
18264 
18265  // If we successfully parsed a statement, then we are going to
18266  // need terminator to delimit them.
18267  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18268  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18269  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
18270  } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18271  break;
18272  } else if (!match1(parser, PM_TOKEN_EOF)) {
18273  // If we're at the end of the file, then we're going to add
18274  // an error after this for the ) anyway.
18275  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18276  }
18277  }
18278 
18279  context_pop(parser);
18280  pm_accepts_block_stack_pop(parser);
18281  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18282 
18283  // When we're parsing multi targets, we allow them to be followed by
18284  // a right parenthesis if they are at the statement level. This is
18285  // only possible if they are the final statement in a parentheses.
18286  // We need to explicitly reject that here.
18287  {
18288  pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18289 
18290  if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18291  pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18292  pm_multi_target_node_targets_append(parser, multi_target, statement);
18293 
18294  statement = (pm_node_t *) multi_target;
18295  statements->body.nodes[statements->body.size - 1] = statement;
18296  }
18297 
18298  if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18299  const uint8_t *offset = statement->location.end;
18300  pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18301  pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18302 
18303  statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18304  statements->body.nodes[statements->body.size - 1] = statement;
18305 
18306  pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18307  }
18308  }
18309 
18310  pop_block_exits(parser, previous_block_exits);
18311  pm_node_list_free(&current_block_exits);
18312 
18313  pm_void_statements_check(parser, statements, true);
18314  return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18315  }
18316  case PM_TOKEN_BRACE_LEFT: {
18317  // If we were passed a current_hash_keys via the parser, then that
18318  // means we're already parsing a hash and we want to share the set
18319  // of hash keys with this inner hash we're about to parse for the
18320  // sake of warnings. We'll set it to NULL after we grab it to make
18321  // sure subsequent expressions don't use it. Effectively this is a
18322  // way of getting around passing it to every call to
18323  // parse_expression.
18324  pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
18325  parser->current_hash_keys = NULL;
18326 
18327  pm_accepts_block_stack_push(parser, true);
18328  parser_lex(parser);
18329 
18330  pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
18331 
18332  if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
18333  if (current_hash_keys != NULL) {
18334  parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18335  } else {
18336  pm_static_literals_t hash_keys = { 0 };
18337  parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18338  pm_static_literals_free(&hash_keys);
18339  }
18340 
18341  accept1(parser, PM_TOKEN_NEWLINE);
18342  }
18343 
18344  pm_accepts_block_stack_pop(parser);
18345  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
18346  pm_hash_node_closing_loc_set(node, &parser->previous);
18347 
18348  return (pm_node_t *) node;
18349  }
18351  parser_lex(parser);
18352 
18353  pm_token_t opening = parser->previous;
18354  opening.type = PM_TOKEN_STRING_BEGIN;
18355  opening.end = opening.start + 1;
18356 
18357  pm_token_t content = parser->previous;
18358  content.type = PM_TOKEN_STRING_CONTENT;
18359  content.start = content.start + 1;
18360 
18361  pm_token_t closing = not_provided(parser);
18362  pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
18363  pm_node_flag_set(node, parse_unescaped_encoding(parser));
18364 
18365  // Characters can be followed by strings in which case they are
18366  // automatically concatenated.
18367  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18368  return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18369  }
18370 
18371  return node;
18372  }
18373  case PM_TOKEN_CLASS_VARIABLE: {
18374  parser_lex(parser);
18375  pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
18376 
18377  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18378  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18379  }
18380 
18381  return node;
18382  }
18383  case PM_TOKEN_CONSTANT: {
18384  parser_lex(parser);
18385  pm_token_t constant = parser->previous;
18386 
18387  // If a constant is immediately followed by parentheses, then this is in
18388  // fact a method call, not a constant read.
18389  if (
18390  match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
18391  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18392  (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18393  match1(parser, PM_TOKEN_BRACE_LEFT)
18394  ) {
18395  pm_arguments_t arguments = { 0 };
18396  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18397  return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
18398  }
18399 
18400  pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18401 
18402  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18403  // If we get here, then we have a comma immediately following a
18404  // constant, so we're going to parse this as a multiple assignment.
18405  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18406  }
18407 
18408  return node;
18409  }
18410  case PM_TOKEN_UCOLON_COLON: {
18411  parser_lex(parser);
18412  pm_token_t delimiter = parser->previous;
18413 
18414  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18415  pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
18416 
18417  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18418  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18419  }
18420 
18421  return node;
18422  }
18423  case PM_TOKEN_UDOT_DOT:
18424  case PM_TOKEN_UDOT_DOT_DOT: {
18425  pm_token_t operator = parser->current;
18426  parser_lex(parser);
18427 
18428  pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
18429 
18430  // Unary .. and ... are special because these are non-associative
18431  // operators that can also be unary operators. In this case we need
18432  // to explicitly reject code that has a .. or ... that follows this
18433  // expression.
18434  if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
18435  pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
18436  }
18437 
18438  return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
18439  }
18440  case PM_TOKEN_FLOAT:
18441  parser_lex(parser);
18442  return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
18444  parser_lex(parser);
18445  return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
18447  parser_lex(parser);
18448  return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
18450  parser_lex(parser);
18451  return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
18453  parser_lex(parser);
18454  pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
18455 
18456  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18457  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18458  }
18459 
18460  return node;
18461  }
18462  case PM_TOKEN_GLOBAL_VARIABLE: {
18463  parser_lex(parser);
18464  pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
18465 
18466  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18467  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18468  }
18469 
18470  return node;
18471  }
18472  case PM_TOKEN_BACK_REFERENCE: {
18473  parser_lex(parser);
18474  pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
18475 
18476  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18477  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18478  }
18479 
18480  return node;
18481  }
18482  case PM_TOKEN_IDENTIFIER:
18483  case PM_TOKEN_METHOD_NAME: {
18484  parser_lex(parser);
18485  pm_token_t identifier = parser->previous;
18486  pm_node_t *node = parse_variable_call(parser);
18487 
18488  if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
18489  // If parse_variable_call returned with a call node, then we
18490  // know the identifier is not in the local table. In that case
18491  // we need to check if there are arguments following the
18492  // identifier.
18493  pm_call_node_t *call = (pm_call_node_t *) node;
18494  pm_arguments_t arguments = { 0 };
18495 
18496  if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
18497  // Since we found arguments, we need to turn off the
18498  // variable call bit in the flags.
18499  pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
18500 
18501  call->opening_loc = arguments.opening_loc;
18502  call->arguments = arguments.arguments;
18503  call->closing_loc = arguments.closing_loc;
18504  call->block = arguments.block;
18505 
18506  if (arguments.block != NULL) {
18507  call->base.location.end = arguments.block->location.end;
18508  } else if (arguments.closing_loc.start == NULL) {
18509  if (arguments.arguments != NULL) {
18510  call->base.location.end = arguments.arguments->base.location.end;
18511  } else {
18512  call->base.location.end = call->message_loc.end;
18513  }
18514  } else {
18515  call->base.location.end = arguments.closing_loc.end;
18516  }
18517  }
18518  } else {
18519  // Otherwise, we know the identifier is in the local table. This
18520  // can still be a method call if it is followed by arguments or
18521  // a block, so we need to check for that here.
18522  if (
18523  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18524  (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18525  match1(parser, PM_TOKEN_BRACE_LEFT)
18526  ) {
18527  pm_arguments_t arguments = { 0 };
18528  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18529  pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
18530 
18532  // If we're about to convert an 'it' implicit local
18533  // variable read into a method call, we need to remove
18534  // it from the list of implicit local variables.
18535  parse_target_implicit_parameter(parser, node);
18536  } else {
18537  // Otherwise, we're about to convert a regular local
18538  // variable read into a method call, in which case we
18539  // need to indicate that this was not a read for the
18540  // purposes of warnings.
18542 
18543  if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
18544  parse_target_implicit_parameter(parser, node);
18545  } else {
18547  pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18548  }
18549  }
18550 
18551  pm_node_destroy(parser, node);
18552  return (pm_node_t *) fcall;
18553  }
18554  }
18555 
18556  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18557  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18558  }
18559 
18560  return node;
18561  }
18562  case PM_TOKEN_HEREDOC_START: {
18563  // Here we have found a heredoc. We'll parse it and add it to the
18564  // list of strings.
18565  assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18566  pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18567 
18568  size_t common_whitespace = (size_t) -1;
18569  parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18570 
18571  parser_lex(parser);
18572  pm_token_t opening = parser->previous;
18573 
18574  pm_node_t *node;
18575  pm_node_t *part;
18576 
18577  if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18578  // If we get here, then we have an empty heredoc. We'll create
18579  // an empty content token and return an empty string node.
18580  expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18581  pm_token_t content = parse_strings_empty_content(parser->previous.start);
18582 
18583  if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18584  node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18585  } else {
18586  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18587  }
18588 
18589  node->location.end = opening.end;
18590  } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18591  // If we get here, then we tried to find something in the
18592  // heredoc but couldn't actually parse anything, so we'll just
18593  // return a missing node.
18594  //
18595  // parse_string_part handles its own errors, so there is no need
18596  // for us to add one here.
18597  node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
18598  } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18599  // If we get here, then the part that we parsed was plain string
18600  // content and we're at the end of the heredoc, so we can return
18601  // just a string node with the heredoc opening and closing as
18602  // its opening and closing.
18603  pm_node_flag_set(part, parse_unescaped_encoding(parser));
18604  pm_string_node_t *cast = (pm_string_node_t *) part;
18605 
18606  cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18607  cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18608  cast->base.location = cast->opening_loc;
18609 
18610  if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18611  assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18612  cast->base.type = PM_X_STRING_NODE;
18613  }
18614 
18615  if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18616  parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18617  }
18618 
18619  node = (pm_node_t *) cast;
18620  expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18621  } else {
18622  // If we get here, then we have multiple parts in the heredoc,
18623  // so we'll need to create an interpolated string node to hold
18624  // them all.
18625  pm_node_list_t parts = { 0 };
18626  pm_node_list_append(&parts, part);
18627 
18628  while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18629  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18630  pm_node_list_append(&parts, part);
18631  }
18632  }
18633 
18634  // Now that we have all of the parts, create the correct type of
18635  // interpolated node.
18636  if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18637  pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18638  cast->parts = parts;
18639 
18640  expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18641  pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18642 
18643  cast->base.location = cast->opening_loc;
18644  node = (pm_node_t *) cast;
18645  } else {
18646  pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18647  pm_node_list_free(&parts);
18648 
18649  expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18650  pm_interpolated_string_node_closing_set(cast, &parser->previous);
18651 
18652  cast->base.location = cast->opening_loc;
18653  node = (pm_node_t *) cast;
18654  }
18655 
18656  // If this is a heredoc that is indented with a ~, then we need
18657  // to dedent each line by the common leading whitespace.
18658  if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18659  pm_node_list_t *nodes;
18660  if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18661  nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18662  } else {
18663  nodes = &((pm_interpolated_string_node_t *) node)->parts;
18664  }
18665 
18666  parse_heredoc_dedent(parser, nodes, common_whitespace);
18667  }
18668  }
18669 
18670  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18671  return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18672  }
18673 
18674  return node;
18675  }
18677  parser_lex(parser);
18678  pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
18679 
18680  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18681  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18682  }
18683 
18684  return node;
18685  }
18686  case PM_TOKEN_INTEGER: {
18687  pm_node_flags_t base = parser->integer_base;
18688  parser_lex(parser);
18689  return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
18690  }
18692  pm_node_flags_t base = parser->integer_base;
18693  parser_lex(parser);
18694  return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
18695  }
18697  pm_node_flags_t base = parser->integer_base;
18698  parser_lex(parser);
18699  return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
18700  }
18702  pm_node_flags_t base = parser->integer_base;
18703  parser_lex(parser);
18704  return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
18705  }
18707  parser_lex(parser);
18708  return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
18710  parser_lex(parser);
18711  return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
18713  parser_lex(parser);
18714  return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
18715  case PM_TOKEN_KEYWORD_ALIAS: {
18716  if (binding_power != PM_BINDING_POWER_STATEMENT) {
18717  pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18718  }
18719 
18720  parser_lex(parser);
18721  pm_token_t keyword = parser->previous;
18722 
18723  pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18724  pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18725 
18726  switch (PM_NODE_TYPE(new_name)) {
18732  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18733  }
18734  } else {
18735  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18736  }
18737 
18738  return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
18739  }
18740  case PM_SYMBOL_NODE:
18743  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18744  }
18745  }
18746  /* fallthrough */
18747  default:
18748  return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18749  }
18750  }
18751  case PM_TOKEN_KEYWORD_CASE: {
18752  size_t opening_newline_index = token_newline_index(parser);
18753  parser_lex(parser);
18754 
18755  pm_token_t case_keyword = parser->previous;
18756  pm_node_t *predicate = NULL;
18757 
18758  pm_node_list_t current_block_exits = { 0 };
18759  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18760 
18761  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18762  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18763  predicate = NULL;
18764  } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18765  predicate = NULL;
18766  } else if (!token_begins_expression_p(parser->current.type)) {
18767  predicate = NULL;
18768  } else {
18769  predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18770  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18771  }
18772 
18773  if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18774  parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18775  parser_lex(parser);
18776 
18777  pop_block_exits(parser, previous_block_exits);
18778  pm_node_list_free(&current_block_exits);
18779 
18780  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18781  return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
18782  }
18783 
18784  // At this point we can create a case node, though we don't yet know
18785  // if it is a case-in or case-when node.
18786  pm_token_t end_keyword = not_provided(parser);
18787  pm_node_t *node;
18788 
18789  if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18790  pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18791  pm_static_literals_t literals = { 0 };
18792 
18793  // At this point we've seen a when keyword, so we know this is a
18794  // case-when node. We will continue to parse the when nodes
18795  // until we hit the end of the list.
18796  while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18797  parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18798  parser_lex(parser);
18799 
18800  pm_token_t when_keyword = parser->previous;
18801  pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18802 
18803  do {
18804  if (accept1(parser, PM_TOKEN_USTAR)) {
18805  pm_token_t operator = parser->previous;
18806  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18807 
18808  pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18809  pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
18810 
18811  if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18812  } else {
18813  pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18814  pm_when_node_conditions_append(when_node, condition);
18815 
18816  // If we found a missing node, then this is a syntax
18817  // error and we should stop looping.
18818  if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18819 
18820  // If this is a string node, then we need to mark it
18821  // as frozen because when clause strings are frozen.
18822  if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18823  pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18824  } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18825  pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18826  }
18827 
18828  pm_when_clause_static_literals_add(parser, &literals, condition);
18829  }
18830  } while (accept1(parser, PM_TOKEN_COMMA));
18831 
18832  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18833  if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18834  pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18835  }
18836  } else {
18837  expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18838  pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18839  }
18840 
18842  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18843  if (statements != NULL) {
18844  pm_when_node_statements_set(when_node, statements);
18845  }
18846  }
18847 
18848  pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
18849  }
18850 
18851  // If we didn't parse any conditions (in or when) then we need
18852  // to indicate that we have an error.
18853  if (case_node->conditions.size == 0) {
18854  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18855  }
18856 
18857  pm_static_literals_free(&literals);
18858  node = (pm_node_t *) case_node;
18859  } else {
18860  pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
18861 
18862  // If this is a case-match node (i.e., it is a pattern matching
18863  // case statement) then we must have a predicate.
18864  if (predicate == NULL) {
18865  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18866  }
18867 
18868  // At this point we expect that we're parsing a case-in node. We
18869  // will continue to parse the in nodes until we hit the end of
18870  // the list.
18871  while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18872  parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18873 
18874  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18875  parser->pattern_matching_newlines = true;
18876 
18877  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18878  parser->command_start = false;
18879  parser_lex(parser);
18880 
18881  pm_token_t in_keyword = parser->previous;
18882 
18883  pm_constant_id_list_t captures = { 0 };
18884  pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18885 
18886  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18887  pm_constant_id_list_free(&captures);
18888 
18889  // Since we're in the top-level of the case-in node we need
18890  // to check for guard clauses in the form of `if` or
18891  // `unless` statements.
18892  if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18893  pm_token_t keyword = parser->previous;
18894  pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18895  pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
18896  } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18897  pm_token_t keyword = parser->previous;
18898  pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18899  pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
18900  }
18901 
18902  // Now we need to check for the terminator of the in node's
18903  // pattern. It can be a newline or semicolon optionally
18904  // followed by a `then` keyword.
18905  pm_token_t then_keyword;
18906  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18907  if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18908  then_keyword = parser->previous;
18909  } else {
18910  then_keyword = not_provided(parser);
18911  }
18912  } else {
18913  expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18914  then_keyword = parser->previous;
18915  }
18916 
18917  // Now we can actually parse the statements associated with
18918  // the in node.
18919  pm_statements_node_t *statements;
18921  statements = NULL;
18922  } else {
18923  statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18924  }
18925 
18926  // Now that we have the full pattern and statements, we can
18927  // create the node and attach it to the case node.
18928  pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
18929  pm_case_match_node_condition_append(case_node, condition);
18930  }
18931 
18932  // If we didn't parse any conditions (in or when) then we need
18933  // to indicate that we have an error.
18934  if (case_node->conditions.size == 0) {
18935  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18936  }
18937 
18938  node = (pm_node_t *) case_node;
18939  }
18940 
18941  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18942  if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18943  pm_token_t else_keyword = parser->previous;
18944  pm_else_node_t *else_node;
18945 
18946  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18947  else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
18948  } else {
18949  else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
18950  }
18951 
18952  if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18953  pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
18954  } else {
18955  pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
18956  }
18957  }
18958 
18959  parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18960  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
18961 
18962  if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18963  pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
18964  } else {
18965  pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
18966  }
18967 
18968  pop_block_exits(parser, previous_block_exits);
18969  pm_node_list_free(&current_block_exits);
18970 
18971  return node;
18972  }
18973  case PM_TOKEN_KEYWORD_BEGIN: {
18974  size_t opening_newline_index = token_newline_index(parser);
18975  parser_lex(parser);
18976 
18977  pm_token_t begin_keyword = parser->previous;
18978  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18979 
18980  pm_node_list_t current_block_exits = { 0 };
18981  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18982  pm_statements_node_t *begin_statements = NULL;
18983 
18985  pm_accepts_block_stack_push(parser, true);
18986  begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
18987  pm_accepts_block_stack_pop(parser);
18988  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18989  }
18990 
18991  pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
18992  parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
18993  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
18994 
18995  begin_node->base.location.end = parser->previous.end;
18996  pm_begin_node_end_keyword_set(begin_node, &parser->previous);
18997 
18998  pop_block_exits(parser, previous_block_exits);
18999  pm_node_list_free(&current_block_exits);
19000 
19001  return (pm_node_t *) begin_node;
19002  }
19004  pm_node_list_t current_block_exits = { 0 };
19005  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19006 
19007  if (binding_power != PM_BINDING_POWER_STATEMENT) {
19008  pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
19009  }
19010 
19011  parser_lex(parser);
19012  pm_token_t keyword = parser->previous;
19013 
19014  expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
19015  pm_token_t opening = parser->previous;
19016  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
19017 
19018  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
19019  pm_context_t context = parser->current_context->context;
19020  if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19021  pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19022  }
19023 
19024  flush_block_exits(parser, previous_block_exits);
19025  pm_node_list_free(&current_block_exits);
19026 
19027  return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19028  }
19030  case PM_TOKEN_KEYWORD_NEXT:
19031  case PM_TOKEN_KEYWORD_RETURN: {
19032  parser_lex(parser);
19033 
19034  pm_token_t keyword = parser->previous;
19035  pm_arguments_t arguments = { 0 };
19036 
19037  if (
19038  token_begins_expression_p(parser->current.type) ||
19039  match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19040  ) {
19041  pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19042 
19043  if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19044  parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
19045  }
19046  }
19047 
19048  switch (keyword.type) {
19049  case PM_TOKEN_KEYWORD_BREAK: {
19050  pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
19051  if (!parser->partial_script) parse_block_exit(parser, node);
19052  return node;
19053  }
19054  case PM_TOKEN_KEYWORD_NEXT: {
19055  pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
19056  if (!parser->partial_script) parse_block_exit(parser, node);
19057  return node;
19058  }
19059  case PM_TOKEN_KEYWORD_RETURN: {
19060  pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
19061  parse_return(parser, node);
19062  return node;
19063  }
19064  default:
19065  assert(false && "unreachable");
19066  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
19067  }
19068  }
19069  case PM_TOKEN_KEYWORD_SUPER: {
19070  parser_lex(parser);
19071 
19072  pm_token_t keyword = parser->previous;
19073  pm_arguments_t arguments = { 0 };
19074  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
19075 
19076  if (
19077  arguments.opening_loc.start == NULL &&
19078  arguments.arguments == NULL &&
19079  ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19080  ) {
19081  return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
19082  }
19083 
19084  return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
19085  }
19086  case PM_TOKEN_KEYWORD_YIELD: {
19087  parser_lex(parser);
19088 
19089  pm_token_t keyword = parser->previous;
19090  pm_arguments_t arguments = { 0 };
19091  parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
19092 
19093  // It's possible that we've parsed a block argument through our
19094  // call to parse_arguments_list. If we found one, we should mark it
19095  // as invalid and destroy it, as we don't have a place for it on the
19096  // yield node.
19097  if (arguments.block != NULL) {
19098  pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19099  pm_node_destroy(parser, arguments.block);
19100  arguments.block = NULL;
19101  }
19102 
19103  pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
19104  if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19105 
19106  return node;
19107  }
19108  case PM_TOKEN_KEYWORD_CLASS: {
19109  size_t opening_newline_index = token_newline_index(parser);
19110  parser_lex(parser);
19111 
19112  pm_token_t class_keyword = parser->previous;
19113  pm_do_loop_stack_push(parser, false);
19114 
19115  pm_node_list_t current_block_exits = { 0 };
19116  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19117 
19118  if (accept1(parser, PM_TOKEN_LESS_LESS)) {
19119  pm_token_t operator = parser->previous;
19120  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
19121 
19122  pm_parser_scope_push(parser, true);
19123  if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19124  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
19125  }
19126 
19127  pm_node_t *statements = NULL;
19129  pm_accepts_block_stack_push(parser, true);
19130  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
19131  pm_accepts_block_stack_pop(parser);
19132  }
19133 
19134  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19135  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19136  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
19137  } else {
19138  parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19139  }
19140 
19141  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19142 
19143  pm_constant_id_list_t locals;
19144  pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19145 
19146  pm_parser_scope_pop(parser);
19147  pm_do_loop_stack_pop(parser);
19148 
19149  flush_block_exits(parser, previous_block_exits);
19150  pm_node_list_free(&current_block_exits);
19151 
19152  return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
19153  }
19154 
19155  pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
19156  pm_token_t name = parser->previous;
19157  if (name.type != PM_TOKEN_CONSTANT) {
19158  pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
19159  }
19160 
19161  pm_token_t inheritance_operator;
19162  pm_node_t *superclass;
19163 
19164  if (match1(parser, PM_TOKEN_LESS)) {
19165  inheritance_operator = parser->current;
19166  lex_state_set(parser, PM_LEX_STATE_BEG);
19167 
19168  parser->command_start = true;
19169  parser_lex(parser);
19170 
19171  superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
19172  } else {
19173  inheritance_operator = not_provided(parser);
19174  superclass = NULL;
19175  }
19176 
19177  pm_parser_scope_push(parser, true);
19178 
19179  if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
19180  expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
19181  } else {
19182  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19183  }
19184  pm_node_t *statements = NULL;
19185 
19187  pm_accepts_block_stack_push(parser, true);
19188  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
19189  pm_accepts_block_stack_pop(parser);
19190  }
19191 
19192  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19193  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19194  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
19195  } else {
19196  parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19197  }
19198 
19199  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19200 
19201  if (context_def_p(parser)) {
19202  pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
19203  }
19204 
19205  pm_constant_id_list_t locals;
19206  pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19207 
19208  pm_parser_scope_pop(parser);
19209  pm_do_loop_stack_pop(parser);
19210 
19211  if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
19212  pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
19213  }
19214 
19215  pop_block_exits(parser, previous_block_exits);
19216  pm_node_list_free(&current_block_exits);
19217 
19218  return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
19219  }
19220  case PM_TOKEN_KEYWORD_DEF: {
19221  pm_node_list_t current_block_exits = { 0 };
19222  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19223 
19224  pm_token_t def_keyword = parser->current;
19225  size_t opening_newline_index = token_newline_index(parser);
19226 
19227  pm_node_t *receiver = NULL;
19228  pm_token_t operator = not_provided(parser);
19229  pm_token_t name;
19230 
19231  // This context is necessary for lexing `...` in a bare params
19232  // correctly. It must be pushed before lexing the first param, so it
19233  // is here.
19234  context_push(parser, PM_CONTEXT_DEF_PARAMS);
19235  parser_lex(parser);
19236 
19237  // This will be false if the method name is not a valid identifier
19238  // but could be followed by an operator.
19239  bool valid_name = true;
19240 
19241  switch (parser->current.type) {
19242  case PM_CASE_OPERATOR:
19243  pm_parser_scope_push(parser, true);
19244  lex_state_set(parser, PM_LEX_STATE_ENDFN);
19245  parser_lex(parser);
19246 
19247  name = parser->previous;
19248  break;
19249  case PM_TOKEN_IDENTIFIER: {
19250  parser_lex(parser);
19251 
19252  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19253  receiver = parse_variable_call(parser);
19254 
19255  pm_parser_scope_push(parser, true);
19256  lex_state_set(parser, PM_LEX_STATE_FNAME);
19257  parser_lex(parser);
19258 
19259  operator = parser->previous;
19260  name = parse_method_definition_name(parser);
19261  } else {
19262  pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
19263  pm_parser_scope_push(parser, true);
19264 
19265  name = parser->previous;
19266  }
19267 
19268  break;
19269  }
19273  valid_name = false;
19274  /* fallthrough */
19275  case PM_TOKEN_CONSTANT:
19276  case PM_TOKEN_KEYWORD_NIL:
19277  case PM_TOKEN_KEYWORD_SELF:
19278  case PM_TOKEN_KEYWORD_TRUE:
19283  pm_parser_scope_push(parser, true);
19284  parser_lex(parser);
19285 
19286  pm_token_t identifier = parser->previous;
19287 
19288  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19289  lex_state_set(parser, PM_LEX_STATE_FNAME);
19290  parser_lex(parser);
19291  operator = parser->previous;
19292 
19293  switch (identifier.type) {
19294  case PM_TOKEN_CONSTANT:
19295  receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
19296  break;
19298  receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
19299  break;
19301  receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
19302  break;
19304  receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
19305  break;
19306  case PM_TOKEN_KEYWORD_NIL:
19307  receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
19308  break;
19309  case PM_TOKEN_KEYWORD_SELF:
19310  receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
19311  break;
19312  case PM_TOKEN_KEYWORD_TRUE:
19313  receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
19314  break;
19316  receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
19317  break;
19319  receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
19320  break;
19322  receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
19323  break;
19325  receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
19326  break;
19327  default:
19328  break;
19329  }
19330 
19331  name = parse_method_definition_name(parser);
19332  } else {
19333  if (!valid_name) {
19334  PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19335  }
19336 
19337  name = identifier;
19338  }
19339  break;
19340  }
19342  // The current context is `PM_CONTEXT_DEF_PARAMS`, however
19343  // the inner expression of this parenthesis should not be
19344  // processed under this context. Thus, the context is popped
19345  // here.
19346  context_pop(parser);
19347  parser_lex(parser);
19348 
19349  pm_token_t lparen = parser->previous;
19350  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
19351 
19352  accept1(parser, PM_TOKEN_NEWLINE);
19353  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19354  pm_token_t rparen = parser->previous;
19355 
19356  lex_state_set(parser, PM_LEX_STATE_FNAME);
19357  expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
19358 
19359  operator = parser->previous;
19360  receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
19361 
19362  // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
19363  // reason as described above.
19364  pm_parser_scope_push(parser, true);
19365  context_push(parser, PM_CONTEXT_DEF_PARAMS);
19366  name = parse_method_definition_name(parser);
19367  break;
19368  }
19369  default:
19370  pm_parser_scope_push(parser, true);
19371  name = parse_method_definition_name(parser);
19372  break;
19373  }
19374 
19375  pm_token_t lparen;
19376  pm_token_t rparen;
19377  pm_parameters_node_t *params;
19378 
19379  switch (parser->current.type) {
19381  parser_lex(parser);
19382  lparen = parser->previous;
19383 
19384  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19385  params = NULL;
19386  } else {
19387  params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, (uint16_t) (depth + 1));
19388  }
19389 
19390  lex_state_set(parser, PM_LEX_STATE_BEG);
19391  parser->command_start = true;
19392 
19393  context_pop(parser);
19394  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19395  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
19396  parser->previous.start = parser->previous.end;
19397  parser->previous.type = PM_TOKEN_MISSING;
19398  }
19399 
19400  rparen = parser->previous;
19401  break;
19402  }
19403  case PM_CASE_PARAMETER: {
19404  // If we're about to lex a label, we need to add the label
19405  // state to make sure the next newline is ignored.
19406  if (parser->current.type == PM_TOKEN_LABEL) {
19407  lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
19408  }
19409 
19410  lparen = not_provided(parser);
19411  rparen = not_provided(parser);
19412  params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, (uint16_t) (depth + 1));
19413 
19414  context_pop(parser);
19415  break;
19416  }
19417  default: {
19418  lparen = not_provided(parser);
19419  rparen = not_provided(parser);
19420  params = NULL;
19421 
19422  context_pop(parser);
19423  break;
19424  }
19425  }
19426 
19427  pm_node_t *statements = NULL;
19428  pm_token_t equal;
19429  pm_token_t end_keyword;
19430 
19431  if (accept1(parser, PM_TOKEN_EQUAL)) {
19432  if (token_is_setter_name(&name)) {
19433  pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
19434  }
19435  equal = parser->previous;
19436 
19437  context_push(parser, PM_CONTEXT_DEF);
19438  pm_do_loop_stack_push(parser, false);
19439  statements = (pm_node_t *) pm_statements_node_create(parser);
19440 
19441  pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19442 
19443  if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19444  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19445 
19446  pm_token_t rescue_keyword = parser->previous;
19447  pm_node_t *value = parse_expression(parser, binding_power, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19448  context_pop(parser);
19449 
19450  statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
19451  }
19452 
19453  pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
19454  pm_do_loop_stack_pop(parser);
19455  context_pop(parser);
19456  end_keyword = not_provided(parser);
19457  } else {
19458  equal = not_provided(parser);
19459 
19460  if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
19461  lex_state_set(parser, PM_LEX_STATE_BEG);
19462  parser->command_start = true;
19463  expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
19464  } else {
19465  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19466  }
19467 
19468  pm_accepts_block_stack_push(parser, true);
19469  pm_do_loop_stack_push(parser, false);
19470 
19472  pm_accepts_block_stack_push(parser, true);
19473  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
19474  pm_accepts_block_stack_pop(parser);
19475  }
19476 
19478  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19479  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
19480  } else {
19481  parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
19482  }
19483 
19484  pm_accepts_block_stack_pop(parser);
19485  pm_do_loop_stack_pop(parser);
19486 
19487  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
19488  end_keyword = parser->previous;
19489  }
19490 
19491  pm_constant_id_list_t locals;
19492  pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19493  pm_parser_scope_pop(parser);
19494 
19500  pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
19501 
19502  flush_block_exits(parser, previous_block_exits);
19503  pm_node_list_free(&current_block_exits);
19504 
19505  return (pm_node_t *) pm_def_node_create(
19506  parser,
19507  name_id,
19508  &name,
19509  receiver,
19510  params,
19511  statements,
19512  &locals,
19513  &def_keyword,
19514  &operator,
19515  &lparen,
19516  &rparen,
19517  &equal,
19518  &end_keyword
19519  );
19520  }
19521  case PM_TOKEN_KEYWORD_DEFINED: {
19522  parser_lex(parser);
19523  pm_token_t keyword = parser->previous;
19524 
19525  pm_token_t lparen;
19526  pm_token_t rparen;
19527  pm_node_t *expression;
19528  context_push(parser, PM_CONTEXT_DEFINED);
19529 
19530  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19531  lparen = parser->previous;
19532  expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19533 
19534  if (parser->recovering) {
19535  rparen = not_provided(parser);
19536  } else {
19537  accept1(parser, PM_TOKEN_NEWLINE);
19538  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19539  rparen = parser->previous;
19540  }
19541  } else {
19542  lparen = not_provided(parser);
19543  rparen = not_provided(parser);
19544  expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19545  }
19546 
19547  context_pop(parser);
19548  return (pm_node_t *) pm_defined_node_create(
19549  parser,
19550  &lparen,
19551  expression,
19552  &rparen,
19553  &PM_LOCATION_TOKEN_VALUE(&keyword)
19554  );
19555  }
19557  if (binding_power != PM_BINDING_POWER_STATEMENT) {
19558  pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19559  }
19560 
19561  parser_lex(parser);
19562  pm_token_t keyword = parser->previous;
19563 
19564  if (context_def_p(parser)) {
19565  pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19566  }
19567 
19568  expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19569  pm_token_t opening = parser->previous;
19570  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19571 
19572  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
19573  return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19574  }
19576  parser_lex(parser);
19577  return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
19578  case PM_TOKEN_KEYWORD_FOR: {
       // Parses a `for` loop: the index target(s), the `in` keyword, the
       // collection expression, an optional `do`, the body statements and the
       // closing `end`. The result is the node built by pm_for_node_create.
       // The newline index is captured before the keyword is consumed; it is
       // used later for the indentation-mismatch warning.
19579  size_t opening_newline_index = token_newline_index(parser);
19580  parser_lex(parser);
19581 
19582  pm_token_t for_keyword = parser->previous;
19583  pm_node_t *index;
19584 
19585  context_push(parser, PM_CONTEXT_FOR_INDEX);
19586 
19587  // First, parse out the first index expression.
19588  if (accept1(parser, PM_TOKEN_USTAR)) {
19589  pm_token_t star_operator = parser->previous;
19590  pm_node_t *name = NULL;
19591 
       // The splat's expression is optional; `name` stays NULL for a bare star.
19592  if (token_begins_expression_p(parser->current.type)) {
19593  name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19594  }
19595 
19596  index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
19597  } else if (token_begins_expression_p(parser->current.type)) {
19598  index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19599  } else {
       // Nothing that can begin an expression follows the keyword: report
       // PM_ERR_FOR_INDEX on the keyword and use a missing node spanning the
       // keyword as the index so parsing can continue.
19600  pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19601  index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
19602  }
19603 
19604  // Now, if there are multiple index expressions, parse them out.
19605  if (match1(parser, PM_TOKEN_COMMA)) {
19606  index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19607  } else {
19608  index = parse_target(parser, index, false, false);
19609  }
19610 
       // The do-loop stack entry is pushed only for the duration of the `in`
       // keyword and the collection expression. Presumably this lets a
       // following `do` be lexed as PM_TOKEN_KEYWORD_DO_LOOP rather than as a
       // block opener; confirm against the lexer.
19611  context_pop(parser);
19612  pm_do_loop_stack_push(parser, true);
19613 
19614  expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19615  pm_token_t in_keyword = parser->previous;
19616 
19617  pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19618  pm_do_loop_stack_pop(parser);
19619 
19620  pm_token_t do_keyword;
19621  if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19622  do_keyword = parser->previous;
19623  } else {
       // Without `do`, the current token must be a semicolon or newline;
       // anything else records an error that names the unexpected token.
19624  do_keyword = not_provided(parser);
19625  if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19626  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19627  }
19628  }
19629 
       // An immediately following `end` leaves the body as NULL.
19630  pm_statements_node_t *statements = NULL;
19631  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19632  statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19633  }
19634 
19635  parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19636  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19637 
19638  return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
19639  }
19640  case PM_TOKEN_KEYWORD_IF:
       // Handles the `if` keyword by delegating to parse_conditional with
       // PM_CONTEXT_IF. A PM_WARN_KEYWORD_EOL warning is emitted first when
       // parser_end_of_line_p reports true.
19641  if (parser_end_of_line_p(parser)) {
19642  PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19643  }
19644 
       // Both values below are read before parser_lex advances: the newline
       // index for the keyword, and whether the token before it was `else`.
19645  size_t opening_newline_index = token_newline_index(parser);
19646  bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19647  parser_lex(parser);
19648 
19649  return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19650  case PM_TOKEN_KEYWORD_UNDEF: {
       // Parses `undef` followed by one or more comma-separated arguments and
       // returns the undef node. Outside statement position
       // PM_ERR_STATEMENT_UNDEF is recorded, but parsing continues.
19651  if (binding_power != PM_BINDING_POWER_STATEMENT) {
19652  pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19653  }
19654 
19655  parser_lex(parser);
19656  pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19657  pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19658 
       // A missing first argument is destroyed rather than appended, so the
       // returned undef node has no names in that case.
19659  if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19660  pm_node_destroy(parser, name);
19661  } else {
19662  pm_undef_node_append(undef, name);
19663 
19664  while (match1(parser, PM_TOKEN_COMMA)) {
       // The lex state is set before the comma is consumed, i.e. before the
       // token after the comma is lexed.
19665  lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19666  parser_lex(parser);
19667  name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19668 
       // A missing argument after a comma ends the list; names already
       // appended are kept.
19669  if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19670  pm_node_destroy(parser, name);
19671  break;
19672  }
19673 
19674  pm_undef_node_append(undef, name);
19675  }
19676  }
19677 
19678  return (pm_node_t *) undef;
19679  }
19680  case PM_TOKEN_KEYWORD_NOT: {
       // Parses `not` and returns the call node built by
       // pm_call_node_not_create, with the operand as receiver. Three forms
       // are handled: `not expr`, `not (expr)` and `not ()`; in the last one
       // the receiver stays NULL.
19681  parser_lex(parser);
19682 
19683  pm_token_t message = parser->previous;
19684  pm_arguments_t arguments = { 0 };
19685  pm_node_t *receiver = NULL;
19686 
       // A single newline directly after the keyword is skipped.
19687  accept1(parser, PM_TOKEN_NEWLINE);
19688 
19689  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19690  arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19691 
19692  if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19693  arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19694  } else {
19695  receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19696 
       // While the parser is recovering, the closing parenthesis is not
       // required and closing_loc keeps its zero-initialized value.
19697  if (!parser->recovering) {
19698  accept1(parser, PM_TOKEN_NEWLINE);
19699  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19700  arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19701  }
19702  }
19703  } else {
       // Without parentheses the operand is parsed with PM_BINDING_POWER_NOT.
19704  receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19705  }
19706 
19707  return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
19708  }
19709  case PM_TOKEN_KEYWORD_UNLESS: {
       // Handles the `unless` keyword: the newline index is captured before
       // the keyword is consumed, then parsing is delegated to
       // parse_conditional with PM_CONTEXT_UNLESS and `false` as the fourth
       // argument.
19710  size_t opening_newline_index = token_newline_index(parser);
19711  parser_lex(parser);
19712 
19713  return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19714  }
19715  case PM_TOKEN_KEYWORD_MODULE: {
       // Parses a `module` definition: the constant path naming the module,
       // the body statements (inside a freshly pushed scope) and the closing
       // `end`. The result is the node built by pm_module_node_create. A
       // local block-exits list is installed for the duration of the arm and
       // is restored and freed on both return paths.
19716  pm_node_list_t current_block_exits = { 0 };
19717  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19718 
19719  size_t opening_newline_index = token_newline_index(parser);
19720  parser_lex(parser);
19721  pm_token_t module_keyword = parser->previous;
19722 
19723  pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19724  pm_token_t name;
19725 
19726  // If we can recover from a syntax error that occurred while parsing
19727  // the name of the module, then we'll handle that here.
19728  if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19729  pop_block_exits(parser, previous_block_exits);
19730  pm_node_list_free(&current_block_exits);
19731 
       // The early-return node uses a zero-width PM_TOKEN_MISSING token for
       // both the name and the `end` keyword, and NULL for locals and body.
19732  pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19733  return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
19734  }
19735 
       // Each `::` followed by a constant wraps the path so far in a new
       // constant path node.
19736  while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19737  pm_token_t double_colon = parser->previous;
19738 
19739  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19740  constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
19741  }
19742 
19743  // Here we retrieve the name of the module. If it wasn't a constant,
19744  // then it's possible that `module foo` was passed, which is a
19745  // syntax error. We handle that here as well.
19746  name = parser->previous;
19747  if (name.type != PM_TOKEN_CONSTANT) {
19748  pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19749  }
19750 
19751  pm_parser_scope_push(parser, true);
19752  accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19753  pm_node_t *statements = NULL;
19754 
       // NOTE(review): the lines that open the two blocks closed below are
       // not visible in this listing (its numbering skips 19755 and 19761).
       // Consult the full source for the conditions guarding the body parse
       // and the parse_rescues_implicit_begin call.
19756  pm_accepts_block_stack_push(parser, true);
19757  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
19758  pm_accepts_block_stack_pop(parser);
19759  }
19760 
19762  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19763  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
19764  } else {
19765  parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19766  }
19767 
       // The scope's locals are ordered into `locals` before the scope is
       // popped; the resulting list is handed to the module node.
19768  pm_constant_id_list_t locals;
19769  pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19770 
19771  pm_parser_scope_pop(parser);
19772  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
19773 
       // PM_ERR_MODULE_IN_METHOD is reported on the keyword when
       // context_def_p is true; the node is still created.
19774  if (context_def_p(parser)) {
19775  pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19776  }
19777 
19778  pop_block_exits(parser, previous_block_exits);
19779  pm_node_list_free(&current_block_exits);
19780 
19781  return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
19782  }
19783  case PM_TOKEN_KEYWORD_NIL:
       // Consumes the `nil` keyword and creates a nil node from the consumed
       // token (parser->previous).
19784  parser_lex(parser);
19785  return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
19786  case PM_TOKEN_KEYWORD_REDO: {
       // Consumes the `redo` keyword and creates a redo node from it.
19787  parser_lex(parser);
19788 
19789  pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
       // parse_block_exit is skipped entirely when parser->partial_script is
       // set.
19790  if (!parser->partial_script) parse_block_exit(parser, node);
19791 
19792  return node;
19793  }
19794  case PM_TOKEN_KEYWORD_RETRY: {
       // Consumes the `retry` keyword, creates a retry node from it, and
       // passes the node to parse_retry before returning it.
19795  parser_lex(parser);
19796 
19797  pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
19798  parse_retry(parser, node);
19799 
19800  return node;
19801  }
19802  case PM_TOKEN_KEYWORD_SELF:
       // Consumes the `self` keyword and creates a self node from the
       // consumed token (parser->previous).
19803  parser_lex(parser);
19804  return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
19805  case PM_TOKEN_KEYWORD_TRUE:
       // Consumes the `true` keyword and creates a true node from the
       // consumed token (parser->previous).
19806  parser_lex(parser);
19807  return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
19808  case PM_TOKEN_KEYWORD_UNTIL: {
       // Parses an `until` loop: the predicate, a `do`/newline/semicolon
       // delimiter, the optional body and the closing `end`. The result is
       // the node built by pm_until_node_create.
19809  size_t opening_newline_index = token_newline_index(parser);
19810 
       // The loop-predicate context and the do-loop stack entry are both
       // pushed before the keyword is consumed and stay active only while
       // the predicate is parsed.
19811  context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19812  pm_do_loop_stack_push(parser, true);
19813 
19814  parser_lex(parser);
19815  pm_token_t keyword = parser->previous;
19816  pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19817 
19818  pm_do_loop_stack_pop(parser);
19819  context_pop(parser);
19820 
19821  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19822  pm_statements_node_t *statements = NULL;
19823 
       // An immediately following `end` leaves the body as NULL. Otherwise
       // the body is parsed with the accepts-block stack pushed to true, and
       // one trailing newline or semicolon is skipped.
19824  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19825  pm_accepts_block_stack_push(parser, true);
19826  statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19827  pm_accepts_block_stack_pop(parser);
19828  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19829  }
19830 
19831  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19832  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19833 
19834  return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19835  }
19836  case PM_TOKEN_KEYWORD_WHILE: {
       // Parses a `while` loop: the predicate, a `do`/newline/semicolon
       // delimiter, the optional body and the closing `end`. The result is
       // the node built by pm_while_node_create.
19837  size_t opening_newline_index = token_newline_index(parser);
19838 
       // The loop-predicate context and the do-loop stack entry are both
       // pushed before the keyword is consumed and stay active only while
       // the predicate is parsed.
19839  context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19840  pm_do_loop_stack_push(parser, true);
19841 
19842  parser_lex(parser);
19843  pm_token_t keyword = parser->previous;
19844  pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19845 
19846  pm_do_loop_stack_pop(parser);
19847  context_pop(parser);
19848 
19849  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19850  pm_statements_node_t *statements = NULL;
19851 
       // An immediately following `end` leaves the body as NULL. Otherwise
       // the body is parsed with the accepts-block stack pushed to true, and
       // one trailing newline or semicolon is skipped.
19852  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19853  pm_accepts_block_stack_push(parser, true);
19854  statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19855  pm_accepts_block_stack_pop(parser);
19856  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19857  }
19858 
19859  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19860  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19861 
19862  return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19863  }
19864  case PM_TOKEN_PERCENT_LOWER_I: {
       // Parses a list literal opened by PM_TOKEN_PERCENT_LOWER_I into an
       // array node. Each string content token becomes one symbol node
       // element; there is no interpolation handling in this arm.
19865  parser_lex(parser);
19866  pm_token_t opening = parser->previous;
19867  pm_array_node_t *array = pm_array_node_create(parser, &opening);
19868 
19869  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
       // Skip one separator; if that brings us to the terminator, stop.
19870  accept1(parser, PM_TOKEN_WORDS_SEP);
19871  if (match1(parser, PM_TOKEN_STRING_END)) break;
19872 
       // The symbol is built from the current token without consuming it.
       // These inner `opening`/`closing` tokens are "not provided" and shadow
       // the outer ones only inside this block.
19873  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19874  pm_token_t opening = not_provided(parser);
19875  pm_token_t closing = not_provided(parser);
19876  pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19877  }
19878 
       // This expect1 call is where the content token is actually consumed;
       // for any other token it records PM_ERR_LIST_I_LOWER_ELEMENT.
19879  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19880  }
19881 
       // On EOF the error is attached to the opening token and the closing
       // becomes a zero-width PM_TOKEN_MISSING token at the end of the
       // previous token.
19882  pm_token_t closing = parser->current;
19883  if (match1(parser, PM_TOKEN_EOF)) {
19884  pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19885  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19886  } else {
19887  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19888  }
19889  pm_array_node_close_set(array, &closing);
19890 
19891  return (pm_node_t *) array;
19892  }
19893  case PM_TOKEN_PERCENT_UPPER_I: {
       // Parses a list literal opened by PM_TOKEN_PERCENT_UPPER_I into an
       // array node. Elements are symbol nodes, or interpolated symbol nodes
       // when an element contains an embedded variable or expression.
       // `current` accumulates the element being built and is flushed into
       // the array on each separator and once more after the loop.
19894  parser_lex(parser);
19895  pm_token_t opening = parser->previous;
19896  pm_array_node_t *array = pm_array_node_create(parser, &opening);
19897 
19898  // This is the current node that we are parsing that will be added to the
19899  // list of elements.
19900  pm_node_t *current = NULL;
19901 
19902  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19903  switch (parser->current.type) {
19904  case PM_TOKEN_WORDS_SEP: {
19905  if (current == NULL) {
19906  // If we hit a separator before we have any content, then we don't
19907  // need to do anything.
19908  } else {
19909  // If we hit a separator after we've hit content, then we need to
19910  // append that content to the list and reset the current node.
19911  pm_array_node_elements_append(array, current);
19912  current = NULL;
19913  }
19914 
19915  parser_lex(parser);
19916  break;
19917  }
19918  case PM_TOKEN_STRING_CONTENT: {
19919  pm_token_t opening = not_provided(parser);
19920  pm_token_t closing = not_provided(parser);
19921 
19922  if (current == NULL) {
19923  // If we hit content and the current node is NULL, then this is
19924  // the first string content we've seen. In that case we're going
19925  // to create a new symbol node and set that to the current.
19926  current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
19927  parser_lex(parser);
19928  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19929  // If we hit string content and the current node is an
19930  // interpolated symbol, then we need to append the string content
19931  // to the list of child nodes.
19932  pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
19933  parser_lex(parser);
19934 
19935  pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19936  } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19937  // If we hit string content and the current node is a symbol node,
19938  // then we need to convert the current node into an interpolated
19939  // symbol and add the string content to the list of child nodes.
19940  pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19941  pm_token_t bounds = not_provided(parser);
19942 
       // The first part reuses the symbol's value location and its unescaped
       // string.
       // NOTE(review): the second part is created from &parser->previous,
       // whereas the other branches of this case pass &parser->current before
       // calling parser_lex. Confirm which token is intended here.
19943  pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19944  pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
19945  pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
19946  parser_lex(parser);
19947 
19948  pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19949  pm_interpolated_symbol_node_append(interpolated, first_string);
19950  pm_interpolated_symbol_node_append(interpolated, second_string);
19951 
       // The original symbol node is released with xfree rather than
       // pm_node_destroy. Presumably this is because its unescaped string was
       // handed to first_string above; confirm ownership.
19952  xfree(current);
19953  current = (pm_node_t *) interpolated;
19954  } else {
19955  assert(false && "unreachable");
19956  }
19957 
19958  break;
19959  }
19960  case PM_TOKEN_EMBVAR: {
19961  bool start_location_set = false;
19962  if (current == NULL) {
19963  // If we hit an embedded variable and the current node is NULL,
19964  // then this is the start of a new element. We'll set the current
19965  // node to a new interpolated symbol.
19966  pm_token_t opening = not_provided(parser);
19967  pm_token_t closing = not_provided(parser);
19968  current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19969  } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19970  // If we hit an embedded variable and the current node is a symbol
19971  // node, then we'll convert it into a string node and make that the
19972  // first part of a new interpolated symbol.
19973  pm_token_t opening = not_provided(parser);
19974  pm_token_t closing = not_provided(parser);
19975  pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19976 
19977  current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
19978  pm_interpolated_symbol_node_append(interpolated, current);
19979  interpolated->base.location.start = current->location.start;
19980  start_location_set = true;
19981  current = (pm_node_t *) interpolated;
19982  } else {
19983  // If we hit an embedded variable and the current node is an
19984  // interpolated symbol, then we'll just add the embedded variable.
19985  }
19986 
       // NOTE(review): start_location_set is only true on the symbol-node
       // path, so when `current` was already an interpolated symbol its start
       // is also reassigned to this part's start. Confirm that is intended.
19987  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19988  pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
19989  if (!start_location_set) {
19990  current->location.start = part->location.start;
19991  }
19992  break;
19993  }
19994  case PM_TOKEN_EMBEXPR_BEGIN: {
19995  bool start_location_set = false;
19996  if (current == NULL) {
19997  // If we hit an embedded expression and the current node is NULL,
19998  // then this is the start of a new element. We'll set the current
19999  // node to a new interpolated symbol.
20000  pm_token_t opening = not_provided(parser);
20001  pm_token_t closing = not_provided(parser);
20002  current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20003  } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
20004  // If we hit an embedded expression and the current node is a
20005  // symbol node, then we'll convert it into a string node and
20006  // make that the first part of a new interpolated symbol, which
20007  // becomes the current node.
20008  pm_token_t opening = not_provided(parser);
20009  pm_token_t closing = not_provided(parser);
20010  pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
20011 
20012  current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
20013  pm_interpolated_symbol_node_append(interpolated, current);
20014  interpolated->base.location.start = current->location.start;
20015  start_location_set = true;
20016  current = (pm_node_t *) interpolated;
20017  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
20018  // If we hit an embedded expression and the current node is an
20019  // interpolated symbol, then we'll just continue on.
20020  } else {
20021  assert(false && "unreachable");
20022  }
20023 
       // NOTE(review): as in the embedded-variable case, the start is
       // reassigned whenever the symbol-node path was not taken, including
       // when `current` was already an interpolated symbol. Confirm intent.
20024  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20025  pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20026  if (!start_location_set) {
20027  current->location.start = part->location.start;
20028  }
20029  break;
20030  }
20031  default:
       // Any other token is reported through expect1 and then skipped with
       // parser_lex.
20032  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
20033  parser_lex(parser);
20034  break;
20035  }
20036  }
20037 
20038  // If we have a current node, then we need to append it to the list.
20039  if (current) {
20040  pm_array_node_elements_append(array, current);
20041  }
20042 
       // On EOF the error is attached to the opening token and the closing
       // becomes a zero-width PM_TOKEN_MISSING token at the end of the
       // previous token.
20043  pm_token_t closing = parser->current;
20044  if (match1(parser, PM_TOKEN_EOF)) {
20045  pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
20046  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20047  } else {
20048  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
20049  }
20050  pm_array_node_close_set(array, &closing);
20051 
20052  return (pm_node_t *) array;
20053  }
20054  case PM_TOKEN_PERCENT_LOWER_W: {
       // Parses a list literal opened by PM_TOKEN_PERCENT_LOWER_W into an
       // array node. Each string content token becomes one string node
       // element; there is no interpolation handling in this arm.
20055  parser_lex(parser);
20056  pm_token_t opening = parser->previous;
20057  pm_array_node_t *array = pm_array_node_create(parser, &opening);
20058 
20059  // skip all leading whitespaces
20060  accept1(parser, PM_TOKEN_WORDS_SEP);
20061 
20062  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
       // Skip one separator; if that brings us to the terminator, stop.
20063  accept1(parser, PM_TOKEN_WORDS_SEP);
20064  if (match1(parser, PM_TOKEN_STRING_END)) break;
20065 
       // The string node is built from the current token without consuming
       // it. The inner `opening`/`closing` tokens are "not provided" and
       // shadow the outer ones only inside this block.
20066  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20067  pm_token_t opening = not_provided(parser);
20068  pm_token_t closing = not_provided(parser);
20069 
20070  pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20071  pm_array_node_elements_append(array, string);
20072  }
20073 
       // This expect1 call is where the content token is actually consumed;
       // for any other token it records PM_ERR_LIST_W_LOWER_ELEMENT.
20074  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20075  }
20076 
       // On EOF the error is attached to the opening token and the closing
       // becomes a zero-width PM_TOKEN_MISSING token at the end of the
       // previous token.
20077  pm_token_t closing = parser->current;
20078  if (match1(parser, PM_TOKEN_EOF)) {
20079  pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20080  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20081  } else {
20082  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20083  }
20084 
20085  pm_array_node_close_set(array, &closing);
20086  return (pm_node_t *) array;
20087  }
20088  case PM_TOKEN_PERCENT_UPPER_W: {
       // Parses a list literal opened by PM_TOKEN_PERCENT_UPPER_W into an
       // array node. Elements are string nodes, or interpolated string nodes
       // when an element has more than one part. `current` accumulates the
       // element being built and is flushed into the array on each separator
       // and once more after the loop.
20089  parser_lex(parser);
20090  pm_token_t opening = parser->previous;
20091  pm_array_node_t *array = pm_array_node_create(parser, &opening);
20092 
20093  // This is the current node that we are parsing that will be added
20094  // to the list of elements.
20095  pm_node_t *current = NULL;
20096 
20097  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20098  switch (parser->current.type) {
20099  case PM_TOKEN_WORDS_SEP: {
20100  // Reset the explicit encoding if we hit a separator
20101  // since each element can have its own encoding.
20102  parser->explicit_encoding = NULL;
20103 
20104  if (current == NULL) {
20105  // If we hit a separator before we have any content,
20106  // then we don't need to do anything.
20107  } else {
20108  // If we hit a separator after we've hit content,
20109  // then we need to append that content to the list
20110  // and reset the current node.
20111  pm_array_node_elements_append(array, current);
20112  current = NULL;
20113  }
20114 
20115  parser_lex(parser);
20116  break;
20117  }
20118  case PM_TOKEN_STRING_CONTENT: {
20119  pm_token_t opening = not_provided(parser);
20120  pm_token_t closing = not_provided(parser);
20121 
       // The string node is always created, flagged with the result of
       // parse_unescaped_encoding, and its token consumed; the branches below
       // only decide where the node is attached.
20122  pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20123  pm_node_flag_set(string, parse_unescaped_encoding(parser));
20124  parser_lex(parser);
20125 
20126  if (current == NULL) {
20127  // If we hit content and the current node is NULL,
20128  // then this is the first string content we've seen.
20129  // In that case we're going to create a new string
20130  // node and set that to the current.
20131  current = string;
20132  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20133  // If we hit string content and the current node is
20134  // an interpolated string, then we need to append
20135  // the string content to the list of child nodes.
20136  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
20137  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20138  // If we hit string content and the current node is
20139  // a string node, then we need to convert the
20140  // current node into an interpolated string and add
20141  // the string content to the list of child nodes.
20142  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20143  pm_interpolated_string_node_append(interpolated, current);
20144  pm_interpolated_string_node_append(interpolated, string);
20145  current = (pm_node_t *) interpolated;
20146  } else {
20147  assert(false && "unreachable");
20148  }
20149 
20150  break;
20151  }
20152  case PM_TOKEN_EMBVAR: {
20153  if (current == NULL) {
20154  // If we hit an embedded variable and the current
20155  // node is NULL, then this is the start of a new
20156  // string. We'll set the current node to a new
20157  // interpolated string.
20158  pm_token_t opening = not_provided(parser);
20159  pm_token_t closing = not_provided(parser);
20160  current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20161  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20162  // If we hit an embedded variable and the current
20163  // node is a string node, then we'll convert the
20164  // current into an interpolated string and add the
20165  // string node to the list of parts.
20166  pm_token_t opening = not_provided(parser);
20167  pm_token_t closing = not_provided(parser);
20168  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20169  pm_interpolated_string_node_append(interpolated, current);
20170  current = (pm_node_t *) interpolated;
20171  } else {
20172  // If we hit an embedded variable and the current
20173  // node is an interpolated string, then we'll just
20174  // add the embedded variable.
20175  }
20176 
       // On every path `current` is cast to an interpolated string node here
       // and the parsed part is appended to it.
20177  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20178  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20179  break;
20180  }
20181  case PM_TOKEN_EMBEXPR_BEGIN: {
20182  if (current == NULL) {
20183  // If we hit an embedded expression and the current
20184  // node is NULL, then this is the start of a new
20185  // string. We'll set the current node to a new
20186  // interpolated string.
20187  pm_token_t opening = not_provided(parser);
20188  pm_token_t closing = not_provided(parser);
20189  current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20190  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20191  // If we hit an embedded expression and the current
20192  // node is a string node, then we'll convert the
20193  // current into an interpolated string and add the
20194  // string node to the list of parts.
20195  pm_token_t opening = not_provided(parser);
20196  pm_token_t closing = not_provided(parser);
20197  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20198  pm_interpolated_string_node_append(interpolated, current);
20199  current = (pm_node_t *) interpolated;
20200  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20201  // If we hit an embedded expression and the current
20202  // node is an interpolated string, then we'll just
20203  // continue on.
20204  } else {
20205  assert(false && "unreachable");
20206  }
20207 
20208  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20209  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20210  break;
20211  }
20212  default:
       // Any other token is reported through expect1 and then skipped with
       // parser_lex.
20213  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
20214  parser_lex(parser);
20215  break;
20216  }
20217  }
20218 
20219  // If we have a current node, then we need to append it to the list.
20220  if (current) {
20221  pm_array_node_elements_append(array, current);
20222  }
20223 
       // On EOF the error is attached to the opening token and the closing
       // becomes a zero-width PM_TOKEN_MISSING token at the end of the
       // previous token.
20224  pm_token_t closing = parser->current;
20225  if (match1(parser, PM_TOKEN_EOF)) {
20226  pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
20227  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20228  } else {
20229  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
20230  }
20231 
20232  pm_array_node_close_set(array, &closing);
20233  return (pm_node_t *) array;
20234  }
20235  case PM_TOKEN_REGEXP_BEGIN: {
       // Parses a regular expression literal. There are three outcomes:
       //   1. the end token follows the opening directly: a plain regular
       //      expression node with zero-width content;
       //   2. a single string content token is followed by the end: a plain
       //      regular expression node carrying the unescaped content;
       //   3. anything else: an interpolated regular expression node whose
       //      parts are collected until the end token or EOF.
       // NOTE(review): some lines of this arm are not visible in this listing
       // (its numbering skips 20244, 20252 and 20257), including the
       // declaration of `interpolated`. Consult the full source for them.
20236  pm_token_t opening = parser->current;
20237  parser_lex(parser);
20238 
20239  if (match1(parser, PM_TOKEN_REGEXP_END)) {
20240  // If we get here, then we have an end immediately after a start. In
20241  // that case we'll create an empty content token and return an
20242  // uninterpolated regular expression.
20243  pm_token_t content = (pm_token_t) {
20245  .start = parser->previous.end,
20246  .end = parser->previous.end
20247  };
20248 
20249  parser_lex(parser);
20250 
20251  pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20253 
20254  return node;
20255  }
20256 
20258 
20259  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20260  // In this case we've hit string content so we know the regular
20261  // expression at least has something in it. We'll need to check if the
20262  // following token is the end (in which case we can return a plain
20263  // regular expression) or if it's not then it has interpolation.
       // The unescaped string, the content token and the ascii-only flag are
       // copied out of the parser before parser_lex advances to the next
       // token.
20264  pm_string_t unescaped = parser->current_string;
20265  pm_token_t content = parser->current;
20266  bool ascii_only = parser->current_regular_expression_ascii_only;
20267  parser_lex(parser);
20268 
20269  // If we hit an end, then we can create a regular expression
20270  // node without interpolation, which can be represented more
20271  // succinctly and more easily compiled.
20272  if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20273  pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20274 
20275  // If we're not immediately followed by a =~, then we want
20276  // to parse all of the errors at this point. If it is
20277  // followed by a =~, then it will get parsed higher up while
20278  // parsing the named captures as well.
20279  if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20280  parse_regular_expression_errors(parser, node);
20281  }
20282 
20283  pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
20284  return (pm_node_t *) node;
20285  }
20286 
20287  // If we get here, then we have interpolation so we'll need to create
20288  // a regular expression node with interpolation.
20289  interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20290 
       // From here to the end of this block, `opening` and `closing` are
       // "not provided" tokens local to the block; this `opening` shadows the
       // regular expression's own opening token used above.
20291  pm_token_t opening = not_provided(parser);
20292  pm_token_t closing = not_provided(parser);
20293  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20294 
20295  if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20296  // This is extremely strange, but the first string part of a
20297  // regular expression will always be tagged as binary if we
20298  // are in a US-ASCII file, no matter its contents.
20299  pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20300  }
20301 
20302  pm_interpolated_regular_expression_node_append(interpolated, part);
20303  } else {
20304  // If the first part of the body of the regular expression is not a
20305  // string content, then we have interpolation and we need to create an
20306  // interpolated regular expression node.
20307  interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20308  }
20309 
20310  // Now that we're here and we have interpolation, we'll parse all of the
20311  // parts into the list.
       // A NULL result from parse_string_part is skipped, not appended.
20312  pm_node_t *part;
20313  while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20314  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20315  pm_interpolated_regular_expression_node_append(interpolated, part);
20316  }
20317  }
20318 
       // On EOF the error is attached to the opening token and the closing
       // becomes a zero-width PM_TOKEN_MISSING token at the end of the
       // previous token.
20319  pm_token_t closing = parser->current;
20320  if (match1(parser, PM_TOKEN_EOF)) {
20321  pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20322  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20323  } else {
20324  expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20325  }
20326 
20327  pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20328  return (pm_node_t *) interpolated;
20329  }
20330  case PM_TOKEN_BACKTICK:
20331  case PM_TOKEN_PERCENT_LOWER_X: {
20332  parser_lex(parser);
20333  pm_token_t opening = parser->previous;
20334 
20335  // When we get here, we don't know if this string is going to have
20336  // interpolation or not, even though it is allowed. Still, we want to be
20337  // able to return a string node without interpolation if we can since
20338  // it'll be faster.
20339  if (match1(parser, PM_TOKEN_STRING_END)) {
20340  // If we get here, then we have an end immediately after a start. In
20341  // that case we'll create an empty content token and return an
20342  // uninterpolated string.
20343  pm_token_t content = (pm_token_t) {
20345  .start = parser->previous.end,
20346  .end = parser->previous.end
20347  };
20348 
20349  parser_lex(parser);
20350  return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
20351  }
20352 
20354 
20355  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20356  // In this case we've hit string content so we know the string
20357  // at least has something in it. We'll need to check if the
20358  // following token is the end (in which case we can return a
20359  // plain string) or if it's not then it has interpolation.
20360  pm_string_t unescaped = parser->current_string;
20361  pm_token_t content = parser->current;
20362  parser_lex(parser);
20363 
20364  if (match1(parser, PM_TOKEN_STRING_END)) {
20365  pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
20366  pm_node_flag_set(node, parse_unescaped_encoding(parser));
20367  parser_lex(parser);
20368  return node;
20369  }
20370 
20371  // If we get here, then we have interpolation so we'll need to
20372  // create a string node with interpolation.
20373  node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20374 
20375  pm_token_t opening = not_provided(parser);
20376  pm_token_t closing = not_provided(parser);
20377 
20378  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20379  pm_node_flag_set(part, parse_unescaped_encoding(parser));
20380 
20381  pm_interpolated_xstring_node_append(node, part);
20382  } else {
20383  // If the first part of the body of the string is not a string
20384  // content, then we have interpolation and we need to create an
20385  // interpolated string node.
20386  node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20387  }
20388 
20389  pm_node_t *part;
20390  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20391  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20392  pm_interpolated_xstring_node_append(node, part);
20393  }
20394  }
20395 
20396  pm_token_t closing = parser->current;
20397  if (match1(parser, PM_TOKEN_EOF)) {
20398  pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20399  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20400  } else {
20401  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20402  }
20403  pm_interpolated_xstring_node_closing_set(node, &closing);
20404 
20405  return (pm_node_t *) node;
20406  }
20407  case PM_TOKEN_USTAR: {
20408  parser_lex(parser);
20409 
20410  // * operators at the beginning of expressions are only valid in the
20411  // context of a multiple assignment. We enforce that here. We'll
20412  // still lex past it though and create a missing node place.
20413  if (binding_power != PM_BINDING_POWER_STATEMENT) {
20414  pm_parser_err_prefix(parser, diag_id);
20415  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20416  }
20417 
20418  pm_token_t operator = parser->previous;
20419  pm_node_t *name = NULL;
20420 
20421  if (token_begins_expression_p(parser->current.type)) {
20422  name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20423  }
20424 
20425  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
20426 
20427  if (match1(parser, PM_TOKEN_COMMA)) {
20428  return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20429  } else {
20430  return parse_target_validate(parser, splat, true);
20431  }
20432  }
20433  case PM_TOKEN_BANG: {
20434  if (binding_power > PM_BINDING_POWER_UNARY) {
20435  pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20436  }
20437 
20438  parser_lex(parser);
20439 
20440  pm_token_t operator = parser->previous;
20441  pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20442  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20443 
20444  pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20445  return (pm_node_t *) node;
20446  }
20447  case PM_TOKEN_TILDE: {
20448  if (binding_power > PM_BINDING_POWER_UNARY) {
20449  pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20450  }
20451  parser_lex(parser);
20452 
20453  pm_token_t operator = parser->previous;
20454  pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20455  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20456 
20457  return (pm_node_t *) node;
20458  }
20459  case PM_TOKEN_UMINUS: {
20460  if (binding_power > PM_BINDING_POWER_UNARY) {
20461  pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20462  }
20463  parser_lex(parser);
20464 
20465  pm_token_t operator = parser->previous;
20466  pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20467  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20468 
20469  return (pm_node_t *) node;
20470  }
20471  case PM_TOKEN_UMINUS_NUM: {
20472  parser_lex(parser);
20473 
20474  pm_token_t operator = parser->previous;
20475  pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20476 
20477  if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20478  pm_token_t exponent_operator = parser->previous;
20479  pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20480  node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
20481  node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20482  } else {
20483  switch (PM_NODE_TYPE(node)) {
20484  case PM_INTEGER_NODE:
20485  case PM_FLOAT_NODE:
20486  case PM_RATIONAL_NODE:
20487  case PM_IMAGINARY_NODE:
20488  parse_negative_numeric(node);
20489  break;
20490  default:
20491  node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20492  break;
20493  }
20494  }
20495 
20496  return node;
20497  }
20498  case PM_TOKEN_MINUS_GREATER: {
20499  int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20500  parser->lambda_enclosure_nesting = parser->enclosure_nesting;
20501 
20502  size_t opening_newline_index = token_newline_index(parser);
20503  pm_accepts_block_stack_push(parser, true);
20504  parser_lex(parser);
20505 
20506  pm_token_t operator = parser->previous;
20507  pm_parser_scope_push(parser, false);
20508 
20509  pm_block_parameters_node_t *block_parameters;
20510 
20511  switch (parser->current.type) {
20513  pm_token_t opening = parser->current;
20514  parser_lex(parser);
20515 
20516  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20517  block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20518  } else {
20519  block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20520  }
20521 
20522  accept1(parser, PM_TOKEN_NEWLINE);
20523  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20524 
20525  pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20526  break;
20527  }
20528  case PM_CASE_PARAMETER: {
20529  pm_accepts_block_stack_push(parser, false);
20530  pm_token_t opening = not_provided(parser);
20531  block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20532  pm_accepts_block_stack_pop(parser);
20533  break;
20534  }
20535  default: {
20536  block_parameters = NULL;
20537  break;
20538  }
20539  }
20540 
20541  pm_token_t opening;
20542  pm_node_t *body = NULL;
20543  parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20544 
20545  if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20546  opening = parser->previous;
20547 
20548  if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20549  body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
20550  }
20551 
20552  parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20553  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20554  } else {
20555  expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20556  opening = parser->previous;
20557 
20559  pm_accepts_block_stack_push(parser, true);
20560  body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
20561  pm_accepts_block_stack_pop(parser);
20562  }
20563 
20564  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20565  assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20566  body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
20567  } else {
20568  parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20569  }
20570 
20571  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20572  }
20573 
20574  pm_constant_id_list_t locals;
20575  pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20576  pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
20577 
20578  pm_parser_scope_pop(parser);
20579  pm_accepts_block_stack_pop(parser);
20580 
20581  return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
20582  }
20583  case PM_TOKEN_UPLUS: {
20584  if (binding_power > PM_BINDING_POWER_UNARY) {
20585  pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20586  }
20587  parser_lex(parser);
20588 
20589  pm_token_t operator = parser->previous;
20590  pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20591  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20592 
20593  return (pm_node_t *) node;
20594  }
20595  case PM_TOKEN_STRING_BEGIN:
20596  return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20597  case PM_TOKEN_SYMBOL_BEGIN: {
20598  pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20599  parser_lex(parser);
20600 
20601  return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20602  }
20603  default: {
20604  pm_context_t recoverable = context_recoverable(parser, &parser->current);
20605 
20606  if (recoverable != PM_CONTEXT_NONE) {
20607  parser->recovering = true;
20608 
20609  // If the given error is not the generic one, then we'll add it
20610  // here because it will provide more context in addition to the
20611  // recoverable error that we will also add.
20612  if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20613  pm_parser_err_prefix(parser, diag_id);
20614  }
20615 
20616  // If we get here, then we are assuming this token is closing a
20617  // parent context, so we'll indicate that to the user so that
20618  // they know how we behaved.
20619  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20620  } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20621  // We're going to make a special case here, because "cannot
20622  // parse expression" is pretty generic, and we know here that we
20623  // have an unexpected token.
20624  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20625  } else {
20626  pm_parser_err_prefix(parser, diag_id);
20627  }
20628 
20629  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20630  }
20631  }
20632 }
20633 
20643 static pm_node_t *
20644 parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20645  pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20646 
20647  // Contradicting binding powers, the right-hand-side value of the assignment
20648  // allows the `rescue` modifier.
20649  if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20650  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20651 
20652  pm_token_t rescue = parser->current;
20653  parser_lex(parser);
20654 
20655  pm_node_t *right = parse_expression(parser, binding_power, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20656  context_pop(parser);
20657 
20658  return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20659  }
20660 
20661  return value;
20662 }
20663 
20668 static void
20669 parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20670  switch (PM_NODE_TYPE(node)) {
20671  case PM_BEGIN_NODE: {
20672  const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
20673  if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20674  break;
20675  }
20678  pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20679  break;
20680  }
20681  case PM_PARENTHESES_NODE: {
20682  const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
20683  if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20684  break;
20685  }
20686  case PM_STATEMENTS_NODE: {
20687  const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20688  const pm_node_t *statement;
20689 
20690  PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20691  parse_assignment_value_local(parser, statement);
20692  }
20693  break;
20694  }
20695  default:
20696  break;
20697  }
20698 }
20699 
20712 static pm_node_t *
20713 parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20714  bool permitted = true;
20715  if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20716 
20717  pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1));
20718  if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20719 
20720  parse_assignment_value_local(parser, value);
20721  bool single_value = true;
20722 
20723  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20724  single_value = false;
20725 
20726  pm_token_t opening = not_provided(parser);
20727  pm_array_node_t *array = pm_array_node_create(parser, &opening);
20728 
20729  pm_array_node_elements_append(array, value);
20730  value = (pm_node_t *) array;
20731 
20732  while (accept1(parser, PM_TOKEN_COMMA)) {
20733  pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20734 
20735  pm_array_node_elements_append(array, element);
20736  if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20737 
20738  parse_assignment_value_local(parser, element);
20739  }
20740  }
20741 
20742  // Contradicting binding powers, the right-hand-side value of the assignment
20743  // allows the `rescue` modifier.
20744  if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20745  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20746 
20747  pm_token_t rescue = parser->current;
20748  parser_lex(parser);
20749 
20750  bool accepts_command_call_inner = false;
20751 
20752  // RHS can accept command call iff the value is a call with arguments
20753  // but without parenthesis.
20754  if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20755  pm_call_node_t *call_node = (pm_call_node_t *) value;
20756  if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20757  accepts_command_call_inner = true;
20758  }
20759  }
20760 
20761  pm_node_t *right = parse_expression(parser, binding_power, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20762  context_pop(parser);
20763 
20764  return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20765  }
20766 
20767  return value;
20768 }
20769 
20777 static void
20778 parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20779  if (call_node->arguments != NULL) {
20780  pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20781  pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
20782  call_node->arguments = NULL;
20783  }
20784 
20785  if (call_node->block != NULL) {
20786  pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20787  pm_node_destroy(parser, (pm_node_t *) call_node->block);
20788  call_node->block = NULL;
20789  }
20790 }
20791 
20796 typedef struct {
20799 
20802 
20805 
20808 
20814  bool shared;
20816 
20821 static void
20822 parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
20824 
20825  pm_parser_t *parser = callback_data->parser;
20826  pm_call_node_t *call = callback_data->call;
20827  pm_constant_id_list_t *names = &callback_data->names;
20828 
20829  const uint8_t *source = pm_string_source(capture);
20830  size_t length = pm_string_length(capture);
20831 
20832  pm_location_t location;
20833  pm_constant_id_t name;
20834 
20835  // If the name of the capture group isn't a valid identifier, we do
20836  // not add it to the local table.
20837  if (!pm_slice_is_valid_local(parser, source, source + length)) return;
20838 
20839  if (callback_data->shared) {
20840  // If the unescaped string is a slice of the source, then we can
20841  // copy the names directly. The pointers will line up.
20842  location = (pm_location_t) { .start = source, .end = source + length };
20843  name = pm_parser_constant_id_location(parser, location.start, location.end);
20844  } else {
20845  // Otherwise, the name is a slice of the malloc-ed owned string,
20846  // in which case we need to copy it out into a new string.
20847  location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
20848 
20849  void *memory = xmalloc(length);
20850  if (memory == NULL) abort();
20851 
20852  memcpy(memory, source, length);
20853  name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20854  }
20855 
20856  // Add this name to the list of constants if it is valid, not duplicated,
20857  // and not a keyword.
20858  if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20859  pm_constant_id_list_append(names, name);
20860 
20861  int depth;
20862  if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20863  // If the local is not already a local but it is a keyword, then we
20864  // do not want to add a capture for this.
20865  if (pm_local_is_keyword((const char *) source, length)) return;
20866 
20867  // If the identifier is not already a local, then we will add it to
20868  // the local table.
20869  pm_parser_local_add(parser, name, location.start, location.end, 0);
20870  }
20871 
20872  // Here we lazily create the MatchWriteNode since we know we're
20873  // about to add a target.
20874  if (callback_data->match == NULL) {
20875  callback_data->match = pm_match_write_node_create(parser, call);
20876  }
20877 
20878  // Next, create the local variable target and add it to the list of
20879  // targets for the match.
20880  pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
20881  pm_node_list_append(&callback_data->match->targets, target);
20882  }
20883 }
20884 
20889 static pm_node_t *
20890 parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
20892  .parser = parser,
20893  .call = call,
20894  .names = { 0 },
20895  .shared = content->type == PM_STRING_SHARED
20896  };
20897 
20899  .parser = parser,
20900  .start = call->receiver->location.start,
20901  .end = call->receiver->location.end,
20902  .shared = content->type == PM_STRING_SHARED
20903  };
20904 
20905  pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
20906  pm_constant_id_list_free(&callback_data.names);
20907 
20908  if (callback_data.match != NULL) {
20909  return (pm_node_t *) callback_data.match;
20910  } else {
20911  return (pm_node_t *) call;
20912  }
20913 }
20914 
20915 static inline pm_node_t *
20916 parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
20917  pm_token_t token = parser->current;
20918 
20919  switch (token.type) {
20920  case PM_TOKEN_EQUAL: {
20921  switch (PM_NODE_TYPE(node)) {
20922  case PM_CALL_NODE: {
20923  // If we have no arguments to the call node and we need this
20924  // to be a target then this is either a method call or a
20925  // local variable write. This _must_ happen before the value
20926  // is parsed because it could be referenced in the value.
20927  pm_call_node_t *call_node = (pm_call_node_t *) node;
20929  pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20930  }
20931  }
20932  /* fallthrough */
20933  case PM_CASE_WRITABLE: {
20934  parser_lex(parser);
20935  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20936 
20937  if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
20938  pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
20939  }
20940 
20941  return parse_write(parser, node, &token, value);
20942  }
20943  case PM_SPLAT_NODE: {
20944  pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
20945  pm_multi_target_node_targets_append(parser, multi_target, node);
20946 
20947  parser_lex(parser);
20948  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20949  return parse_write(parser, (pm_node_t *) multi_target, &token, value);
20950  }
20952  case PM_FALSE_NODE:
20953  case PM_SOURCE_FILE_NODE:
20954  case PM_SOURCE_LINE_NODE:
20955  case PM_NIL_NODE:
20956  case PM_SELF_NODE:
20957  case PM_TRUE_NODE: {
20958  // In these special cases, we have specific error messages
20959  // and we will replace them with local variable writes.
20960  parser_lex(parser);
20961  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20962  return parse_unwriteable_write(parser, node, &token, value);
20963  }
20964  default:
20965  // In this case we have an = sign, but we don't know what
20966  // it's for. We need to treat it as an error. We'll mark it
20967  // as an error and skip past it.
20968  parser_lex(parser);
20969  pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
20970  return node;
20971  }
20972  }
20974  switch (PM_NODE_TYPE(node)) {
20977  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20978  /* fallthrough */
20980  parser_lex(parser);
20981 
20982  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20983  pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
20984 
20985  pm_node_destroy(parser, node);
20986  return result;
20987  }
20989  parser_lex(parser);
20990 
20991  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20992  pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
20993 
20994  pm_node_destroy(parser, node);
20995  return result;
20996  }
20997  case PM_CONSTANT_PATH_NODE: {
20998  parser_lex(parser);
20999 
21000  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21001  pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21002 
21003  return parse_shareable_constant_write(parser, write);
21004  }
21005  case PM_CONSTANT_READ_NODE: {
21006  parser_lex(parser);
21007 
21008  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21009  pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21010 
21011  pm_node_destroy(parser, node);
21012  return parse_shareable_constant_write(parser, write);
21013  }
21015  parser_lex(parser);
21016 
21017  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21018  pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21019 
21020  pm_node_destroy(parser, node);
21021  return result;
21022  }
21025  parser_lex(parser);
21026 
21027  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21028  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21029 
21030  pm_node_destroy(parser, node);
21031  return result;
21032  }
21033  case PM_CALL_NODE: {
21034  pm_call_node_t *cast = (pm_call_node_t *) node;
21035 
21036  // If we have a vcall (a method with no arguments and no
21037  // receiver that could have been a local variable) then we
21038  // will transform it into a local variable write.
21040  pm_location_t *message_loc = &cast->message_loc;
21041  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21042 
21043  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21044  parser_lex(parser);
21045 
21046  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21047  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21048 
21049  pm_node_destroy(parser, (pm_node_t *) cast);
21050  return result;
21051  }
21052 
21053  // Move past the token here so that we have already added
21054  // the local variable by this point.
21055  parser_lex(parser);
21056 
21057  // If there is no call operator and the message is "[]" then
21058  // this is an aref expression, and we can transform it into
21059  // an aset expression.
21060  if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21061  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21062  return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
21063  }
21064 
21065  // If this node cannot be writable, then we have an error.
21066  if (pm_call_node_writable_p(parser, cast)) {
21067  parse_write_name(parser, &cast->name);
21068  } else {
21069  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21070  }
21071 
21072  parse_call_operator_write(parser, cast, &token);
21073  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21074  return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
21075  }
21076  case PM_MULTI_WRITE_NODE: {
21077  parser_lex(parser);
21078  pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21079  return node;
21080  }
21081  default:
21082  parser_lex(parser);
21083 
21084  // In this case we have an &&= sign, but we don't know what it's for.
21085  // We need to treat it as an error. For now, we'll mark it as an error
21086  // and just skip right past it.
21087  pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21088  return node;
21089  }
21090  }
21091  case PM_TOKEN_PIPE_PIPE_EQUAL: {
21092  switch (PM_NODE_TYPE(node)) {
21095  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21096  /* fallthrough */
21098  parser_lex(parser);
21099 
21100  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21101  pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
21102 
21103  pm_node_destroy(parser, node);
21104  return result;
21105  }
21107  parser_lex(parser);
21108 
21109  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21110  pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21111 
21112  pm_node_destroy(parser, node);
21113  return result;
21114  }
21115  case PM_CONSTANT_PATH_NODE: {
21116  parser_lex(parser);
21117 
21118  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21119  pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21120 
21121  return parse_shareable_constant_write(parser, write);
21122  }
21123  case PM_CONSTANT_READ_NODE: {
21124  parser_lex(parser);
21125 
21126  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21127  pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21128 
21129  pm_node_destroy(parser, node);
21130  return parse_shareable_constant_write(parser, write);
21131  }
21133  parser_lex(parser);
21134 
21135  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21136  pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21137 
21138  pm_node_destroy(parser, node);
21139  return result;
21140  }
21143  parser_lex(parser);
21144 
21145  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21146  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21147 
21148  pm_node_destroy(parser, node);
21149  return result;
21150  }
21151  case PM_CALL_NODE: {
21152  pm_call_node_t *cast = (pm_call_node_t *) node;
21153 
21154  // If we have a vcall (a method with no arguments and no
21155  // receiver that could have been a local variable) then we
21156  // will transform it into a local variable write.
21158  pm_location_t *message_loc = &cast->message_loc;
21159  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21160 
21161  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21162  parser_lex(parser);
21163 
21164  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21165  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21166 
21167  pm_node_destroy(parser, (pm_node_t *) cast);
21168  return result;
21169  }
21170 
21171  // Move past the token here so that we have already added
21172  // the local variable by this point.
21173  parser_lex(parser);
21174 
21175  // If there is no call operator and the message is "[]" then
21176  // this is an aref expression, and we can transform it into
21177  // an aset expression.
21178  if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21179  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21180  return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
21181  }
21182 
21183  // If this node cannot be writable, then we have an error.
21184  if (pm_call_node_writable_p(parser, cast)) {
21185  parse_write_name(parser, &cast->name);
21186  } else {
21187  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21188  }
21189 
21190  parse_call_operator_write(parser, cast, &token);
21191  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21192  return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
21193  }
21194  case PM_MULTI_WRITE_NODE: {
21195  parser_lex(parser);
21196  pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21197  return node;
21198  }
21199  default:
21200  parser_lex(parser);
21201 
21202  // In this case we have an ||= sign, but we don't know what it's for.
21203  // We need to treat it as an error. For now, we'll mark it as an error
21204  // and just skip right past it.
21205  pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21206  return node;
21207  }
21208  }
21210  case PM_TOKEN_CARET_EQUAL:
21213  case PM_TOKEN_MINUS_EQUAL:
21215  case PM_TOKEN_PIPE_EQUAL:
21216  case PM_TOKEN_PLUS_EQUAL:
21217  case PM_TOKEN_SLASH_EQUAL:
21218  case PM_TOKEN_STAR_EQUAL:
21219  case PM_TOKEN_STAR_STAR_EQUAL: {
21220  switch (PM_NODE_TYPE(node)) {
21223  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21224  /* fallthrough */
21226  parser_lex(parser);
21227 
21228  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21229  pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
21230 
21231  pm_node_destroy(parser, node);
21232  return result;
21233  }
21235  parser_lex(parser);
21236 
21237  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21238  pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21239 
21240  pm_node_destroy(parser, node);
21241  return result;
21242  }
21243  case PM_CONSTANT_PATH_NODE: {
21244  parser_lex(parser);
21245 
21246  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21247  pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21248 
21249  return parse_shareable_constant_write(parser, write);
21250  }
21251  case PM_CONSTANT_READ_NODE: {
21252  parser_lex(parser);
21253 
21254  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21255  pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21256 
21257  pm_node_destroy(parser, node);
21258  return parse_shareable_constant_write(parser, write);
21259  }
21261  parser_lex(parser);
21262 
21263  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21264  pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21265 
21266  pm_node_destroy(parser, node);
21267  return result;
21268  }
21271  parser_lex(parser);
21272 
21273  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21274  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21275 
21276  pm_node_destroy(parser, node);
21277  return result;
21278  }
21279  case PM_CALL_NODE: {
21280  parser_lex(parser);
21281  pm_call_node_t *cast = (pm_call_node_t *) node;
21282 
21283  // If we have a vcall (a method with no arguments and no
21284  // receiver that could have been a local variable) then we
21285  // will transform it into a local variable write.
21287  pm_location_t *message_loc = &cast->message_loc;
21288  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21289 
21290  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21291  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21292  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21293 
21294  pm_node_destroy(parser, (pm_node_t *) cast);
21295  return result;
21296  }
21297 
21298  // If there is no call operator and the message is "[]" then
21299  // this is an aref expression, and we can transform it into
21300  // an aset expression.
21301  if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21302  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21303  return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
21304  }
21305 
21306  // If this node cannot be writable, then we have an error.
21307  if (pm_call_node_writable_p(parser, cast)) {
21308  parse_write_name(parser, &cast->name);
21309  } else {
21310  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21311  }
21312 
21313  parse_call_operator_write(parser, cast, &token);
21314  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21315  return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
21316  }
21317  case PM_MULTI_WRITE_NODE: {
21318  parser_lex(parser);
21319  pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21320  return node;
21321  }
21322  default:
21323  parser_lex(parser);
21324 
21325  // In this case we have an operator but we don't know what it's for.
21326  // We need to treat it as an error. For now, we'll mark it as an error
21327  // and just skip right past it.
21328  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21329  return node;
21330  }
21331  }
21333  case PM_TOKEN_KEYWORD_AND: {
21334  parser_lex(parser);
21335 
21336  pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21337  return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
21338  }
21339  case PM_TOKEN_KEYWORD_OR:
21340  case PM_TOKEN_PIPE_PIPE: {
21341  parser_lex(parser);
21342 
21343  pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21344  return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
21345  }
21346  case PM_TOKEN_EQUAL_TILDE: {
21347  // Note that we _must_ parse the value before adding the local
21348  // variables in order to properly mirror the behavior of Ruby. For
21349  // example,
21350  //
21351  // /(?<foo>bar)/ =~ foo
21352  //
21353  // In this case, `foo` should be a method call and not a local yet.
21354  parser_lex(parser);
21355  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21356 
21357  // By default, we're going to create a call node and then return it.
21358  pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21359  pm_node_t *result = (pm_node_t *) call;
21360 
21361  // If the receiver of this =~ is a regular expression node, then we
21362  // need to introduce local variables for it based on its named
21363  // capture groups.
21365  // It's possible to have an interpolated regular expression node
21366  // that only contains strings. This is because it can be split
21367  // up by a heredoc. In this case we need to concat the unescaped
21368  // strings together and then parse them as a regular expression.
21369  pm_node_list_t *parts = &((pm_interpolated_regular_expression_node_t *) node)->parts;
21370 
21371  bool interpolated = false;
21372  size_t total_length = 0;
21373 
21374  pm_node_t *part;
21375  PM_NODE_LIST_FOREACH(parts, index, part) {
21376  if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21377  total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21378  } else {
21379  interpolated = true;
21380  break;
21381  }
21382  }
21383 
21384  if (!interpolated && total_length > 0) {
21385  void *memory = xmalloc(total_length);
21386  if (!memory) abort();
21387 
21388  uint8_t *cursor = memory;
21389  PM_NODE_LIST_FOREACH(parts, index, part) {
21390  pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21391  size_t length = pm_string_length(unescaped);
21392 
21393  memcpy(cursor, pm_string_source(unescaped), length);
21394  cursor += length;
21395  }
21396 
21397  pm_string_t owned;
21398  pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21399 
21400  result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21401  pm_string_free(&owned);
21402  }
21403  } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21404  // If we have a regular expression node, then we can just parse
21405  // the named captures directly off the unescaped string.
21406  const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21407  result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21408  }
21409 
21410  return result;
21411  }
21412  case PM_TOKEN_UAMPERSAND:
21413  case PM_TOKEN_USTAR:
21414  case PM_TOKEN_USTAR_STAR:
21415  // The only times this will occur are when we are in an error state,
21416  // but we'll put them in here so that errors can propagate.
21417  case PM_TOKEN_BANG_EQUAL:
21418  case PM_TOKEN_BANG_TILDE:
21419  case PM_TOKEN_EQUAL_EQUAL:
21422  case PM_TOKEN_CARET:
21423  case PM_TOKEN_PIPE:
21424  case PM_TOKEN_AMPERSAND:
21426  case PM_TOKEN_LESS_LESS:
21427  case PM_TOKEN_MINUS:
21428  case PM_TOKEN_PLUS:
21429  case PM_TOKEN_PERCENT:
21430  case PM_TOKEN_SLASH:
21431  case PM_TOKEN_STAR:
21432  case PM_TOKEN_STAR_STAR: {
21433  parser_lex(parser);
21434  pm_token_t operator = parser->previous;
21435  switch (PM_NODE_TYPE(node)) {
21436  case PM_RESCUE_MODIFIER_NODE: {
21439  PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21440  }
21441  break;
21442  }
21443  case PM_AND_NODE: {
21444  pm_and_node_t *cast = (pm_and_node_t *) node;
21446  PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21447  }
21448  break;
21449  }
21450  case PM_OR_NODE: {
21451  pm_or_node_t *cast = (pm_or_node_t *) node;
21453  PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21454  }
21455  break;
21456  }
21457  default:
21458  break;
21459  }
21460 
21461  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21462  return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21463  }
21464  case PM_TOKEN_GREATER:
21466  case PM_TOKEN_LESS:
21467  case PM_TOKEN_LESS_EQUAL: {
21468  if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21469  PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21470  }
21471 
21472  parser_lex(parser);
21473  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21474  return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
21475  }
21477  case PM_TOKEN_DOT: {
21478  parser_lex(parser);
21479  pm_token_t operator = parser->previous;
21480  pm_arguments_t arguments = { 0 };
21481 
21482  // This if statement handles the foo.() syntax.
21483  if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21484  parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21485  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21486  }
21487 
21488  switch (PM_NODE_TYPE(node)) {
21489  case PM_RESCUE_MODIFIER_NODE: {
21492  PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21493  }
21494  break;
21495  }
21496  case PM_AND_NODE: {
21497  pm_and_node_t *cast = (pm_and_node_t *) node;
21499  PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21500  }
21501  break;
21502  }
21503  case PM_OR_NODE: {
21504  pm_or_node_t *cast = (pm_or_node_t *) node;
21506  PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21507  }
21508  break;
21509  }
21510  default:
21511  break;
21512  }
21513 
21514  pm_token_t message;
21515 
21516  switch (parser->current.type) {
21517  case PM_CASE_OPERATOR:
21518  case PM_CASE_KEYWORD:
21519  case PM_TOKEN_CONSTANT:
21520  case PM_TOKEN_IDENTIFIER:
21521  case PM_TOKEN_METHOD_NAME: {
21522  parser_lex(parser);
21523  message = parser->previous;
21524  break;
21525  }
21526  default: {
21527  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21528  message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21529  }
21530  }
21531 
21532  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21533  pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21534 
21535  if (
21536  (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21537  arguments.arguments == NULL &&
21538  arguments.opening_loc.start == NULL &&
21539  match1(parser, PM_TOKEN_COMMA)
21540  ) {
21541  return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21542  } else {
21543  return (pm_node_t *) call;
21544  }
21545  }
21546  case PM_TOKEN_DOT_DOT:
21547  case PM_TOKEN_DOT_DOT_DOT: {
21548  parser_lex(parser);
21549 
21550  pm_node_t *right = NULL;
21551  if (token_begins_expression_p(parser->current.type)) {
21552  right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21553  }
21554 
21555  return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
21556  }
21558  pm_token_t keyword = parser->current;
21559  parser_lex(parser);
21560 
21561  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21562  return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
21563  }
21565  pm_token_t keyword = parser->current;
21566  parser_lex(parser);
21567 
21568  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21569  return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
21570  }
21572  parser_lex(parser);
21573  pm_statements_node_t *statements = pm_statements_node_create(parser);
21574  pm_statements_node_body_append(parser, statements, node, true);
21575 
21576  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21577  return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21578  }
21580  parser_lex(parser);
21581  pm_statements_node_t *statements = pm_statements_node_create(parser);
21582  pm_statements_node_body_append(parser, statements, node, true);
21583 
21584  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21585  return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21586  }
21587  case PM_TOKEN_QUESTION_MARK: {
21588  context_push(parser, PM_CONTEXT_TERNARY);
21589  pm_node_list_t current_block_exits = { 0 };
21590  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21591 
21592  pm_token_t qmark = parser->current;
21593  parser_lex(parser);
21594 
21595  pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21596 
21597  if (parser->recovering) {
21598  // If parsing the true expression of this ternary resulted in a syntax
21599  // error that we can recover from, then we're going to put missing nodes
21600  // and tokens into the remaining places. We want to be sure to do this
21601  // before the `expect` function call to make sure it doesn't
21602  // accidentally move past a ':' token that occurs after the syntax
21603  // error.
21604  pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21605  pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
21606 
21607  context_pop(parser);
21608  pop_block_exits(parser, previous_block_exits);
21609  pm_node_list_free(&current_block_exits);
21610 
21611  return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21612  }
21613 
21614  accept1(parser, PM_TOKEN_NEWLINE);
21615  expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21616 
21617  pm_token_t colon = parser->previous;
21618  pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21619 
21620  context_pop(parser);
21621  pop_block_exits(parser, previous_block_exits);
21622  pm_node_list_free(&current_block_exits);
21623 
21624  return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21625  }
21626  case PM_TOKEN_COLON_COLON: {
21627  parser_lex(parser);
21628  pm_token_t delimiter = parser->previous;
21629 
21630  switch (parser->current.type) {
21631  case PM_TOKEN_CONSTANT: {
21632  parser_lex(parser);
21633  pm_node_t *path;
21634 
21635  if (
21636  (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21637  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21638  ) {
21639  // If we have a constant immediately following a '::' operator, then
21640  // this can either be a constant path or a method call, depending on
21641  // what follows the constant.
21642  //
21643  // If we have parentheses, then this is a method call. That would
21644  // look like Foo::Bar().
21645  pm_token_t message = parser->previous;
21646  pm_arguments_t arguments = { 0 };
21647 
21648  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21649  path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21650  } else {
21651  // Otherwise, this is a constant path. That would look like Foo::Bar.
21652  path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21653  }
21654 
21655  // If this is followed by a comma then it is a multiple assignment.
21656  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21657  return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21658  }
21659 
21660  return path;
21661  }
21662  case PM_CASE_OPERATOR:
21663  case PM_CASE_KEYWORD:
21664  case PM_TOKEN_IDENTIFIER:
21665  case PM_TOKEN_METHOD_NAME: {
21666  parser_lex(parser);
21667  pm_token_t message = parser->previous;
21668 
21669  // If we have an identifier following a '::' operator, then it is for
21670  // sure a method call.
21671  pm_arguments_t arguments = { 0 };
21672  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21673  pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21674 
21675  // If this is followed by a comma then it is a multiple assignment.
21676  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21677  return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21678  }
21679 
21680  return (pm_node_t *) call;
21681  }
21683  // If we have a parenthesis following a '::' operator, then it is the
21684  // method call shorthand. That would look like Foo::(bar).
21685  pm_arguments_t arguments = { 0 };
21686  parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21687 
21688  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
21689  }
21690  default: {
21691  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21692  return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21693  }
21694  }
21695  }
21697  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21698  parser_lex(parser);
21699  accept1(parser, PM_TOKEN_NEWLINE);
21700 
21701  pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21702  context_pop(parser);
21703 
21704  return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
21705  }
21706  case PM_TOKEN_BRACKET_LEFT: {
21707  parser_lex(parser);
21708 
21709  pm_arguments_t arguments = { 0 };
21710  arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21711 
21712  if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21713  pm_accepts_block_stack_push(parser, true);
21714  parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
21715  pm_accepts_block_stack_pop(parser);
21716  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21717  }
21718 
21719  arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21720 
21721  // If we have a comma after the closing bracket then this is a multiple
21722  // assignment and we should parse the targets.
21723  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21724  pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21725  return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21726  }
21727 
21728  // If we're at the end of the arguments, we can now check if there is a
21729  // block node that starts with a {. If there is, then we can parse it and
21730  // add it to the arguments.
21731  pm_block_node_t *block = NULL;
21732  if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21733  block = parse_block(parser, (uint16_t) (depth + 1));
21734  pm_arguments_validate_block(parser, &arguments, block);
21735  } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21736  block = parse_block(parser, (uint16_t) (depth + 1));
21737  }
21738 
21739  if (block != NULL) {
21740  if (arguments.block != NULL) {
21741  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
21742  if (arguments.arguments == NULL) {
21743  arguments.arguments = pm_arguments_node_create(parser);
21744  }
21745  pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
21746  }
21747 
21748  arguments.block = (pm_node_t *) block;
21749  }
21750 
21751  return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
21752  }
21753  case PM_TOKEN_KEYWORD_IN: {
21754  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21755  parser->pattern_matching_newlines = true;
21756 
21757  pm_token_t operator = parser->current;
21758  parser->command_start = false;
21759  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21760  parser_lex(parser);
21761 
21762  pm_constant_id_list_t captures = { 0 };
21763  pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21764 
21765  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21766  pm_constant_id_list_free(&captures);
21767 
21768  return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
21769  }
21770  case PM_TOKEN_EQUAL_GREATER: {
21771  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21772  parser->pattern_matching_newlines = true;
21773 
21774  pm_token_t operator = parser->current;
21775  parser->command_start = false;
21776  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21777  parser_lex(parser);
21778 
21779  pm_constant_id_list_t captures = { 0 };
21780  pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21781 
21782  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21783  pm_constant_id_list_free(&captures);
21784 
21785  return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
21786  }
21787  default:
21788  assert(false && "unreachable");
21789  return NULL;
21790  }
21791 }
21792 
21793 #undef PM_PARSE_PATTERN_SINGLE
21794 #undef PM_PARSE_PATTERN_TOP
21795 #undef PM_PARSE_PATTERN_MULTI
21796 
21801 static inline bool
21802 pm_call_node_command_p(const pm_call_node_t *node) {
21803  return (
21804  (node->opening_loc.start == NULL) &&
21805  (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
21806  (node->arguments != NULL || node->block != NULL)
21807  );
21808 }
21809 
/**
 * Parse an expression at the given binding power.
 *
 * A prefix expression is parsed first with parse_expression_prefix. Then, as
 * long as the current token is marked as a binary operator and its left
 * binding power is at least `binding_power`, the expression is extended with
 * parse_expression_infix.
 *
 * `accepts_command_call`, `accepts_label` and `diag_id` are forwarded to
 * parse_expression_prefix. `depth` is the current nesting depth; once it
 * reaches PRISM_DEPTH_MAXIMUM an error is recorded and a missing node is
 * returned instead of recursing further.
 */
21818 static pm_node_t *
21819 parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
 // Stop recursing once the nesting limit is reached and report it on the
 // current token.
21820  if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21821  pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21822  return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
21823  }
21824 
21825  pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
21826 
 // Some prefix results must be returned as-is, without looking for an infix
 // operator after them.
21827  switch (PM_NODE_TYPE(node)) {
21828  case PM_MISSING_NODE:
21829  // If we found a syntax error, then the type of node returned by
21830  // parse_expression_prefix is going to be a missing node.
21831  return node;
21832  case PM_PRE_EXECUTION_NODE:
 // NOTE(review): this listing jumps from line 21832 to 21835, so any case
 // labels on the two intervening lines are not visible here.
21835  case PM_ALIAS_METHOD_NODE:
21836  case PM_MULTI_WRITE_NODE:
21837  case PM_UNDEF_NODE:
21838  // These expressions are statements, and cannot be followed by
21839  // operators (except modifiers).
21840  if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21841  return node;
21842  }
21843  break;
21844  case PM_CALL_NODE:
21845  // If we have a call node, then we need to check if it looks like a
21846  // method call without parentheses that contains arguments. If it
21847  // does, then it has different rules for parsing infix operators,
21848  // namely that it only accepts composition (and/or) and modifiers
21849  // (if/unless/etc.).
21850  if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
21851  return node;
21852  }
21853  break;
21854  case PM_SYMBOL_NODE:
21855  // If we have a symbol node that is being parsed as a label, then we
21856  // need to immediately return, because there should never be an
21857  // infix operator following this node.
21858  if (pm_symbol_node_label_p(node)) {
21859  return node;
21860  }
 // A symbol that is not a label falls through to the default case.
21861  default:
21862  break;
21863  }
21864 
21865  // Otherwise we'll look and see if the next token can be parsed as an infix
21866  // operator. If it can, then we'll parse it using parse_expression_infix.
21867  pm_binding_powers_t current_binding_powers;
21868  pm_token_type_t current_token_type;
21869 
 // The loop condition uses comma expressions to refresh the token type and
 // its binding powers on every iteration before testing them.
21870  while (
21871  current_token_type = parser->current.type,
21872  current_binding_powers = pm_binding_powers[current_token_type],
21873  binding_power <= current_binding_powers.left &&
21874  current_binding_powers.binary
21875  ) {
21876  node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
21877 
21878  switch (PM_NODE_TYPE(node)) {
21879  case PM_MULTI_WRITE_NODE:
21880  // Multi-write nodes are statements, and cannot be followed by
21881  // operators except modifiers.
21882  if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21883  return node;
21884  }
21885  break;
 // NOTE(review): this listing jumps from line 21885 to 21892, so the case
 // labels that select the following branch are not visible here.
21892  // These expressions are statements, by virtue of the right-hand
21893  // side of their write being an implicit array.
21894  if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21895  return node;
21896  }
21897  break;
21898  case PM_CALL_NODE:
21899  // These expressions are also statements, by virtue of the
21900  // right-hand side of the expression (i.e., the last argument to
21901  // the call node) being an implicit array.
21902  if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21903  return node;
21904  }
21905  break;
21906  default:
21907  break;
21908  }
21909 
21910  // If the operator is nonassoc and we should not be able to parse the
21911  // upcoming infix operator, break.
21912  if (current_binding_powers.nonassoc) {
21913  // If this is a non-assoc operator and we are about to parse the
21914  // exact same operator, then we need to add an error.
21915  if (match1(parser, current_token_type)) {
21916  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21917  break;
21918  }
21919 
21920  // If this is an endless range, then we need to reject a couple of
21921  // additional operators because it violates the normal operator
21922  // precedence rules. Those patterns are:
21923  //
21924  // 1.. & 2
21925  // 1.. * 2
21926  //
21927  if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
 // NOTE(review): line 21928 is absent from this listing; presumably it
 // restricts the error below to specific operators -- confirm against the
 // full file.
21929  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21930  break;
21931  }
21932 
21933  if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
21934  break;
21935  }
21936  } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
21937  break;
21938  }
21939  }
21940 
 // This check runs inside the loop, so the updated accepts_command_call is
 // what gets passed to parse_expression_infix on the next iteration.
21941  if (accepts_command_call) {
21942  // A command-style method call is only accepted on method chains.
21943  // Thus, we check whether the parsed node can continue method chains.
21944  // The method chain can continue if the parsed node is one of the following five kinds:
21945  // (1) index access: foo[1]
21946  // (2) attribute access: foo.bar
21947  // (3) method call with parenthesis: foo.bar(1)
21948  // (4) method call with a block: foo.bar do end
21949  // (5) constant path: foo::Bar
21950  switch (node->type) {
21951  case PM_CALL_NODE: {
21952  pm_call_node_t *cast = (pm_call_node_t *)node;
 // The call disables command calls only when it matches none of the
 // shapes (1) through (4).
21953  if (
21954  // (1) foo[1]
21955  !(
21956  cast->call_operator_loc.start == NULL &&
21957  cast->message_loc.start != NULL &&
21958  cast->message_loc.start[0] == '[' &&
21959  cast->message_loc.end[-1] == ']'
21960  ) &&
21961  // (2) foo.bar
21962  !(
21963  cast->call_operator_loc.start != NULL &&
21964  cast->arguments == NULL &&
21965  cast->block == NULL &&
21966  cast->opening_loc.start == NULL
21967  ) &&
21968  // (3) foo.bar(1)
21969  !(
21970  cast->call_operator_loc.start != NULL &&
21971  cast->opening_loc.start != NULL
21972  ) &&
21973  // (4) foo.bar do end
21974  !(
21975  cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
21976  )
21977  ) {
21978  accepts_command_call = false;
21979  }
21980  break;
21981  }
21982  // (5) foo::Bar
21983  case PM_CONSTANT_PATH_NODE:
21984  break;
21985  default:
21986  accepts_command_call = false;
21987  break;
21988  }
21989  }
21990  }
21991 
21992  return node;
21993 }
21994 
21999 static pm_statements_node_t *
22000 wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
22001  if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
22002  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22003  pm_arguments_node_arguments_append(
22004  arguments,
22005  (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
22006  );
22007 
22008  pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
22009  parser,
22010  arguments,
22011  pm_parser_constant_id_constant(parser, "print", 5)
22012  ), true);
22013  }
22014 
22015  if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
22016  if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
22017  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22018  pm_arguments_node_arguments_append(
22019  arguments,
22020  (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
22021  );
22022 
22023  pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
22024  pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
22025 
22026  pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
22027  parser,
22028  pm_parser_constant_id_constant(parser, "$F", 2),
22029  (pm_node_t *) call
22030  );
22031 
22032  pm_statements_node_body_prepend(statements, (pm_node_t *) write);
22033  }
22034 
22035  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
22036  pm_arguments_node_arguments_append(
22037  arguments,
22038  (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
22039  );
22040 
22041  if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
22042  pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
22043  pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
22044  parser,
22045  (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
22046  &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
22047  (pm_node_t *) pm_true_node_synthesized_create(parser)
22048  ));
22049 
22050  pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
22051  pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
22052  }
22053 
22054  pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
22055  pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
22056  parser,
22057  (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
22058  statements
22059  ), true);
22060 
22061  statements = wrapped_statements;
22062  }
22063 
22064  return statements;
22065 }
22066 
/**
 * Parse the whole program and return its program node.
 *
 * Pushes a top-level scope if none exists, lexes the first token, parses the
 * statements with the PM_CONTEXT_MAIN context, computes the ordered locals of
 * the top-level scope and pops it, and finally builds the program node from
 * the locals and the statements.
 */
22070 static pm_node_t *
22071 parse_program(pm_parser_t *parser) {
22072  // If the current scope is NULL, then we want to push a new top level scope.
22073  // The current scope could exist in the event that we are parsing an eval
22074  // and the user has passed into scopes that already exist.
22075  if (parser->current_scope == NULL) {
22076  pm_parser_scope_push(parser, true);
22077  }
22078 
22079  pm_node_list_t current_block_exits = { 0 };
22080  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22081 
 // Prime the lexer with the first token before parsing statements.
22082  parser_lex(parser);
22083  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22084 
22085  if (statements == NULL) {
 // Nothing was parsed: substitute an empty statements node.
22086  statements = pm_statements_node_create(parser);
22087  } else if (!parser->parsing_eval) {
22088  // If we have statements, then the top-level statement should be
22089  // explicitly checked as well. We have to do this here because
22090  // everywhere else we check all but the last statement.
22091  assert(statements->body.size > 0);
22092  pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22093  }
22094 
 // The locals are captured before the scope is popped.
22095  pm_constant_id_list_t locals;
22096  pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22097  pm_parser_scope_pop(parser);
22098 
22099  // If this is an empty file, then we're still going to parse all of the
22100  // statements in order to gather up all of the comments and such. Here we'll
22101  // correct the location information.
22102  if (pm_statements_node_body_length(statements) == 0) {
22103  pm_statements_node_location_set(statements, parser->start, parser->start);
22104  }
22105 
22106  // At the top level, see if we need to wrap the statements in a program
22107  // node with a while loop based on the options.
 // NOTE(review): line 22108, which opens the `if` matched by the `} else {`
 // below, is absent from this listing.
22109  statements = wrap_statements(parser, statements);
22110  } else {
22111  flush_block_exits(parser, previous_block_exits);
22112  pm_node_list_free(&current_block_exits);
22113  }
22114 
22115  return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
22116 }
22117 
22118 /******************************************************************************/
22119 /* External functions */
22120 /******************************************************************************/
22121 
/**
 * Find the first occurrence of the NUL-terminated string `little` that lies
 * entirely within the first `big_length` bytes of `big`.
 *
 * Only positions where the whole of `little` still fits inside the bounded
 * region are compared, so no byte at or beyond `big + big_length` is read and
 * no match can extend past the bound.
 *
 * @param big The start of the region to search. It does not need to be
 *     NUL-terminated.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the start of the first match, `big` itself if `little`
 *     is empty, or NULL if there is no match.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle matches at the start, as with strstr.
    if (little_length == 0) return big;

    // A needle longer than the region can never fit inside it.
    if (little_length > big_length) return NULL;

    // The last position at which the needle still fits inside the region.
    const char *last = big + (big_length - little_length);

    for (const char *cursor = big; cursor <= last; cursor++) {
        if (*cursor == *little && memcmp(cursor, little, little_length) == 0) return cursor;
    }

    return NULL;
}
22141 
22142 #ifdef _WIN32
22143 #define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22144 #else
22150 static void
22151 pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22152  if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22153  pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
22154  }
22155 }
22156 #endif
22157 
22162 static void
22163 pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22164  const char *switches = pm_strnstr(engine, " -", length);
22165  if (switches == NULL) return;
22166 
22167  pm_options_t next_options = *options;
22168  options->shebang_callback(
22169  &next_options,
22170  (const uint8_t *) (switches + 1),
22171  length - ((size_t) (switches - engine)) - 1,
22172  options->shebang_callback_data
22173  );
22174 
22175  size_t encoding_length;
22176  if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22177  const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22178  parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22179  }
22180 
22181  parser->command_line = next_options.command_line;
22182  parser->frozen_string_literal = next_options.frozen_string_literal;
22183 }
22184 
/**
 * Initialize `parser` to parse the `size` bytes starting at `source`.
 *
 * `source` must not be NULL. `options` may be NULL, in which case the values
 * from the initializer below are kept. After the fields are set up, this
 * function skips a UTF-8 byte order mark, inspects a leading shebang line,
 * and, when requested, searches the source for the first shebang line that
 * contains "ruby" so that parsing starts there.
 *
 * NOTE(review): the line carrying this function's return type (22188) is
 * absent from this listing.
 */
22189 pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22190  assert(source != NULL);
22191 
22192  *parser = (pm_parser_t) {
22193  .node_id = 0,
22194  .lex_state = PM_LEX_STATE_BEG,
22195  .enclosure_nesting = 0,
22196  .lambda_enclosure_nesting = -1,
22197  .brace_nesting = 0,
22198  .do_loop_stack = 0,
22199  .accepts_block_stack = 0,
22200  .lex_modes = {
22201  .index = 0,
22202  .stack = {{ .mode = PM_LEX_DEFAULT }},
22203  .current = &parser->lex_modes.stack[0],
22204  },
22205  .start = source,
22206  .end = source + size,
22207  .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22208  .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22209  .next_start = NULL,
22210  .heredoc_end = NULL,
22211  .data_loc = { .start = NULL, .end = NULL },
22212  .comment_list = { 0 },
22213  .magic_comment_list = { 0 },
22214  .warning_list = { 0 },
22215  .error_list = { 0 },
22216  .current_scope = NULL,
22217  .current_context = NULL,
22218  .encoding = PM_ENCODING_UTF_8_ENTRY,
22219  .encoding_changed_callback = NULL,
22220  .encoding_comment_start = source,
22221  .lex_callback = NULL,
22222  .filepath = { 0 },
22223  .constant_pool = { 0 },
22224  .newline_list = { 0 },
22225  .integer_base = 0,
22226  .current_string = PM_STRING_EMPTY,
22227  .start_line = 1,
22228  .explicit_encoding = NULL,
22229  .command_line = 0,
22230  .parsing_eval = false,
22231  .partial_script = false,
22232  .command_start = true,
22233  .recovering = false,
22234  .encoding_locked = false,
22235  .encoding_changed = false,
22236  .pattern_matching_newlines = false,
22237  .in_keyword_arg = false,
22238  .current_block_exits = NULL,
22239  .semantic_token_seen = false,
22240  .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22241  .current_regular_expression_ascii_only = false,
22242  .warn_mismatched_indentation = true
22243  };
22244 
22245  // Initialize the constant pool. We're going to completely guess as to the
22246  // number of constants that we'll need based on the size of the input. The
22247  // ratio we chose here is actually less arbitrary than you might think.
22248  //
22249  // We took ~50K Ruby files and measured the size of the file versus the
22250  // number of constants that were found in those files. Then we found the
22251  // average and standard deviation of the ratios of constants/bytesize. Then
22252  // we added 1.34 standard deviations to the average to get a ratio that
22253  // would fit 75% of the files (for a two-tailed distribution). This works
22254  // because there was about a 0.77 correlation and the distribution was
22255  // roughly normal.
22256  //
22257  // This ratio will need to change if we add more constants to the constant
22258  // pool for another node type.
22259  uint32_t constant_size = ((uint32_t) size) / 95;
22260  pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22261 
22262  // Initialize the newline list. Similar to the constant pool, we're going to
22263  // guess at the number of newlines that we'll need based on the size of the
22264  // input.
22265  size_t newline_size = size / 22;
22266  pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22267 
22268  // If options were provided to this parse, establish them here.
22269  if (options != NULL) {
22270  // filepath option
22271  parser->filepath = options->filepath;
22272 
22273  // line option
22274  parser->start_line = options->line;
22275 
22276  // encoding option
22277  size_t encoding_length = pm_string_length(&options->encoding);
22278  if (encoding_length > 0) {
22279  const uint8_t *encoding_source = pm_string_source(&options->encoding);
22280  parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22281  }
22282 
22283  // encoding_locked option
22284  parser->encoding_locked = options->encoding_locked;
22285 
22286  // frozen_string_literal option
22287  parser->frozen_string_literal = options->frozen_string_literal;
22288 
22289  // command_line option
22290  parser->command_line = options->command_line;
22291 
22292  // version option
22293  parser->version = options->version;
22294 
22295  // partial_script
22296  parser->partial_script = options->partial_script;
22297 
22298  // scopes option
22299  parser->parsing_eval = options->scopes_count > 0;
22300  if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22301 
 // Only the first scope taken from the options is pushed with `true` as the
 // second argument to pm_parser_scope_push.
22302  for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22303  const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22304  pm_parser_scope_push(parser, scope_index == 0);
22305 
22306  // Scopes given from the outside are not allowed to have numbered
22307  // parameters.
22308  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22309 
22310  for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22311  const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22312 
22313  const uint8_t *source = pm_string_source(local);
22314  size_t length = pm_string_length(local);
22315 
 // The local's name is copied into a fresh allocation before being
 // registered; a failed allocation silently skips that local.
22316  void *allocated = xmalloc(length);
22317  if (allocated == NULL) continue;
22318 
22319  memcpy(allocated, source, length);
22320  pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22321  }
22322  }
22323  }
22324 
22325  pm_accepts_block_stack_push(parser, true);
22326 
22327  // Skip past the UTF-8 BOM if it exists.
22328  if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22329  parser->current.end += 3;
22330  parser->encoding_comment_start += 3;
22331 
22332  if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
 // NOTE(review): line 22333 is absent from this listing.
22334  if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22335  }
22336  }
22337 
22338  // If the -x command line flag is set, or the first shebang of the file does
22339  // not include "ruby", then we'll search for a shebang that does include
22340  // "ruby" and start parsing from there.
22341  bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22342 
22343  // If the first two bytes of the source are a shebang, then we will do a bit
22344  // of extra processing.
22345  //
22346  // First, we'll indicate that the encoding comment is at the end of the
22347  // shebang. This means that when a shebang is present the encoding comment
22348  // can begin on the second line.
22349  //
22350  // Second, we will check if the shebang includes "ruby". If it does, then
22351  // we will start parsing from there. We will also potentially warn the
22352  // user if there is a carriage return at the end of the shebang. We will
22353  // also potentially call the shebang callback if this is the main script to
22354  // allow the caller to parse the shebang and find any command-line options.
22355  // If the shebang does not include "ruby" and this is the main script being
22356  // parsed, then we will start searching the file for a shebang that does
22357  // contain "ruby" as if -x were passed on the command line.
22358  const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
22359  size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
22360 
 // The "#!" test reads from parser->current.end, which is past any byte
 // order mark, while `length` and the "ruby" search below are measured from
 // parser->start.
22361  if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22362  const char *engine;
22363 
22364  if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22365  if (newline != NULL) {
22366  parser->encoding_comment_start = newline + 1;
22367 
22368  if (options == NULL || options->main_script) {
22369  pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22370  }
22371  }
22372 
22373  if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22374  pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22375  }
22376 
22377  search_shebang = false;
 // NOTE(review): `options` is dereferenced on the next line without the
 // NULL check that the neighbouring conditions use. A NULL `options` with a
 // shebang line that lacks "ruby" reaches this branch -- confirm and guard.
22378  } else if (options->main_script && !parser->parsing_eval) {
22379  search_shebang = true;
22380  }
22381  }
22382 
22383  // Here we're going to find the first shebang that includes "ruby" and start
22384  // parsing from there.
22385  if (search_shebang) {
22386  // If a shebang that includes "ruby" is not found, then we're going to add
22387  // a load error to the list of errors on the parser.
22388  bool found_shebang = false;
22389 
22390  // This is going to point to the start of each line as we check it.
22391  // We'll maintain a moving window looking at each line as they come.
22392  const uint8_t *cursor = parser->start;
22393 
22394  // The newline pointer points to the end of the current line that we're
22395  // considering. If it is NULL, then we're at the end of the file.
22396  const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22397 
 // Each iteration records the newline that ends the previous line and then
 // examines the line that follows it, so the first line of the source is
 // never examined by this loop.
22398  while (newline != NULL) {
22399  pm_newline_list_append(&parser->newline_list, newline);
22400 
22401  cursor = newline + 1;
22402  newline = next_newline(cursor, parser->end - cursor);
22403 
22404  size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22405  if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22406  const char *engine;
22407  if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22408  found_shebang = true;
22409 
22410  if (newline != NULL) {
22411  pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22412  parser->encoding_comment_start = newline + 1;
22413  }
22414 
22415  if (options != NULL && options->shebang_callback != NULL) {
22416  pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22417  }
22418 
22419  break;
22420  }
22421  }
22422  }
22423 
22424  if (found_shebang) {
 // Restart both tokens at the beginning of the shebang line that was found.
22425  parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22426  parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22427  } else {
22428  pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
 // NOTE(review): line 22429 is absent from this listing.
22430  }
22431  }
22432 
22433  // The encoding comment can start after any amount of inline whitespace, so
22434  // here we'll advance it to the first non-inline-whitespace character so
22435  // that it is ready for future comparisons.
 // NOTE(review): line 22436, the statement this comment describes, is absent
 // from this listing.
22437 }
22438 
22445  parser->encoding_changed_callback = callback;
22446 }
22447 
22451 static inline void
22452 pm_comment_list_free(pm_list_t *list) {
22453  pm_list_node_t *node, *next;
22454 
22455  for (node = list->head; node != NULL; node = next) {
22456  next = node->next;
22457 
22458  pm_comment_t *comment = (pm_comment_t *) node;
22459  xfree(comment);
22460  }
22461 }
22462 
/**
 * Walk every node in the given list of magic comments.
 *
 * NOTE(review): lines 22473-22474 of the loop body are absent from this
 * listing; presumably they release each entry, as pm_comment_list_free does --
 * confirm against the full file.
 */
22466 static inline void
22467 pm_magic_comment_list_free(pm_list_t *list) {
22468  pm_list_node_t *node, *next;
22469 
 // `next` is read before the loop body so the walk does not depend on the
 // current node afterwards.
22470  for (node = list->head; node != NULL; node = next) {
22471  next = node->next;
22472 
22475  }
22476 }
22477 
22483  pm_string_free(&parser->filepath);
22486  pm_comment_list_free(&parser->comment_list);
22487  pm_magic_comment_list_free(&parser->magic_comment_list);
22490 
22491  while (parser->current_scope != NULL) {
22492  // Normally, popping the scope doesn't free the locals since it is
22493  // assumed that ownership has transferred to the AST. However if we have
22494  // scopes while we're freeing the parser, it's likely they came from
22495  // eval scopes and we need to free them explicitly here.
22496  pm_parser_scope_pop(parser);
22497  }
22498 
22499  while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22500  lex_mode_pop(parser);
22501  }
22502 }
22503 
22509  return parse_program(parser);
22510 }
22511 
22517 static bool
22518 pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets) {
22519 #define LINE_SIZE 4096
22520  char line[LINE_SIZE];
22521 
22522  while (memset(line, '\n', LINE_SIZE), fgets(line, LINE_SIZE, stream) != NULL) {
22523  size_t length = LINE_SIZE;
22524  while (length > 0 && line[length - 1] == '\n') length--;
22525 
22526  if (length == LINE_SIZE) {
22527  // If we read a line that is the maximum size and it doesn't end
22528  // with a newline, then we'll just append it to the buffer and
22529  // continue reading.
22530  length--;
22531  pm_buffer_append_string(buffer, line, length);
22532  continue;
22533  }
22534 
22535  // Append the line to the buffer.
22536  length--;
22537  pm_buffer_append_string(buffer, line, length);
22538 
22539  // Check if the line matches the __END__ marker. If it does, then stop
22540  // reading and return false. In most circumstances, this means we should
22541  // stop reading from the stream so that the DATA constant can pick it
22542  // up.
22543  switch (length) {
22544  case 7:
22545  if (strncmp(line, "__END__", 7) == 0) return false;
22546  break;
22547  case 8:
22548  if (strncmp(line, "__END__\n", 8) == 0) return false;
22549  break;
22550  case 9:
22551  if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22552  break;
22553  }
22554  }
22555 
22556  return true;
22557 #undef LINE_SIZE
22558 }
22559 
22569 static bool
22570 pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22571  pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22572 
22573  for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22574  if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22575  return true;
22576  }
22577  }
22578 
22579  return false;
22580 }
22581 
/**
 * Parse source read from a stream through the given fgets-style callback.
 *
 * `buffer` is initialized here and accumulates everything that is read;
 * `parser` is initialized over the buffer's contents. The returned node comes
 * from the final parse.
 *
 * NOTE(review): the line carrying this function's return type (22588) is
 * absent from this listing.
 */
22589 pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options) {
22590  pm_buffer_init(buffer);
22591 
 // pm_parse_stream_read returns true when the stream was exhausted and false
 // when it stopped at an __END__ line.
22592  bool eof = pm_parse_stream_read(buffer, stream, fgets);
22593  pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22594  pm_node_t *node = pm_parse(parser);
22595 
 // If reading stopped at an __END__ line but the parse has errors and ended
 // with a lex mode still pushed or an unterminated heredoc, read further and
 // parse the whole buffer again from scratch.
22596  while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22597  pm_node_destroy(parser, node);
22598  eof = pm_parse_stream_read(buffer, stream, fgets);
22599 
22600  pm_parser_free(parser);
22601  pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22602  node = pm_parse(parser);
22603  }
22604 
22605  return node;
22606 }
22607 
/**
 * Parse the given source with options read from `data` and report whether the
 * parse finished with an empty error list. The AST, the parser, and the
 * options are all released before returning.
 *
 * NOTE(review): the line carrying this function's return type is absent from
 * this listing.
 */
22612 pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22613  pm_options_t options = { 0 };
22614  pm_options_read(&options, data);
22615 
22616  pm_parser_t parser;
22617  pm_parser_init(&parser, source, size, &options);
22618 
22619  pm_node_t *node = pm_parse(&parser);
22620  pm_node_destroy(&parser, node);
22621 
 // The error count is read before the parser is freed.
22622  bool result = parser.error_list.size == 0;
22623  pm_parser_free(&parser);
22624  pm_options_free(&options);
22625 
22626  return result;
22627 }
22628 
22629 #undef PM_CASE_KEYWORD
22630 #undef PM_CASE_OPERATOR
22631 #undef PM_CASE_WRITABLE
22632 #undef PM_STRING_EMPTY
22633 #undef PM_LOCATION_NODE_BASE_VALUE
22634 #undef PM_LOCATION_NODE_VALUE
22635 #undef PM_LOCATION_NULL_VALUE
22636 #undef PM_LOCATION_TOKEN_VALUE
22637 
22638 // We optionally support serializing to a binary string. For systems that don't
22639 // want or need this functionality, it can be turned off with the
22640 // PRISM_EXCLUDE_SERIALIZATION define.
22641 #ifndef PRISM_EXCLUDE_SERIALIZATION
22642 
/**
 * Write the serialization header to the buffer, starting with the five bytes
 * "PRISM".
 *
 * NOTE(review): lines 22646-22649 are absent from this listing, so whatever
 * follows the "PRISM" bytes in the header is not visible here.
 */
22643 static inline void
22644 pm_serialize_header(pm_buffer_t *buffer) {
22645  pm_buffer_append_string(buffer, "PRISM", 5);
22650 }
22651 
/**
 * Serialize the given node into the buffer: the header, then the content
 * produced by pm_serialize_content, then a single trailing NUL byte.
 *
 * NOTE(review): the line carrying this function's return type is absent from
 * this listing.
 */
22656 pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22657  pm_serialize_header(buffer);
22658  pm_serialize_content(parser, node, buffer);
22659  pm_buffer_append_byte(buffer, '\0');
22660 }
22661 
/**
 * Parse the given source with options read from `data` and serialize the
 * result into the buffer as header, content, and a trailing NUL byte. The AST,
 * the parser, and the options are released before returning.
 *
 * NOTE(review): the line carrying this function's return type is absent from
 * this listing.
 */
22667 pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22668  pm_options_t options = { 0 };
22669  pm_options_read(&options, data);
22670 
22671  pm_parser_t parser;
22672  pm_parser_init(&parser, source, size, &options);
22673 
22674  pm_node_t *node = pm_parse(&parser);
22675 
 // These three calls are the same sequence that pm_serialize performs.
22676  pm_serialize_header(buffer);
22677  pm_serialize_content(&parser, node, buffer);
22678  pm_buffer_append_byte(buffer, '\0');
22679 
22680  pm_node_destroy(&parser, node);
22681  pm_parser_free(&parser);
22682  pm_options_free(&options);
22683 }
22684 
/**
 * Parse source read from a stream through the given fgets-style callback, with
 * options read from `data`, and serialize the result into `buffer` as header,
 * content, and a trailing NUL byte.
 *
 * NOTE(review): the line carrying this function's return type is absent from
 * this listing.
 */
22690 pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data) {
22691  pm_parser_t parser;
22692  pm_options_t options = { 0 };
22693  pm_options_read(&options, data);
22694 
 // parser_buffer holds the source text read from the stream; it is separate
 // from the output buffer and is freed below.
22695  pm_buffer_t parser_buffer;
22696  pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, fgets, &options);
22697  pm_serialize_header(buffer);
22698  pm_serialize_content(&parser, node, buffer);
22699  pm_buffer_append_byte(buffer, '\0');
22700 
22701  pm_node_destroy(&parser, node);
22702  pm_buffer_free(&parser_buffer);
22703  pm_parser_free(&parser);
22704  pm_options_free(&options);
22705 }
22706 
/**
 * Parse the given source with options read from `data` and serialize the
 * header, the parser's encoding, its start line, and its comment list into the
 * buffer. The AST itself is not serialized; it is destroyed together with the
 * parser and the options before returning.
 *
 * NOTE(review): the line carrying this function's return type is absent from
 * this listing.
 */
22711 pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22712  pm_options_t options = { 0 };
22713  pm_options_read(&options, data);
22714 
22715  pm_parser_t parser;
22716  pm_parser_init(&parser, source, size, &options);
22717 
22718  pm_node_t *node = pm_parse(&parser);
22719  pm_serialize_header(buffer);
22720  pm_serialize_encoding(parser.encoding, buffer);
22721  pm_buffer_append_varsint(buffer, parser.start_line);
22722  pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
22723 
22724  pm_node_destroy(&parser, node);
22725  pm_parser_free(&parser);
22726  pm_options_free(&options);
22727 }
22728 
22729 #endif
22730 
22731 /******************************************************************************/
22732 /* Slice queries for the Ruby API */
22733 /******************************************************************************/
22734 
/** The classification that pm_slice_type assigns to a slice of source. */
typedef enum {
    /** The requested encoding could not be found. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice is empty or is not a valid identifier. */
    PM_SLICE_TYPE_NONE = 0,

    /** The slice is an identifier whose first character is not uppercase. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** The slice is an identifier whose first character is uppercase. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** The slice is an identifier followed by a trailing `!`, `?` or `=`. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
22752 
22756 pm_slice_type_t
22757 pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22758  // first, get the right encoding object
22759  const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22760  if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22761 
22762  // check that there is at least one character
22763  if (length == 0) return PM_SLICE_TYPE_NONE;
22764 
22765  size_t width;
22766  if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22767  // valid because alphabetical
22768  } else if (*source == '_') {
22769  // valid because underscore
22770  width = 1;
22771  } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22772  // valid because multibyte
22773  } else {
22774  // invalid because no match
22775  return PM_SLICE_TYPE_NONE;
22776  }
22777 
22778  // determine the type of the slice based on the first character
22779  const uint8_t *end = source + length;
22780  pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22781 
22782  // next, iterate through all of the bytes of the string to ensure that they
22783  // are all valid identifier characters
22784  source += width;
22785 
22786  while (source < end) {
22787  if ((width = encoding->alnum_char(source, end - source)) != 0) {
22788  // valid because alphanumeric
22789  source += width;
22790  } else if (*source == '_') {
22791  // valid because underscore
22792  source++;
22793  } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22794  // valid because multibyte
22795  source += width;
22796  } else {
22797  // invalid because no match
22798  break;
22799  }
22800  }
22801 
22802  // accept a ! or ? at the end of the slice as a method name
22803  if (*source == '!' || *source == '?' || *source == '=') {
22804  source++;
22805  result = PM_SLICE_TYPE_METHOD_NAME;
22806  }
22807 
22808  // valid if we are at the end of the slice
22809  return source == end ? result : PM_SLICE_TYPE_NONE;
22810 }
22811 
22816 pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22817  switch (pm_slice_type(source, length, encoding_name)) {
22818  case PM_SLICE_TYPE_ERROR:
22819  return PM_STRING_QUERY_ERROR;
22820  case PM_SLICE_TYPE_NONE:
22821  case PM_SLICE_TYPE_CONSTANT:
22822  case PM_SLICE_TYPE_METHOD_NAME:
22823  return PM_STRING_QUERY_FALSE;
22824  case PM_SLICE_TYPE_LOCAL:
22825  return PM_STRING_QUERY_TRUE;
22826  }
22827 
22828  assert(false && "unreachable");
22829  return PM_STRING_QUERY_FALSE;
22830 }
22831 
22836 pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22837  switch (pm_slice_type(source, length, encoding_name)) {
22838  case PM_SLICE_TYPE_ERROR:
22839  return PM_STRING_QUERY_ERROR;
22840  case PM_SLICE_TYPE_NONE:
22841  case PM_SLICE_TYPE_LOCAL:
22842  case PM_SLICE_TYPE_METHOD_NAME:
22843  return PM_STRING_QUERY_FALSE;
22844  case PM_SLICE_TYPE_CONSTANT:
22845  return PM_STRING_QUERY_TRUE;
22846  }
22847 
22848  assert(false && "unreachable");
22849  return PM_STRING_QUERY_FALSE;
22850 }
22851 
22856 pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22857 #define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22858 #define C1(c) (*source == c)
22859 #define C2(s) (memcmp(source, s, 2) == 0)
22860 #define C3(s) (memcmp(source, s, 3) == 0)
22861 
22862  switch (pm_slice_type(source, length, encoding_name)) {
22863  case PM_SLICE_TYPE_ERROR:
22864  return PM_STRING_QUERY_ERROR;
22865  case PM_SLICE_TYPE_NONE:
22866  break;
22867  case PM_SLICE_TYPE_LOCAL:
22868  // numbered parameters are not valid method names
22869  return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22870  case PM_SLICE_TYPE_CONSTANT:
22871  // all constants are valid method names
22872  case PM_SLICE_TYPE_METHOD_NAME:
22873  // all method names are valid method names
22874  return PM_STRING_QUERY_TRUE;
22875  }
22876 
22877  switch (length) {
22878  case 1:
22879  return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22880  case 2:
22881  return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22882  case 3:
22883  return B(C3("===") || C3("<=>") || C3("[]="));
22884  default:
22885  return PM_STRING_QUERY_FALSE;
22886  }
22887 
22888 #undef B
22889 #undef C1
22890 #undef C2
22891 #undef C3
22892 }
@ PM_RANGE_FLAGS_EXCLUDE_END
...
Definition: ast.h:7844
@ PM_DEFINED_NODE
DefinedNode.
Definition: ast.h:709
@ PM_PRE_EXECUTION_NODE
PreExecutionNode.
Definition: ast.h:931
@ PM_RETRY_NODE
RetryNode.
Definition: ast.h:964
@ PM_REDO_NODE
RedoNode.
Definition: ast.h:943
@ PM_CONSTANT_PATH_WRITE_NODE
ConstantPathWriteNode.
Definition: ast.h:694
@ PM_SOURCE_LINE_NODE
SourceLineNode.
Definition: ast.h:985
@ PM_UNLESS_NODE
UnlessNode.
Definition: ast.h:1009
@ PM_CALL_NODE
CallNode.
Definition: ast.h:628
@ PM_NIL_NODE
NilNode.
Definition: ast.h:895
@ PM_GLOBAL_VARIABLE_READ_NODE
GlobalVariableReadNode.
Definition: ast.h:757
@ PM_RATIONAL_NODE
RationalNode.
Definition: ast.h:940
@ PM_FIND_PATTERN_NODE
FindPatternNode.
Definition: ast.h:727
@ PM_ARRAY_NODE
ArrayNode.
Definition: ast.h:589
@ PM_CONSTANT_PATH_TARGET_NODE
ConstantPathTargetNode.
Definition: ast.h:691
@ PM_OR_NODE
OrNode.
Definition: ast.h:913
@ PM_MULTI_WRITE_NODE
MultiWriteNode.
Definition: ast.h:889
@ PM_IF_NODE
IfNode.
Definition: ast.h:772
@ PM_INTERPOLATED_STRING_NODE
InterpolatedStringNode.
Definition: ast.h:826
@ PM_FALSE_NODE
FalseNode.
Definition: ast.h:724
@ PM_HASH_NODE
HashNode.
Definition: ast.h:766
@ PM_MATCH_PREDICATE_NODE
MatchPredicateNode.
Definition: ast.h:871
@ PM_X_STRING_NODE
XStringNode.
Definition: ast.h:1021
@ PM_GLOBAL_VARIABLE_TARGET_NODE
GlobalVariableTargetNode.
Definition: ast.h:760
@ PM_AND_NODE
AndNode.
Definition: ast.h:583
@ PM_CONSTANT_TARGET_NODE
ConstantTargetNode.
Definition: ast.h:700
@ PM_IT_LOCAL_VARIABLE_READ_NODE
ItLocalVariableReadNode.
Definition: ast.h:835
@ PM_SOURCE_FILE_NODE
SourceFileNode.
Definition: ast.h:982
@ PM_NO_KEYWORDS_PARAMETER_NODE
NoKeywordsParameterNode.
Definition: ast.h:898
@ PM_MULTI_TARGET_NODE
MultiTargetNode.
Definition: ast.h:886
@ PM_SPLAT_NODE
SplatNode.
Definition: ast.h:988
@ PM_CLASS_VARIABLE_READ_NODE
ClassVariableReadNode.
Definition: ast.h:661
@ PM_ELSE_NODE
ElseNode.
Definition: ast.h:712
@ PM_INTERPOLATED_MATCH_LAST_LINE_NODE
InterpolatedMatchLastLineNode.
Definition: ast.h:820
@ PM_SYMBOL_NODE
SymbolNode.
Definition: ast.h:1000
@ PM_RESCUE_MODIFIER_NODE
RescueModifierNode.
Definition: ast.h:955
@ PM_ALIAS_METHOD_NODE
AliasMethodNode.
Definition: ast.h:577
@ PM_MATCH_REQUIRED_NODE
MatchRequiredNode.
Definition: ast.h:874
@ PM_BACK_REFERENCE_READ_NODE
BackReferenceReadNode.
Definition: ast.h:601
@ PM_BLOCK_ARGUMENT_NODE
BlockArgumentNode.
Definition: ast.h:607
@ PM_MISSING_NODE
MissingNode.
Definition: ast.h:880
@ PM_SELF_NODE
SelfNode.
Definition: ast.h:970
@ PM_TRUE_NODE
TrueNode.
Definition: ast.h:1003
@ PM_ASSOC_SPLAT_NODE
AssocSplatNode.
Definition: ast.h:598
@ PM_RANGE_NODE
RangeNode.
Definition: ast.h:937
@ PM_LOCAL_VARIABLE_READ_NODE
LocalVariableReadNode.
Definition: ast.h:859
@ PM_NEXT_NODE
NextNode.
Definition: ast.h:892
@ PM_REGULAR_EXPRESSION_NODE
RegularExpressionNode.
Definition: ast.h:946
@ PM_CONSTANT_WRITE_NODE
ConstantWriteNode.
Definition: ast.h:703
@ PM_HASH_PATTERN_NODE
HashPatternNode.
Definition: ast.h:769
@ PM_UNDEF_NODE
UndefNode.
Definition: ast.h:1006
@ PM_ENSURE_NODE
EnsureNode.
Definition: ast.h:721
@ PM_LOCAL_VARIABLE_WRITE_NODE
LocalVariableWriteNode.
Definition: ast.h:865
@ PM_KEYWORD_HASH_NODE
KeywordHashNode.
Definition: ast.h:841
@ PM_PARENTHESES_NODE
ParenthesesNode.
Definition: ast.h:919
@ PM_CLASS_VARIABLE_WRITE_NODE
ClassVariableWriteNode.
Definition: ast.h:667
@ PM_POST_EXECUTION_NODE
PostExecutionNode.
Definition: ast.h:928
@ PM_RETURN_NODE
ReturnNode.
Definition: ast.h:967
@ PM_ARRAY_PATTERN_NODE
ArrayPatternNode.
Definition: ast.h:592
@ PM_MATCH_LAST_LINE_NODE
MatchLastLineNode.
Definition: ast.h:868
@ PM_CONSTANT_PATH_NODE
ConstantPathNode.
Definition: ast.h:682
@ PM_INTERPOLATED_SYMBOL_NODE
InterpolatedSymbolNode.
Definition: ast.h:829
@ PM_CLASS_VARIABLE_TARGET_NODE
ClassVariableTargetNode.
Definition: ast.h:664
@ PM_BREAK_NODE
BreakNode.
Definition: ast.h:622
@ PM_IMAGINARY_NODE
ImaginaryNode.
Definition: ast.h:775
@ PM_CONSTANT_READ_NODE
ConstantReadNode.
Definition: ast.h:697
@ PM_GLOBAL_VARIABLE_WRITE_NODE
GlobalVariableWriteNode.
Definition: ast.h:763
@ PM_SOURCE_ENCODING_NODE
SourceEncodingNode.
Definition: ast.h:979
@ PM_BEGIN_NODE
BeginNode.
Definition: ast.h:604
@ PM_INSTANCE_VARIABLE_READ_NODE
InstanceVariableReadNode.
Definition: ast.h:808
@ PM_FLIP_FLOP_NODE
FlipFlopNode.
Definition: ast.h:730
@ PM_INSTANCE_VARIABLE_WRITE_NODE
InstanceVariableWriteNode.
Definition: ast.h:814
@ PM_INSTANCE_VARIABLE_TARGET_NODE
InstanceVariableTargetNode.
Definition: ast.h:811
@ PM_CASE_NODE
CaseNode.
Definition: ast.h:646
@ PM_FLOAT_NODE
FloatNode.
Definition: ast.h:733
@ PM_ASSOC_NODE
AssocNode.
Definition: ast.h:595
@ PM_INTEGER_NODE
IntegerNode.
Definition: ast.h:817
@ PM_LOCAL_VARIABLE_TARGET_NODE
LocalVariableTargetNode.
Definition: ast.h:862
@ PM_STRING_NODE
StringNode.
Definition: ast.h:994
@ PM_ALIAS_GLOBAL_VARIABLE_NODE
AliasGlobalVariableNode.
Definition: ast.h:574
@ PM_NUMBERED_REFERENCE_READ_NODE
NumberedReferenceReadNode.
Definition: ast.h:904
@ PM_STATEMENTS_NODE
StatementsNode.
Definition: ast.h:991
@ PM_BLOCK_NODE
BlockNode.
Definition: ast.h:613
@ PM_INTERPOLATED_REGULAR_EXPRESSION_NODE
InterpolatedRegularExpressionNode.
Definition: ast.h:823
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE
We store the flags enum in every node in the tree.
Definition: ast.h:1046
@ PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition: ast.h:7927
@ PM_STRING_FLAGS_FROZEN
frozen by virtue of a frozen_string_literal: true comment or --enable-frozen-string-literal
Definition: ast.h:7910
@ PM_STRING_FLAGS_FORCED_BINARY_ENCODING
internal bytes forced the encoding to binary
Definition: ast.h:7907
@ PM_STRING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition: ast.h:7904
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING
if the arguments contain forwarding
Definition: ast.h:7736
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS
if the arguments contain keywords
Definition: ast.h:7739
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT
if the arguments contain a keyword splat
Definition: ast.h:7742
#define PM_NODE_FLAG_P(node, flag)
Return true if the given flag is set on the given node.
Definition: ast.h:1063
#define PM_NODE_TYPE_P(node, type)
Return true if the type of the given node matches the given type.
Definition: ast.h:1058
#define PM_NODE_TYPE(node)
Cast the type to an enum to allow the compiler to provide exhaustiveness checking.
Definition: ast.h:1053
@ PM_INTEGER_BASE_FLAGS_HEXADECIMAL
0x prefix
Definition: ast.h:7801
@ PM_INTEGER_BASE_FLAGS_OCTAL
0o or 0 prefix
Definition: ast.h:7798
@ PM_INTEGER_BASE_FLAGS_DECIMAL
0d or no prefix
Definition: ast.h:7795
@ PM_INTEGER_BASE_FLAGS_BINARY
0b prefix
Definition: ast.h:7792
enum pm_token_type pm_token_type_t
This enum represents every type of token in the Ruby source.
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
When we're serializing to Java, we want to skip serializing the location fields as they won't be used...
Definition: ast.h:7936
@ PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
&.
Definition: ast.h:7764
@ PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE
a call that is an attribute write, so the value being written should be returned
Definition: ast.h:7770
@ PM_CALL_NODE_FLAGS_VARIABLE_CALL
a call that could have been a local variable
Definition: ast.h:7767
@ PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition: ast.h:7882
@ PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
x - ignores whitespace and allows comments in regular expressions
Definition: ast.h:7855
uint16_t pm_node_flags_t
These are the flags embedded in the node struct.
Definition: ast.h:1040
@ PM_TOKEN_STAR_STAR
**
Definition: ast.h:469
@ PM_TOKEN_DOT_DOT_DOT
the ...
Definition: ast.h:124
@ PM_TOKEN_MINUS_EQUAL
-=
Definition: ast.h:385
@ PM_TOKEN_IGNORED_NEWLINE
an ignored newline
Definition: ast.h:196
@ PM_TOKEN_BANG_EQUAL
!=
Definition: ast.h:64
@ PM_TOKEN_KEYWORD___FILE__
__FILE__
Definition: ast.h:349
@ PM_TOKEN_KEYWORD_WHEN
when
Definition: ast.h:334
@ PM_TOKEN_FLOAT
a floating point number
Definition: ast.h:160
@ PM_TOKEN_PLUS_EQUAL
+=
Definition: ast.h:442
@ PM_TOKEN_DOT_DOT
the .. range operator
Definition: ast.h:121
@ PM_TOKEN_UDOT_DOT
unary ..
Definition: ast.h:496
@ PM_TOKEN_AMPERSAND_DOT
&.
Definition: ast.h:49
@ PM_TOKEN_NEWLINE
a newline character outside of other tokens
Definition: ast.h:391
@ PM_TOKEN_NUMBERED_REFERENCE
a numbered reference to a capture group in the previous regular expression match
Definition: ast.h:394
@ PM_TOKEN_AMPERSAND
&
Definition: ast.h:40
@ PM_TOKEN_KEYWORD_YIELD
yield
Definition: ast.h:343
@ PM_TOKEN_KEYWORD_END
end
Definition: ast.h:253
@ PM_TOKEN_LAMBDA_BEGIN
{
Definition: ast.h:361
@ PM_TOKEN_KEYWORD_UNTIL_MODIFIER
until in the modifier form
Definition: ast.h:331
@ PM_TOKEN_EQUAL_EQUAL_EQUAL
===
Definition: ast.h:151
@ PM_TOKEN_INTEGER_RATIONAL
an integer with a rational suffix
Definition: ast.h:208
@ PM_TOKEN_USTAR
unary *
Definition: ast.h:511
@ PM_TOKEN_TILDE
~ or ~@
Definition: ast.h:487
@ PM_TOKEN_KEYWORD___ENCODING__
__ENCODING__
Definition: ast.h:346
@ PM_TOKEN_REGEXP_END
the end of a regular expression
Definition: ast.h:451
@ PM_TOKEN_KEYWORD_UNTIL
until
Definition: ast.h:328
@ PM_TOKEN_COMMA
,
Definition: ast.h:109
@ PM_TOKEN_MAXIMUM
The maximum token value.
Definition: ast.h:523
@ PM_TOKEN_GREATER
Definition: ast.h:175
@ PM_TOKEN_INTEGER
an integer (any base)
Definition: ast.h:202
@ PM_TOKEN_SLASH_EQUAL
/=
Definition: ast.h:460
@ PM_TOKEN_UMINUS_NUM
-@ for a number
Definition: ast.h:505
@ PM_TOKEN_EMBVAR
Definition: ast.h:142
@ PM_TOKEN_KEYWORD_UNLESS_MODIFIER
unless in the modifier form
Definition: ast.h:325
@ PM_TOKEN_INTEGER_RATIONAL_IMAGINARY
an integer with a rational and imaginary suffix
Definition: ast.h:211
@ PM_TOKEN_FLOAT_RATIONAL_IMAGINARY
a floating point number with a rational and imaginary suffix
Definition: ast.h:169
@ PM_TOKEN_BRACKET_LEFT_RIGHT
[]
Definition: ast.h:82
@ PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL
&&=
Definition: ast.h:46
@ PM_TOKEN_KEYWORD_CLASS
class
Definition: ast.h:232
@ PM_TOKEN_KEYWORD_BEGIN
begin
Definition: ast.h:220
@ PM_TOKEN_NOT_PROVIDED
a token that was not present but it is okay
Definition: ast.h:37
@ PM_TOKEN_USTAR_STAR
unary **
Definition: ast.h:514
@ PM_TOKEN_GREATER_GREATER_EQUAL
>>=
Definition: ast.h:184
@ PM_TOKEN_PERCENT_EQUAL
%=
Definition: ast.h:409
@ PM_TOKEN_PERCENT
%
Definition: ast.h:406
@ PM_TOKEN_KEYWORD_IN
in
Definition: ast.h:274
@ PM_TOKEN_BANG
! or !@
Definition: ast.h:61
@ PM_TOKEN_KEYWORD_NOT
not
Definition: ast.h:286
@ PM_TOKEN_BRACKET_LEFT_ARRAY
[ for the beginning of an array
Definition: ast.h:79
@ PM_TOKEN_HEREDOC_END
the end of a heredoc
Definition: ast.h:187
@ PM_TOKEN_HEREDOC_START
the start of a heredoc
Definition: ast.h:190
@ PM_TOKEN_KEYWORD_DEFINED
defined?
Definition: ast.h:238
@ PM_TOKEN_UCOLON_COLON
unary ::
Definition: ast.h:493
@ PM_TOKEN_LABEL_END
the end of a label
Definition: ast.h:358
@ PM_TOKEN_EQUAL_GREATER
=>
Definition: ast.h:154
@ PM_TOKEN_KEYWORD_UNLESS
unless
Definition: ast.h:322
@ PM_TOKEN_KEYWORD_ENSURE
ensure
Definition: ast.h:259
@ PM_TOKEN_AMPERSAND_EQUAL
&=
Definition: ast.h:52
@ PM_TOKEN_EQUAL_EQUAL
==
Definition: ast.h:148
@ PM_TOKEN_UPLUS
+@
Definition: ast.h:508
@ PM_TOKEN_FLOAT_IMAGINARY
a floating point number with an imaginary suffix
Definition: ast.h:163
@ PM_TOKEN_KEYWORD_BEGIN_UPCASE
BEGIN.
Definition: ast.h:223
@ PM_TOKEN_LESS_EQUAL_GREATER
<=>
Definition: ast.h:370
@ PM_TOKEN_KEYWORD_RESCUE_MODIFIER
rescue in the modifier form
Definition: ast.h:298
@ PM_TOKEN_MISSING
a token that was expected but not found
Definition: ast.h:34
@ PM_TOKEN_MINUS_GREATER
->
Definition: ast.h:388
@ PM_TOKEN_KEYWORD_FALSE
false
Definition: ast.h:262
@ PM_TOKEN_PIPE_PIPE_EQUAL
||=
Definition: ast.h:436
@ PM_TOKEN_KEYWORD_IF
if
Definition: ast.h:268
@ PM_TOKEN_EMBEXPR_BEGIN
#{
Definition: ast.h:136
@ PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES
( for a parentheses node
Definition: ast.h:400
@ PM_TOKEN_EMBDOC_END
=end
Definition: ast.h:130
@ PM_TOKEN_KEYWORD_ELSE
else
Definition: ast.h:247
@ PM_TOKEN_BACK_REFERENCE
a back reference
Definition: ast.h:58
@ PM_TOKEN_BRACKET_LEFT
[
Definition: ast.h:76
@ PM_TOKEN_EOF
final token in the file
Definition: ast.h:31
@ PM_TOKEN_PIPE_PIPE
||
Definition: ast.h:433
@ PM_TOKEN_KEYWORD_NIL
nil
Definition: ast.h:283
@ PM_TOKEN_PERCENT_UPPER_W
%W
Definition: ast.h:424
@ PM_TOKEN_KEYWORD_RETURN
return
Definition: ast.h:304
@ PM_TOKEN_CLASS_VARIABLE
a class variable
Definition: ast.h:100
@ PM_TOKEN_PIPE
|
Definition: ast.h:427
@ PM_TOKEN_PARENTHESIS_LEFT
(
Definition: ast.h:397
@ PM_TOKEN_BANG_TILDE
!~
Definition: ast.h:67
@ PM_TOKEN_DOT
the .
Definition: ast.h:118
@ PM_TOKEN_PARENTHESIS_RIGHT
)
Definition: ast.h:403
@ PM_TOKEN_KEYWORD_RESCUE
rescue
Definition: ast.h:295
@ PM_TOKEN_INSTANCE_VARIABLE
an instance variable
Definition: ast.h:199
@ PM_TOKEN_PIPE_EQUAL
|=
Definition: ast.h:430
@ PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL
[]=
Definition: ast.h:85
@ PM_TOKEN_UAMPERSAND
unary &
Definition: ast.h:490
@ PM_TOKEN_MINUS
Definition: ast.h:382
@ PM_TOKEN_CONSTANT
a constant
Definition: ast.h:115
@ PM_TOKEN_IDENTIFIER
an identifier
Definition: ast.h:193
@ PM_TOKEN_EMBDOC_BEGIN
=begin
Definition: ast.h:127
@ PM_TOKEN_STAR_EQUAL
*=
Definition: ast.h:466
@ PM_TOKEN_KEYWORD_OR
or
Definition: ast.h:289
@ PM_TOKEN_KEYWORD_AND
and
Definition: ast.h:217
@ PM_TOKEN_LESS
<
Definition: ast.h:364
@ PM_TOKEN_KEYWORD_BREAK
break
Definition: ast.h:226
@ PM_TOKEN_PERCENT_LOWER_W
%w
Definition: ast.h:415
@ PM_TOKEN_SYMBOL_BEGIN
the beginning of a symbol
Definition: ast.h:484
@ PM_TOKEN_METHOD_NAME
a method name
Definition: ast.h:379
@ PM_TOKEN_KEYWORD_CASE
case
Definition: ast.h:229
@ PM_TOKEN_WORDS_SEP
a separator between words in a list
Definition: ast.h:517
@ PM_TOKEN_FLOAT_RATIONAL
a floating point number with a rational suffix
Definition: ast.h:166
@ PM_TOKEN_LESS_LESS_EQUAL
<<=
Definition: ast.h:376
@ PM_TOKEN_EMBDOC_LINE
a line inside of embedded documentation
Definition: ast.h:133
@ PM_TOKEN_KEYWORD_SUPER
super
Definition: ast.h:310
@ PM_TOKEN_KEYWORD_DO
do
Definition: ast.h:241
@ PM_TOKEN_KEYWORD_REDO
redo
Definition: ast.h:292
@ PM_TOKEN_EQUAL_TILDE
=~
Definition: ast.h:157
@ PM_TOKEN_EMBEXPR_END
}
Definition: ast.h:139
@ PM_TOKEN_KEYWORD_END_UPCASE
END.
Definition: ast.h:256
@ PM_TOKEN_KEYWORD___LINE__
__LINE__
Definition: ast.h:352
@ PM_TOKEN_STRING_END
the end of a string
Definition: ast.h:481
@ PM_TOKEN_STRING_CONTENT
the contents of a string
Definition: ast.h:478
@ PM_TOKEN_BRACE_LEFT
{
Definition: ast.h:70
@ PM_TOKEN_COLON_COLON
::
Definition: ast.h:106
@ PM_TOKEN_GREATER_GREATER
Definition: ast.h:181
@ PM_TOKEN_PERCENT_LOWER_X
%x
Definition: ast.h:418
@ PM_TOKEN_KEYWORD_SELF
self
Definition: ast.h:307
@ PM_TOKEN_PERCENT_LOWER_I
%i
Definition: ast.h:412
@ PM_TOKEN_KEYWORD_ALIAS
alias
Definition: ast.h:214
@ PM_TOKEN_GLOBAL_VARIABLE
a global variable
Definition: ast.h:172
@ PM_TOKEN_KEYWORD_IF_MODIFIER
if in the modifier form
Definition: ast.h:271
@ PM_TOKEN_SLASH
/
Definition: ast.h:457
@ PM_TOKEN_KEYWORD_RETRY
retry
Definition: ast.h:301
@ PM_TOKEN_COLON
:
Definition: ast.h:103
@ PM_TOKEN_KEYWORD_UNDEF
undef
Definition: ast.h:319
@ PM_TOKEN_BRACKET_RIGHT
]
Definition: ast.h:88
@ PM_TOKEN_KEYWORD_FOR
for
Definition: ast.h:265
@ PM_TOKEN_KEYWORD_THEN
then
Definition: ast.h:313
@ PM_TOKEN_QUESTION_MARK
?
Definition: ast.h:445
@ PM_TOKEN___END__
marker for the point in the file at which the parser should stop
Definition: ast.h:520
@ PM_TOKEN_KEYWORD_WHILE
while
Definition: ast.h:337
@ PM_TOKEN_EQUAL
=
Definition: ast.h:145
@ PM_TOKEN_KEYWORD_DEF
def
Definition: ast.h:235
@ PM_TOKEN_UDOT_DOT_DOT
unary ...
Definition: ast.h:499
@ PM_TOKEN_STAR
Definition: ast.h:463
@ PM_TOKEN_KEYWORD_WHILE_MODIFIER
while in the modifier form
Definition: ast.h:340
@ PM_TOKEN_KEYWORD_TRUE
true
Definition: ast.h:316
@ PM_TOKEN_BRACE_RIGHT
}
Definition: ast.h:73
@ PM_TOKEN_SEMICOLON
;
Definition: ast.h:454
@ PM_TOKEN_REGEXP_BEGIN
the beginning of a regular expression
Definition: ast.h:448
@ PM_TOKEN_CARET
^
Definition: ast.h:91
@ PM_TOKEN_PERCENT_UPPER_I
%I
Definition: ast.h:421
@ PM_TOKEN_KEYWORD_DO_LOOP
do keyword for a predicate in a while, until, or for loop
Definition: ast.h:244
@ PM_TOKEN_KEYWORD_MODULE
module
Definition: ast.h:277
@ PM_TOKEN_PLUS
Definition: ast.h:439
@ PM_TOKEN_KEYWORD_NEXT
next
Definition: ast.h:280
@ PM_TOKEN_BACKTICK
`
Definition: ast.h:55
@ PM_TOKEN_INTEGER_IMAGINARY
an integer with an imaginary suffix
Definition: ast.h:205
@ PM_TOKEN_LABEL
a label
Definition: ast.h:355
@ PM_TOKEN_STAR_STAR_EQUAL
**=
Definition: ast.h:472
@ PM_TOKEN_CHARACTER_LITERAL
a character literal
Definition: ast.h:97
@ PM_TOKEN_AMPERSAND_AMPERSAND
&&
Definition: ast.h:43
@ PM_TOKEN_UMINUS
-@
Definition: ast.h:502
@ PM_TOKEN_LESS_LESS
<<
Definition: ast.h:373
@ PM_TOKEN_GREATER_EQUAL
>=
Definition: ast.h:178
@ PM_TOKEN_COMMENT
a comment
Definition: ast.h:112
@ PM_TOKEN_CARET_EQUAL
^=
Definition: ast.h:94
@ PM_TOKEN_KEYWORD_ELSIF
elsif
Definition: ast.h:250
@ PM_TOKEN_STRING_BEGIN
the beginning of a string
Definition: ast.h:475
@ PM_TOKEN_LESS_EQUAL
<=
Definition: ast.h:367
@ PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition: ast.h:7781
@ PM_LOOP_FLAGS_BEGIN_MODIFIER
a loop after a begin statement, so the body is executed first before the condition
Definition: ast.h:7828
void pm_diagnostic_list_free(pm_list_t *list)
Deallocate the internal state of the given diagnostic list.
Definition: diagnostic.c:831
bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id,...)
Append a diagnostic to the given list of diagnostics that is using a format string for its message.
Definition: diagnostic.c:787
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition: diagnostic.h:29
bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id)
Append a diagnostic to the given list of diagnostics that is using shared memory for its message.
Definition: diagnostic.c:766
#define xfree
Old name of ruby_xfree.
Definition: xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition: xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition: xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options)
Free the internal memory associated with the options.
Definition: options.c:181
void pm_options_read(pm_options_t *options, const char *data)
Deserialize an options struct from the given binary string.
Definition: options.c:238
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition: options.h:185
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition: options.h:20
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index)
Return a pointer to the scope at the given index within the given options.
Definition: options.c:154
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition: options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition: options.h:26
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index)
Return a pointer to the local at the given index within the given scope.
Definition: options.c:173
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition: options.h:191
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition: options.h:71
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition: parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition: parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition: parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition: parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition: parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition: parser.h:496
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition: parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition: parser.h:321
@ PM_CONTEXT_ELSIF
an elsif clause
Definition: parser.h:348
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition: parser.h:333
@ PM_CONTEXT_ELSE
an else clause
Definition: parser.h:345
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition: parser.h:357
@ PM_CONTEXT_CASE_WHEN
a case when statements
Definition: parser.h:306
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition: parser.h:303
@ PM_CONTEXT_MODULE
a module declaration
Definition: parser.h:384
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition: parser.h:336
@ PM_CONTEXT_CASE_IN
a case in statements
Definition: parser.h:309
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition: parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition: parser.h:378
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition: parser.h:414
@ PM_CONTEXT_UNLESS
an unless statement
Definition: parser.h:429
@ PM_CONTEXT_POSTEXE
an END block
Definition: parser.h:402
@ PM_CONTEXT_IF
an if statement
Definition: parser.h:360
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition: parser.h:396
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition: parser.h:375
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition: parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition: parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition: parser.h:318
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition: parser.h:369
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition: parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition: parser.h:315
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition: parser.h:363
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition: parser.h:390
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition: parser.h:399
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition: parser.h:291
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition: parser.h:327
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition: parser.h:423
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition: parser.h:408
@ PM_CONTEXT_DEFINED
a defined? expression
Definition: parser.h:339
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition: parser.h:387
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition: parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition: parser.h:432
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition: parser.h:330
@ PM_CONTEXT_FOR
a for loop
Definition: parser.h:354
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition: parser.h:405
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition: parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition: parser.h:417
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition: parser.h:342
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition: parser.h:372
@ PM_CONTEXT_CLASS
a class declaration
Definition: parser.h:312
@ PM_CONTEXT_MAIN
the top level context
Definition: parser.h:381
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition: parser.h:366
@ PM_CONTEXT_BEGIN
a begin statement
Definition: parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition: parser.h:411
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition: parser.h:351
@ PM_CONTEXT_TERNARY
a ternary expression
Definition: parser.h:426
@ PM_CONTEXT_DEF
a method definition
Definition: parser.h:324
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition: parser.h:420
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition: parser.h:393
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition: parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition: parser.h:435
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition: parser.h:522
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition: parser.h:448
bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity)
Initialize a pm_buffer_t with the given capacity.
Definition: pm_buffer.c:15
void pm_buffer_append_format(pm_buffer_t *buffer, const char *format,...) PRISM_ATTRIBUTE_FORMAT(2
Append a formatted string to the buffer.
void void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length)
Append a string to the buffer.
Definition: pm_buffer.c:119
PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer)
Return the length of the buffer.
Definition: pm_buffer.c:43
void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value)
Append a single byte to the buffer.
Definition: pm_buffer.c:135
PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer)
Initialize a pm_buffer_t with its default values.
Definition: pm_buffer.c:27
void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value)
Append a 32-bit signed integer to the buffer as a variable-length integer.
Definition: pm_buffer.c:161
PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer)
Return the value of the buffer.
Definition: pm_buffer.c:35
PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer)
Free the memory associated with the buffer.
Definition: pm_buffer.c:315
void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length)
Append a list of bytes to the buffer.
Definition: pm_buffer.c:127
size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are hexadecimal digits.
Definition: pm_char.c:249
bool pm_char_is_decimal_digit(const uint8_t b)
Returns true if the given character is a decimal digit.
Definition: pm_char.c:295
size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are whitespace.
Definition: pm_char.c:76
size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are hexadecimal digits or underscore...
Definition: pm_char.c:263
size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are decimal digits or underscores.
Definition: pm_char.c:239
size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are decimal digits.
Definition: pm_char.c:225
size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are binary digits or underscores.
Definition: pm_char.c:202
size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are octal digits or underscores.
Definition: pm_char.c:216
size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list)
Returns the number of characters at the start of the string that are whitespace while also tracking t...
Definition: pm_char.c:86
bool pm_char_is_hexadecimal_digit(const uint8_t b)
Returns true if the given character is a hexadecimal digit.
Definition: pm_char.c:303
bool pm_char_is_octal_digit(const uint8_t b)
Returns true if the given character is an octal digit.
Definition: pm_char.c:287
bool pm_char_is_binary_digit(const uint8_t b)
Returns true if the given character is a binary digit.
Definition: pm_char.c:279
bool pm_char_is_inline_whitespace(const uint8_t b)
Returns true if the given character is an inline whitespace character.
Definition: pm_char.c:141
size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are inline whitespace.
Definition: pm_char.c:108
bool pm_char_is_whitespace(const uint8_t b)
Returns true if the given character is a whitespace character.
Definition: pm_char.c:133
size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are regexp options.
Definition: pm_char.c:117
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity)
Initialize a new constant pool with a given capacity.
pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length)
Insert a constant into a constant pool that is a slice of a source string.
void pm_constant_id_list_free(pm_constant_id_list_t *list)
Free the memory associated with a list of constant ids.
pm_constant_id_t pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length)
Insert a constant into a constant pool from memory that is constant.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id)
Insert a constant id into a list of constant ids at the specified index.
bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id)
Append a constant id to a list of constant ids.
pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length)
Insert a constant into a constant pool from memory that is now owned by the constant pool.
void pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity)
Initialize a list of constant ids with a given capacity.
pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id)
Return a pointer to the constant indicated by the given constant id.
bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id)
Checks if the current constant id list includes the given constant id.
void pm_constant_pool_free(pm_constant_pool_t *pool)
Free the memory associated with a constant pool.
void pm_list_append(pm_list_t *list, pm_list_node_t *node)
Append a node to the given list.
Definition: pm_list.c:23
void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding)
We need to roll our own memchr to handle cases where the encoding changes and we need to search for a...
Definition: pm_memchr.c:11
pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line)
Returns the line and column of the given offset.
void pm_newline_list_free(pm_newline_list_t *list)
Free the internal memory allocated for the newline list.
int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line)
Returns the line of the given offset.
bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity)
Initialize a new newline list with the given capacity.
bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor)
Append a new offset to the newline list.
void pm_newline_list_clear(pm_newline_list_t *list)
Clear out the newlines that have been appended to the list.
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string)
Returns the length associated with the string.
Definition: pm_string.c:353
void pm_string_ensure_owned(pm_string_t *string)
Ensure the string is owned.
Definition: pm_string.c:316
void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length)
Initialize an owned string that is responsible for freeing allocated memory.
Definition: pm_string.c:30
void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end)
Initialize a shared string that is based on initial input.
Definition: pm_string.c:16
#define PM_STRING_EMPTY
Defines an empty string.
Definition: pm_string.h:70
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
Definition: pm_string.c:369
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string)
Returns the start pointer associated with the string.
Definition: pm_string.c:361
int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length)
Compare two strings, ignoring case, up to the given length.
const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate)
Here we have rolled our own version of strpbrk.
Definition: pm_strpbrk.c:194
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition: defines.h:230
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition: defines.h:78
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition: defines.h:34
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition: defines.h:113
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition: defines.h:50
bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n)
Return true if the next character in the UTF-8 encoding is an uppercase character.
Definition: encoding.c:2346
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition: encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition: encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition: encoding.h:68
const uint8_t pm_encoding_unicode_table[256]
This lookup table is referenced in both the UTF-8 encoding file and the parser directly in order to s...
Definition: encoding.c:2164
const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end)
Parse the given name of an encoding and return a pointer to the corresponding encoding struct if one ...
Definition: encoding.c:5026
size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding.
Definition: encoding.c:2287
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition: encoding.h:74
PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node)
Deallocate a node and all of its children.
Definition: node.c:114
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition: node.h:17
void pm_node_list_free(pm_node_list_t *list)
Free the internal memory associated with the given node list.
Definition: node.c:88
void pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other)
Concatenate the given node list onto the end of the other node list.
Definition: node.c:77
void pm_node_list_append(pm_node_list_t *list, pm_node_t *node)
Append a new node onto the end of the node list.
Definition: node.c:55
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition: version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition: version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition: version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition: version.h:12
The main header file for the prism parser.
PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name)
Check that the slice is a valid method name.
Definition: prism.c:22856
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
Definition: prism.c:22444
PRISM_EXPORTED_FUNCTION const char * pm_version(void)
The prism version and the serialization format.
Definition: prism.c:7
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
Definition: prism.c:22482
pm_string_query_t
Represents the results of a slice query.
Definition: prism.h:240
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition: prism.h:248
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition: prism.h:242
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition: prism.h:245
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition: serialize.c:2121
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream to retrieve a line of input from a stream.
Definition: prism.h:88
PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the AST represented by the given node to the given buffer.
Definition: prism.c:22656
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data)
Parse and serialize the comments in the given source to the given buffer.
Definition: prism.c:22711
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data)
Parse and serialize the AST represented by the source that is read out of the given stream into th...
Definition: prism.c:22690
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
Definition: prism.c:22589
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Run the given parser over its source and return the resulting tree.
Definition: prism.c:22508
PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data)
Parse the given source to the AST and dump the AST to the given buffer.
Definition: prism.c:22667
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition: serialize.c:2098
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given start and end pointers.
Definition: prism.c:22189
PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t size, const char *data)
Parse the source and return true if it parses without errors or warnings.
Definition: prism.c:22612
PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name)
Check that the slice is a valid constant name.
Definition: prism.c:22836
PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name)
Check that the slice is a valid local variable name.
Definition: prism.c:22816
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition: serialize.c:2028
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition: token_type.c:362
PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data)
Parse a regular expression.
Definition: regexp.c:777
void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node)
Create a string-based representation of the given static literal.
pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace)
Add a node to the set of static literals.
void pm_static_literals_free(pm_static_literals_t *literals)
Free the internal memory associated with the given static literals set.
This struct is used to pass information between the regular expression parser and the error callback.
Definition: prism.c:17950
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition: prism.c:17952
const uint8_t * start
The start of the regular expression.
Definition: prism.c:17955
bool shared
Whether or not the source of the regular expression is shared.
Definition: prism.c:17966
const uint8_t * end
The end of the regular expression.
Definition: prism.c:17958
This struct is used to pass information between the regular expression parser and the named capture c...
Definition: prism.c:20796
pm_constant_id_list_t names
The list of names that have been parsed.
Definition: prism.c:20807
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition: prism.c:20798
pm_match_write_node_t * match
The match write node that is being created.
Definition: prism.c:20804
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition: prism.c:20801
bool shared
Whether the content of the regular expression is shared.
Definition: prism.c:20814
AndNode.
Definition: ast.h:1263
struct pm_node * left
AndNode::left.
Definition: ast.h:1279
struct pm_node * right
AndNode::right.
Definition: ast.h:1292
ArgumentsNode.
Definition: ast.h:1324
pm_node_t base
The embedded base node.
Definition: ast.h:1326
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition: ast.h:1337
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition: prism.c:1575
pm_node_t * block
The optional block attached to the call.
Definition: prism.c:1586
bool has_forwarding
The flag indicating whether this arguments list has a forwarding argument.
Definition: prism.c:1589
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition: prism.c:1577
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition: prism.c:1580
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition: prism.c:1583
ArrayNode.
Definition: ast.h:1355
struct pm_node_list elements
ArrayNode::elements.
Definition: ast.h:1365
ArrayPatternNode.
Definition: ast.h:1416
struct pm_node * constant
ArrayPatternNode::constant.
Definition: ast.h:1424
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition: ast.h:1464
pm_node_t base
The embedded base node.
Definition: ast.h:1418
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition: ast.h:1474
AssocNode.
Definition: ast.h:1489
struct pm_node * value
AssocNode::value.
Definition: ast.h:1521
struct pm_node * key
AssocNode::key.
Definition: ast.h:1508
BeginNode.
Definition: ast.h:1615
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition: ast.h:1668
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition: ast.h:1648
struct pm_statements_node * statements
BeginNode::statements.
Definition: ast.h:1638
pm_node_t base
The embedded base node.
Definition: ast.h:1617
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition: ast.h:1658
This struct represents a set of binding powers used for a given token.
Definition: prism.c:12900
bool binary
Whether or not this token can be used as a binary operator.
Definition: prism.c:12908
pm_binding_power_t left
The left binding power.
Definition: prism.c:12902
bool nonassoc
Whether or not this token can be used as a non-associative binary operator.
Definition: prism.c:12914
pm_binding_power_t right
The right binding power.
Definition: prism.c:12905
BlockLocalVariableNode.
Definition: ast.h:1734
BlockNode.
Definition: ast.h:1762
BlockParameterNode.
Definition: ast.h:1838
BlockParametersNode.
Definition: ast.h:1892
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition: pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition: pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition: pm_buffer.h:30
CallNode.
Definition: ast.h:2119
pm_location_t opening_loc
CallNode::opening_loc.
Definition: ast.h:2180
pm_location_t closing_loc
CallNode::closing_loc.
Definition: ast.h:2200
struct pm_node * receiver
CallNode::receiver.
Definition: ast.h:2138
pm_constant_id_t name
CallNode::name.
Definition: ast.h:2161
pm_node_t base
The embedded base node.
Definition: ast.h:2121
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition: ast.h:2151
pm_location_t message_loc
CallNode::message_loc.
Definition: ast.h:2171
struct pm_arguments_node * arguments
CallNode::arguments.
Definition: ast.h:2190
struct pm_node * block
CallNode::block.
Definition: ast.h:2210
CaseMatchNode.
Definition: ast.h:2545
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition: ast.h:2568
CaseNode.
Definition: ast.h:2615
struct pm_node_list conditions
CaseNode::conditions.
Definition: ast.h:2638
ClassVariableReadNode.
Definition: ast.h:2880
ClassVariableTargetNode.
Definition: ast.h:2909
ClassVariableWriteNode.
Definition: ast.h:2932
This is a node in the linked list of comments that we've found while parsing.
Definition: parser.h:458
pm_comment_type_t type
The type of comment that we've found.
Definition: parser.h:466
pm_location_t location
The location of the comment in the source.
Definition: parser.h:463
A list of constant IDs.
ConstantPathNode.
Definition: ast.h:3146
ConstantPathTargetNode.
Definition: ast.h:3284
ConstantReadNode.
Definition: ast.h:3379
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition: ast.h:3408
ConstantWriteNode.
Definition: ast.h:3431
This is a node in a linked list of contexts.
Definition: parser.h:439
pm_context_t context
The context that this node represents.
Definition: parser.h:441
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition: parser.h:444
This struct represents a diagnostic generated during parsing.
Definition: diagnostic.h:359
pm_list_node_t node
The embedded base node.
Definition: diagnostic.h:361
pm_diagnostic_id_t diag_id
The ID of the diagnostic.
Definition: diagnostic.h:367
ElseNode.
Definition: ast.h:3610
struct pm_statements_node * statements
ElseNode::statements.
Definition: ast.h:3623
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition: encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition: encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition: encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition: encoding.h:50
const char * name
The name of the encoding.
Definition: encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition: encoding.h:43
EnsureNode.
Definition: ast.h:3708
struct pm_statements_node * statements
EnsureNode::statements.
Definition: ast.h:3721
FindPatternNode.
Definition: ast.h:3765
struct pm_node * constant
FindPatternNode::constant.
Definition: ast.h:3773
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition: ast.h:3793
pm_node_t base
The embedded base node.
Definition: ast.h:3767
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition: ast.h:3798
FlipFlopNode.
Definition: ast.h:3816
FloatNode.
Definition: ast.h:3849
double value
FloatNode::value.
Definition: ast.h:3859
pm_node_t base
The embedded base node.
Definition: ast.h:3851
ForwardingParameterNode.
Definition: ast.h:3985
GlobalVariableReadNode.
Definition: ast.h:4145
GlobalVariableTargetNode.
Definition: ast.h:4174
GlobalVariableWriteNode.
Definition: ast.h:4197
HashNode.
Definition: ast.h:4259
struct pm_node_list elements
HashNode::elements.
Definition: ast.h:4285
HashPatternNode.
Definition: ast.h:4313
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition: ast.h:4336
pm_node_t base
The embedded base node.
Definition: ast.h:4315
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition: ast.h:4341
struct pm_node * constant
HashPatternNode::constant.
Definition: ast.h:4321
All of the information that must be stored in order to lex a heredoc.
Definition: parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition: parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition: parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition: parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition: parser.h:90
IfNode.
Definition: ast.h:4362
struct pm_statements_node * statements
IfNode::statements.
Definition: ast.h:4422
struct pm_node * subsequent
IfNode::subsequent.
Definition: ast.h:4441
ImaginaryNode.
Definition: ast.h:4468
InstanceVariableReadNode.
Definition: ast.h:4958
InstanceVariableTargetNode.
Definition: ast.h:4987
InstanceVariableWriteNode.
Definition: ast.h:5010
IntegerNode.
Definition: ast.h:5078
pm_integer_t value
IntegerNode::value.
Definition: ast.h:5088
pm_node_t base
The embedded base node.
Definition: ast.h:5080
bool negative
Whether or not the integer is negative.
Definition: pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition: ast.h:5116
InterpolatedRegularExpressionNode.
Definition: ast.h:5162
InterpolatedStringNode.
Definition: ast.h:5199
pm_node_t base
The embedded base node.
Definition: ast.h:5201
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition: ast.h:5207
InterpolatedSymbolNode.
Definition: ast.h:5232
pm_node_t base
The embedded base node.
Definition: ast.h:5234
InterpolatedXStringNode.
Definition: ast.h:5265
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition: ast.h:5273
pm_node_t base
The embedded base node.
Definition: ast.h:5267
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition: ast.h:5278
KeywordHashNode.
Definition: ast.h:5337
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition: parser.h:518
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition: parser.h:512
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition: parser.h:109
enum pm_lex_mode::@91 mode
The type of this lex mode.
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition: parser.h:254
union pm_lex_mode::@92 as
The data associated with this type of lex mode.
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition: parser.h:246
int32_t line
The line number.
This struct represents an abstract linked list that provides common functionality.
Definition: pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition: pm_list.h:48
This represents the overall linked list.
Definition: pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition: pm_list.h:60
size_t size
The size of the list.
Definition: pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition: parser.h:532
pm_constant_id_t name
The name of the local variable.
Definition: parser.h:534
pm_location_t location
The location of the local variable in the source.
Definition: parser.h:537
uint32_t hash
The hash of the local variable.
Definition: parser.h:546
uint32_t index
The index of the local variable in the local table.
Definition: parser.h:540
uint32_t reads
The number of times the local variable is read.
Definition: parser.h:543
LocalVariableReadNode.
Definition: ast.h:5579
uint32_t depth
LocalVariableReadNode::depth.
Definition: ast.h:5610
pm_constant_id_t name
LocalVariableReadNode::name.
Definition: ast.h:5597
LocalVariableTargetNode.
Definition: ast.h:5625
LocalVariableWriteNode.
Definition: ast.h:5653
uint32_t depth
LocalVariableWriteNode::depth.
Definition: ast.h:5680
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition: ast.h:5667
This is a set of local variables in a certain lexical context (method, class, module,...
Definition: parser.h:554
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition: parser.h:562
uint32_t capacity
The capacity of the local variables set.
Definition: parser.h:559
uint32_t size
The number of local variables in the set.
Definition: parser.h:556
This represents a range of bytes in the source string to which a node or token corresponds.
Definition: ast.h:545
const uint8_t * start
A pointer to the start location of the range in the source.
Definition: ast.h:547
const uint8_t * end
A pointer to the end location of the range in the source.
Definition: ast.h:550
This is a node in the linked list of magic comments that we've found while parsing.
Definition: parser.h:475
MatchLastLineNode.
Definition: ast.h:5745
MatchWriteNode.
Definition: ast.h:5849
struct pm_node_list targets
MatchWriteNode::targets.
Definition: ast.h:5862
MultiTargetNode.
Definition: ast.h:5945
pm_node_t base
The embedded base node.
Definition: ast.h:5947
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition: ast.h:6003
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition: ast.h:5963
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition: ast.h:6013
MultiWriteNode.
Definition: ast.h:6028
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
A list of nodes in the source, most often used for lists of children.
Definition: ast.h:558
size_t size
The number of nodes in the list.
Definition: ast.h:560
struct pm_node ** nodes
The nodes in the list.
Definition: ast.h:566
This is the base structure that represents a node in the syntax tree.
Definition: ast.h:1069
pm_node_type_t type
This represents the type of the node.
Definition: ast.h:1074
pm_node_flags_t flags
This represents any flags on the node.
Definition: ast.h:1080
pm_location_t location
This is the location of the node in the source.
Definition: ast.h:1092
OptionalParameterNode.
Definition: ast.h:6301
A scope of locals surrounding the code that is being parsed.
Definition: options.h:36
size_t locals_count
The number of locals in the scope.
Definition: options.h:38
The options that can be passed to the parser.
Definition: options.h:77
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition: options.h:126
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition: options.h:88
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition: options.h:142
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition: options.h:149
pm_string_t encoding
The name of the encoding that the source file is in.
Definition: options.h:103
int32_t line
The line within the file that the parse starts on.
Definition: options.h:97
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition: options.h:82
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition: options.h:135
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition: options.h:159
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition: options.h:108
pm_string_t filepath
The name of the file that is currently being parsed.
Definition: options.h:91
pm_options_version_t version
The version of prism that we should be parsing with.
Definition: options.h:123
OrNode.
Definition: ast.h:6339
struct pm_node * left
OrNode::left.
Definition: ast.h:6355
struct pm_node * right
OrNode::right.
Definition: ast.h:6368
ParametersNode.
Definition: ast.h:6394
struct pm_node * rest
ParametersNode::rest.
Definition: ast.h:6412
struct pm_block_parameter_node * block
ParametersNode::block.
Definition: ast.h:6432
pm_node_t base
The embedded base node.
Definition: ast.h:6396
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition: ast.h:6427
ParenthesesNode.
Definition: ast.h:6447
struct pm_node * body
ParenthesesNode::body.
Definition: ast.h:6455
This struct represents the overall parser.
Definition: parser.h:640
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition: parser.h:840
pm_lex_state_t lex_state
The current state of the lexer.
Definition: parser.h:649
uint8_t command_line
The command line flags given from the options.
Definition: parser.h:859
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition: parser.h:755
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition: parser.h:882
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition: parser.h:909
const uint8_t * end
The pointer to the end of the source.
Definition: parser.h:694
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition: parser.h:888
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition: parser.h:797
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition: parser.h:930
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition: parser.h:786
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition: parser.h:912
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition: parser.h:707
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition: parser.h:749
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition: parser.h:721
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition: parser.h:658
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition: parser.h:774
pm_options_version_t version
The version of prism that we should use to parse.
Definition: parser.h:856
pm_token_t previous
The previous token we were considering.
Definition: parser.h:697
struct pm_parser::@97 lex_modes
A stack of lex modes.
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition: parser.h:803
bool parsing_eval
Whether or not we are parsing an eval string.
Definition: parser.h:875
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition: parser.h:924
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition: parser.h:903
pm_location_t data_loc
An optional location that represents the location of the __END__ marker and the rest of the content of th...
Definition: parser.h:728
pm_context_node_t * current_context
The current parsing context.
Definition: parser.h:740
const uint8_t * start
The pointer to the start of the source.
Definition: parser.h:691
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition: parser.h:652
pm_list_t error_list
The list of errors that have been found while parsing.
Definition: parser.h:734
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition: parser.h:869
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition: parser.h:853
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition: parser.h:768
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition: parser.h:684
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition: parser.h:731
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition: parser.h:715
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition: parser.h:664
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition: parser.h:762
int32_t start_line
The line number at the start of the parse.
Definition: parser.h:809
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition: parser.h:896
pm_lex_mode_t * current
The current mode of the lexer.
Definition: parser.h:681
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition: parser.h:718
size_t index
The current index into the lexer mode stack.
Definition: parser.h:687
pm_string_t filepath
This is the path of the file being parsed.
Definition: parser.h:780
pm_scope_t * current_scope
The current local scope.
Definition: parser.h:737
bool command_start
Whether or not we're at the beginning of a command.
Definition: parser.h:885
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition: parser.h:789
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
Definition: parser.h:918
uint32_t node_id
The next node identifier that will be assigned.
Definition: parser.h:646
RangeNode.
Definition: ast.h:6653
struct pm_node * right
RangeNode::right.
Definition: ast.h:6683
struct pm_node * left
RangeNode::left.
Definition: ast.h:6669
RationalNode.
Definition: ast.h:6711
pm_node_t base
The embedded base node.
Definition: ast.h:6713
pm_integer_t numerator
RationalNode::numerator.
Definition: ast.h:6723
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition: prism.c:10371
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition: prism.c:10376
pm_token_buffer_t base
The embedded base buffer.
Definition: prism.c:10373
RegularExpressionNode.
Definition: ast.h:6778
pm_node_t base
The embedded base node.
Definition: ast.h:6780
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition: ast.h:6801
RequiredParameterNode.
Definition: ast.h:6852
RescueModifierNode.
Definition: ast.h:6875
struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
Definition: ast.h:6893
RescueNode.
Definition: ast.h:6913
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition: ast.h:6946
pm_node_t base
The embedded base node.
Definition: ast.h:6915
This struct represents a node in a linked list of scopes.
Definition: parser.h:580
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition: parser.h:582
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition: parser.h:593
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition: parser.h:620
pm_locals_t locals
The IDs of the locals in the given scope.
Definition: parser.h:585
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition: parser.h:614
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition: parser.h:626
SplatNode.
Definition: ast.h:7208
struct pm_node * expression
SplatNode::expression.
Definition: ast.h:7221
StatementsNode.
Definition: ast.h:7236
struct pm_node_list body
StatementsNode::body.
Definition: ast.h:7244
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition: ast.h:7271
pm_node_t base
The embedded base node.
Definition: ast.h:7273
pm_string_t unescaped
StringNode::unescaped.
Definition: ast.h:7294
pm_location_t closing_loc
StringNode::closing_loc.
Definition: ast.h:7289
pm_location_t opening_loc
StringNode::opening_loc.
Definition: ast.h:7279
A generic string type that can have various ownership semantics.
Definition: pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition: pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition: pm_string.h:38
enum pm_string_t::@98 type
The type of the string.
SymbolNode.
Definition: ast.h:7363
pm_location_t value_loc
SymbolNode::value_loc.
Definition: ast.h:7376
pm_string_t unescaped
SymbolNode::unescaped.
Definition: ast.h:7386
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition: prism.c:10345
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition: prism.c:10350
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition: prism.c:10356
This struct represents a token in the Ruby source.
Definition: ast.h:530
const uint8_t * end
A pointer to the end location of the token in the source.
Definition: ast.h:538
const uint8_t * start
A pointer to the start location of the token in the source.
Definition: ast.h:535
pm_token_type_t type
The type of the token.
Definition: ast.h:532
UndefNode.
Definition: ast.h:7419
UnlessNode.
Definition: ast.h:7450
struct pm_statements_node * statements
UnlessNode::statements.
Definition: ast.h:7500
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition: ast.h:7510
WhenNode.
Definition: ast.h:7581
XStringNode.
Definition: ast.h:7667