Ruby  3.4.0dev (2024-11-22 revision 0989400a925cd201defdca9eb28eb87200b30785)
prism.c
1 #include "prism.h"
2 
6 const char *
7 pm_version(void) {
8  return PRISM_VERSION;
9 }
10 
// NOTE(review): the name suggests the column width of a tab stop; its uses are
// not visible in this part of the file.
#define PM_TAB_WHITESPACE_SIZE 8

// Function-like minimum/maximum helpers. Each argument is expanded twice, so
// callers must not pass expressions that have side effects.
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
20 
21 /******************************************************************************/
22 /* Lex mode manipulations */
23 /******************************************************************************/
24 
/**
 * Return the byte that increases nesting for a literal opened with the given
 * delimiter. Only the four bracket-like openers ( [ { < nest; every other
 * delimiter yields NUL, meaning "no incrementor".
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    if (start == '(' || start == '[' || start == '{' || start == '<') {
        return start;
    }

    return '\0';
}
41 
/**
 * Return the byte that closes a literal opened with the given delimiter. The
 * bracket-like openers ( [ { < map to their closing counterpart; any other
 * delimiter terminates itself and is returned unchanged.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';

    return start;
}
61 
67 static bool
68 lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
69  lex_mode.prev = parser->lex_modes.current;
70  parser->lex_modes.index++;
71 
72  if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
73  parser->lex_modes.current = (pm_lex_mode_t *) xmalloc(sizeof(pm_lex_mode_t));
74  if (parser->lex_modes.current == NULL) return false;
75 
76  *parser->lex_modes.current = lex_mode;
77  } else {
78  parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
79  parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
80  }
81 
82  return true;
83 }
84 
88 static inline bool
89 lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
90  uint8_t incrementor = lex_mode_incrementor(delimiter);
91  uint8_t terminator = lex_mode_terminator(delimiter);
92 
93  pm_lex_mode_t lex_mode = {
94  .mode = PM_LEX_LIST,
95  .as.list = {
96  .nesting = 0,
97  .interpolation = interpolation,
98  .incrementor = incrementor,
99  .terminator = terminator
100  }
101  };
102 
103  // These are the places where we need to split up the content of the list.
104  // We'll use strpbrk to find the first of these characters.
105  uint8_t *breakpoints = lex_mode.as.list.breakpoints;
106  memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
107  size_t index = 7;
108 
109  // Now we'll add the terminator to the list of breakpoints. If the
110  // terminator is not already a NULL byte, add it to the list.
111  if (terminator != '\0') {
112  breakpoints[index++] = terminator;
113  }
114 
115  // If interpolation is allowed, then we're going to check for the #
116  // character. Otherwise we'll only look for escapes and the terminator.
117  if (interpolation) {
118  breakpoints[index++] = '#';
119  }
120 
121  // If there is an incrementor, then we'll check for that as well.
122  if (incrementor != '\0') {
123  breakpoints[index++] = incrementor;
124  }
125 
126  parser->explicit_encoding = NULL;
127  return lex_mode_push(parser, lex_mode);
128 }
129 
135 static inline bool
136 lex_mode_push_list_eof(pm_parser_t *parser) {
137  return lex_mode_push_list(parser, false, '\0');
138 }
139 
143 static inline bool
144 lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
145  pm_lex_mode_t lex_mode = {
146  .mode = PM_LEX_REGEXP,
147  .as.regexp = {
148  .nesting = 0,
149  .incrementor = incrementor,
150  .terminator = terminator
151  }
152  };
153 
154  // These are the places where we need to split up the content of the
155  // regular expression. We'll use strpbrk to find the first of these
156  // characters.
157  uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
158  memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
159  size_t index = 4;
160 
161  // First we'll add the terminator.
162  if (terminator != '\0') {
163  breakpoints[index++] = terminator;
164  }
165 
166  // Next, if there is an incrementor, then we'll check for that as well.
167  if (incrementor != '\0') {
168  breakpoints[index++] = incrementor;
169  }
170 
171  parser->explicit_encoding = NULL;
172  return lex_mode_push(parser, lex_mode);
173 }
174 
178 static inline bool
179 lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
180  pm_lex_mode_t lex_mode = {
181  .mode = PM_LEX_STRING,
182  .as.string = {
183  .nesting = 0,
184  .interpolation = interpolation,
185  .label_allowed = label_allowed,
186  .incrementor = incrementor,
187  .terminator = terminator
188  }
189  };
190 
191  // These are the places where we need to split up the content of the
192  // string. We'll use strpbrk to find the first of these characters.
193  uint8_t *breakpoints = lex_mode.as.string.breakpoints;
194  memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
195  size_t index = 3;
196 
197  // Now add in the terminator. If the terminator is not already a NULL byte,
198  // then we'll add it.
199  if (terminator != '\0') {
200  breakpoints[index++] = terminator;
201  }
202 
203  // If interpolation is allowed, then we're going to check for the #
204  // character. Otherwise we'll only look for escapes and the terminator.
205  if (interpolation) {
206  breakpoints[index++] = '#';
207  }
208 
209  // If we have an incrementor, then we'll add that in as a breakpoint as
210  // well.
211  if (incrementor != '\0') {
212  breakpoints[index++] = incrementor;
213  }
214 
215  parser->explicit_encoding = NULL;
216  return lex_mode_push(parser, lex_mode);
217 }
218 
224 static inline bool
225 lex_mode_push_string_eof(pm_parser_t *parser) {
226  return lex_mode_push_string(parser, false, false, '\0', '\0');
227 }
228 
234 static void
235 lex_mode_pop(pm_parser_t *parser) {
236  if (parser->lex_modes.index == 0) {
237  parser->lex_modes.current->mode = PM_LEX_DEFAULT;
238  } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
239  parser->lex_modes.index--;
240  parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
241  } else {
242  parser->lex_modes.index--;
243  pm_lex_mode_t *prev = parser->lex_modes.current->prev;
244  xfree(parser->lex_modes.current);
245  parser->lex_modes.current = prev;
246  }
247 }
248 
252 static inline bool
253 lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
254  return parser->lex_state & state;
255 }
256 
257 typedef enum {
258  PM_IGNORED_NEWLINE_NONE = 0,
259  PM_IGNORED_NEWLINE_ALL,
260  PM_IGNORED_NEWLINE_PATTERN
261 } pm_ignored_newline_type_t;
262 
263 static inline pm_ignored_newline_type_t
264 lex_state_ignored_p(pm_parser_t *parser) {
265  bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
266 
267  if (ignored) {
268  return PM_IGNORED_NEWLINE_ALL;
269  } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
270  return PM_IGNORED_NEWLINE_PATTERN;
271  } else {
272  return PM_IGNORED_NEWLINE_NONE;
273  }
274 }
275 
276 static inline bool
277 lex_state_beg_p(pm_parser_t *parser) {
278  return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
279 }
280 
281 static inline bool
282 lex_state_arg_p(pm_parser_t *parser) {
283  return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
284 }
285 
286 static inline bool
287 lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
288  if (parser->current.end >= parser->end) {
289  return false;
290  }
291  return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
292 }
293 
294 static inline bool
295 lex_state_end_p(pm_parser_t *parser) {
296  return lex_state_p(parser, PM_LEX_STATE_END_ANY);
297 }
298 
302 static inline bool
303 lex_state_operator_p(pm_parser_t *parser) {
304  return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
305 }
306 
311 static inline void
312 lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
313  parser->lex_state = state;
314 }
315 
// Lex-state debug logging is off unless the build defines PM_DEBUG_LOGGING to
// a non-zero value.
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
323 
324 #if PM_DEBUG_LOGGING
325 PRISM_ATTRIBUTE_UNUSED static void
326 debug_state(pm_parser_t *parser) {
327  fprintf(stderr, "STATE: ");
328  bool first = true;
329 
330  if (parser->lex_state == PM_LEX_STATE_NONE) {
331  fprintf(stderr, "NONE\n");
332  return;
333  }
334 
335 #define CHECK_STATE(state) \
336  if (parser->lex_state & state) { \
337  if (!first) fprintf(stderr, "|"); \
338  fprintf(stderr, "%s", #state); \
339  first = false; \
340  }
341 
342  CHECK_STATE(PM_LEX_STATE_BEG)
343  CHECK_STATE(PM_LEX_STATE_END)
344  CHECK_STATE(PM_LEX_STATE_ENDARG)
345  CHECK_STATE(PM_LEX_STATE_ENDFN)
346  CHECK_STATE(PM_LEX_STATE_ARG)
347  CHECK_STATE(PM_LEX_STATE_CMDARG)
348  CHECK_STATE(PM_LEX_STATE_MID)
349  CHECK_STATE(PM_LEX_STATE_FNAME)
350  CHECK_STATE(PM_LEX_STATE_DOT)
351  CHECK_STATE(PM_LEX_STATE_CLASS)
352  CHECK_STATE(PM_LEX_STATE_LABEL)
353  CHECK_STATE(PM_LEX_STATE_LABELED)
354  CHECK_STATE(PM_LEX_STATE_FITEM)
355 
356 #undef CHECK_STATE
357 
358  fprintf(stderr, "\n");
359 }
360 
361 static void
362 debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
363  fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
364  debug_state(parser);
365  lex_state_set(parser, state);
366  fprintf(stderr, "Now: ");
367  debug_state(parser);
368  fprintf(stderr, "\n");
369 }
370 
371 #define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
372 #endif
373 
374 /******************************************************************************/
375 /* Command-line macro helpers */
376 /******************************************************************************/
377 
/** Non-zero when the given option bit is set in the parser's command_line field. */
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) \
    ((parser)->command_line & (option))

/** Non-zero when PM_OPTIONS_COMMAND_LINE_A is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_E is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_L is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_N is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_P is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

/** Non-zero when PM_OPTIONS_COMMAND_LINE_X is set on the parser. */
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) \
    PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
398 
399 /******************************************************************************/
400 /* Diagnostic-related functions */
401 /******************************************************************************/
402 
406 static inline void
407 pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
408  pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
409 }
410 
/**
 * Append a formatted diagnostic covering [start, end) to the parser's error
 * list. The trailing arguments are forwarded to the diagnostic's format. The
 * parser argument is parenthesized so that any pointer-valued expression
 * (not just a plain identifier) can be passed safely.
 */
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
416 
421 static inline void
422 pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
423  pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
424 }
425 
/**
 * Append a formatted error diagnostic spanning the location pointed to by
 * `location` (its start and end members).
 */
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (location)->start, \
        (location)->end, \
        diag_id, \
        __VA_ARGS__ \
    )
432 
437 static inline void
438 pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
439  pm_parser_err(parser, node->location.start, node->location.end, diag_id);
440 }
441 
/**
 * Append a formatted error diagnostic spanning the given node's location.
 */
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (node)->location.start, \
        (node)->location.end, \
        diag_id, \
        __VA_ARGS__ \
    )

/**
 * Append a formatted error diagnostic spanning the given node, passing the
 * node's source length (as an int) and a pointer to its first byte as the two
 * format arguments.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
    PM_PARSER_ERR_NODE_FORMAT( \
        parser, \
        node, \
        diag_id, \
        (int) ((node)->location.end - (node)->location.start), \
        (const char *) (node)->location.start \
    )
455 
460 static inline void
461 pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
462  pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
463 }
464 
469 static inline void
470 pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
471  pm_parser_err(parser, token->start, token->end, diag_id);
472 }
473 
/**
 * Append a formatted error diagnostic spanning the given token. The token is
 * taken by value (members are accessed with `.`).
 */
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_ERR_FORMAT( \
        parser, \
        (token).start, \
        (token).end, \
        diag_id, \
        __VA_ARGS__ \
    )

/**
 * Append a formatted error diagnostic spanning the given token, passing the
 * token's length (as an int) and a pointer to its first byte as the two format
 * arguments.
 */
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT( \
        parser, \
        token, \
        diag_id, \
        (int) ((token).end - (token).start), \
        (const char *) (token).start \
    )
487 
491 static inline void
492 pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
493  pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
494 }
495 
500 static inline void
501 pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
502  pm_parser_warn(parser, token->start, token->end, diag_id);
503 }
504 
509 static inline void
510 pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
511  pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
512 }
513 
/**
 * Append a formatted diagnostic covering [start, end) to the parser's warning
 * list. The trailing arguments are forwarded to the diagnostic's format. The
 * parser argument is parenthesized so that any pointer-valued expression
 * (not just a plain identifier) can be passed safely.
 */
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)

/**
 * Append a formatted warning diagnostic spanning the given token. The token is
 * taken by value (members are accessed with `.`).
 */
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)

/**
 * Append a formatted warning diagnostic spanning the given token, passing the
 * token's length (as an int) and a pointer to its first byte as the two format
 * arguments.
 */
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)

/**
 * Append a formatted warning diagnostic spanning the given node's location.
 */
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
540 
546 static void
547 pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
548  PM_PARSER_ERR_FORMAT(
549  parser,
550  ident_start,
551  ident_start + ident_length,
552  PM_ERR_HEREDOC_TERM,
553  (int) ident_length,
554  (const char *) ident_start
555  );
556 }
557 
558 /******************************************************************************/
559 /* Scope-related functions */
560 /******************************************************************************/
561 
565 static bool
566 pm_parser_scope_push(pm_parser_t *parser, bool closed) {
567  pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
568  if (scope == NULL) return false;
569 
570  *scope = (pm_scope_t) {
571  .previous = parser->current_scope,
572  .locals = { 0 },
573  .parameters = PM_SCOPE_PARAMETERS_NONE,
574  .implicit_parameters = { 0 },
575  .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
576  .closed = closed
577  };
578 
579  parser->current_scope = scope;
580  return true;
581 }
582 
587 static bool
588 pm_parser_scope_toplevel_p(pm_parser_t *parser) {
589  pm_scope_t *scope = parser->current_scope;
590 
591  do {
592  if (scope->previous == NULL) return true;
593  if (scope->closed) return false;
594  } while ((scope = scope->previous) != NULL);
595 
596  assert(false && "unreachable");
597  return true;
598 }
599 
603 static pm_scope_t *
604 pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
605  pm_scope_t *scope = parser->current_scope;
606 
607  while (depth-- > 0) {
608  assert(scope != NULL);
609  scope = scope->previous;
610  }
611 
612  return scope;
613 }
614 
615 typedef enum {
616  PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
617  PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
618  PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
619 } pm_scope_forwarding_param_check_result_t;
620 
621 static pm_scope_forwarding_param_check_result_t
622 pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
623  pm_scope_t *scope = parser->current_scope;
624  bool conflict = false;
625 
626  while (scope != NULL) {
627  if (scope->parameters & mask) {
628  if (scope->closed) {
629  if (conflict) {
630  return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
631  } else {
632  return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
633  }
634  }
635 
636  conflict = true;
637  }
638 
639  if (scope->closed) break;
640  scope = scope->previous;
641  }
642 
643  return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
644 }
645 
646 static void
647 pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
648  switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
649  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
650  // Pass.
651  break;
652  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
653  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
654  break;
655  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
656  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
657  break;
658  }
659 }
660 
661 static void
662 pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
663  switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
664  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
665  // Pass.
666  break;
667  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
668  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
669  break;
670  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
671  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
672  break;
673  }
674 }
675 
676 static void
677 pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
678  switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
679  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
680  // Pass.
681  break;
682  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
683  // This shouldn't happen, because ... is not allowed in the
684  // declaration of blocks. If we get here, we assume we already have
685  // an error for this.
686  break;
687  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
688  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
689  break;
690  }
691 }
692 
693 static void
694 pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
695  switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
696  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
697  // Pass.
698  break;
699  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
700  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
701  break;
702  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
703  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
704  break;
705  }
706 }
707 
711 static inline pm_shareable_constant_value_t
712 pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
713  return parser->current_scope->shareable_constant;
714 }
715 
720 static void
721 pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
722  pm_scope_t *scope = parser->current_scope;
723 
724  do {
725  scope->shareable_constant = shareable_constant;
726  } while (!scope->closed && (scope = scope->previous) != NULL);
727 }
728 
729 /******************************************************************************/
730 /* Local variable-related functions */
731 /******************************************************************************/
732 
736 #define PM_LOCALS_HASH_THRESHOLD 9
737 
738 static void
739 pm_locals_free(pm_locals_t *locals) {
740  if (locals->capacity > 0) {
741  xfree(locals->locals);
742  }
743 }
744 
749 static uint32_t
750 pm_locals_hash(pm_constant_id_t name) {
751  name = ((name >> 16) ^ name) * 0x45d9f3b;
752  name = ((name >> 16) ^ name) * 0x45d9f3b;
753  name = (name >> 16) ^ name;
754  return name;
755 }
756 
761 static void
762 pm_locals_resize(pm_locals_t *locals) {
763  uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
764  assert(next_capacity > locals->capacity);
765 
766  pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
767  if (next_locals == NULL) abort();
768 
769  if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
770  if (locals->size > 0) {
771  memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
772  }
773  } else {
774  // If we just switched from a list to a hash, then we need to fill in
775  // the hash values of all of the locals.
776  bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
777  uint32_t mask = next_capacity - 1;
778 
779  for (uint32_t index = 0; index < locals->capacity; index++) {
780  pm_local_t *local = &locals->locals[index];
781 
782  if (local->name != PM_CONSTANT_ID_UNSET) {
783  if (hash_needed) local->hash = pm_locals_hash(local->name);
784 
785  uint32_t hash = local->hash;
786  while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
787  next_locals[hash & mask] = *local;
788  }
789  }
790  }
791 
792  pm_locals_free(locals);
793  locals->locals = next_locals;
794  locals->capacity = next_capacity;
795 }
796 
812 static bool
813 pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
814  if (locals->size >= (locals->capacity / 4 * 3)) {
815  pm_locals_resize(locals);
816  }
817 
818  if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
819  for (uint32_t index = 0; index < locals->capacity; index++) {
820  pm_local_t *local = &locals->locals[index];
821 
822  if (local->name == PM_CONSTANT_ID_UNSET) {
823  *local = (pm_local_t) {
824  .name = name,
825  .location = { .start = start, .end = end },
826  .index = locals->size++,
827  .reads = reads,
828  .hash = 0
829  };
830  return true;
831  } else if (local->name == name) {
832  return false;
833  }
834  }
835  } else {
836  uint32_t mask = locals->capacity - 1;
837  uint32_t hash = pm_locals_hash(name);
838  uint32_t initial_hash = hash;
839 
840  do {
841  pm_local_t *local = &locals->locals[hash & mask];
842 
843  if (local->name == PM_CONSTANT_ID_UNSET) {
844  *local = (pm_local_t) {
845  .name = name,
846  .location = { .start = start, .end = end },
847  .index = locals->size++,
848  .reads = reads,
849  .hash = initial_hash
850  };
851  return true;
852  } else if (local->name == name) {
853  return false;
854  } else {
855  hash++;
856  }
857  } while ((hash & mask) != initial_hash);
858  }
859 
860  assert(false && "unreachable");
861  return true;
862 }
863 
868 static uint32_t
869 pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
870  if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
871  for (uint32_t index = 0; index < locals->size; index++) {
872  pm_local_t *local = &locals->locals[index];
873  if (local->name == name) return index;
874  }
875  } else {
876  uint32_t mask = locals->capacity - 1;
877  uint32_t hash = pm_locals_hash(name);
878  uint32_t initial_hash = hash & mask;
879 
880  do {
881  pm_local_t *local = &locals->locals[hash & mask];
882 
883  if (local->name == PM_CONSTANT_ID_UNSET) {
884  return UINT32_MAX;
885  } else if (local->name == name) {
886  return hash & mask;
887  } else {
888  hash++;
889  }
890  } while ((hash & mask) != initial_hash);
891  }
892 
893  return UINT32_MAX;
894 }
895 
900 static void
901 pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
902  uint32_t index = pm_locals_find(locals, name);
903  assert(index != UINT32_MAX);
904 
905  pm_local_t *local = &locals->locals[index];
906  assert(local->reads < UINT32_MAX);
907 
908  local->reads++;
909 }
910 
915 static void
916 pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
917  uint32_t index = pm_locals_find(locals, name);
918  assert(index != UINT32_MAX);
919 
920  pm_local_t *local = &locals->locals[index];
921  assert(local->reads > 0);
922 
923  local->reads--;
924 }
925 
929 static uint32_t
930 pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
931  uint32_t index = pm_locals_find(locals, name);
932  assert(index != UINT32_MAX);
933 
934  return locals->locals[index].reads;
935 }
936 
945 static void
946 pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
948 
949  // If we're still below the threshold for switching to a hash, then we only
950  // need to loop over the locals until we hit the size because the locals are
951  // stored in a list.
952  uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
953 
954  // We will only warn for unused variables if we're not at the top level, or
955  // if we're parsing a file outside of eval or -e.
956  bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
957 
958  for (uint32_t index = 0; index < capacity; index++) {
959  pm_local_t *local = &locals->locals[index];
960 
961  if (local->name != PM_CONSTANT_ID_UNSET) {
962  pm_constant_id_list_insert(list, (size_t) local->index, local->name);
963 
964  if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
965  pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
966 
967  if (constant->length >= 1 && *constant->start != '_') {
968  PM_PARSER_WARN_FORMAT(
969  parser,
970  local->location.start,
971  local->location.end,
972  PM_WARN_UNUSED_LOCAL_VARIABLE,
973  (int) constant->length,
974  (const char *) constant->start
975  );
976  }
977  }
978  }
979  }
980 }
981 
982 /******************************************************************************/
983 /* Node-related functions */
984 /******************************************************************************/
985 
989 static inline pm_constant_id_t
990 pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
991  return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
992 }
993 
997 static inline pm_constant_id_t
998 pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
999  return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1000 }
1001 
1005 static inline pm_constant_id_t
1006 pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1007  return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1008 }
1009 
1013 static inline pm_constant_id_t
1014 pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1015  return pm_parser_constant_id_location(parser, token->start, token->end);
1016 }
1017 
1022 static inline pm_constant_id_t
1023 pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1024  return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1025 }
1026 
1032 static pm_node_t *
1033 pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1034  pm_node_t *void_node = NULL;
1035 
1036  while (node != NULL) {
1037  switch (PM_NODE_TYPE(node)) {
1038  case PM_RETURN_NODE:
1039  case PM_BREAK_NODE:
1040  case PM_NEXT_NODE:
1041  case PM_REDO_NODE:
1042  case PM_RETRY_NODE:
1044  return void_node != NULL ? void_node : node;
1046  return NULL;
1047  case PM_BEGIN_NODE: {
1048  pm_begin_node_t *cast = (pm_begin_node_t *) node;
1049 
1050  if (cast->ensure_clause != NULL) {
1051  if (cast->rescue_clause != NULL) {
1052  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
1053  if (vn != NULL) return vn;
1054  }
1055 
1056  if (cast->statements != NULL) {
1057  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1058  if (vn != NULL) return vn;
1059  }
1060 
1061  node = (pm_node_t *) cast->ensure_clause;
1062  } else if (cast->rescue_clause != NULL) {
1063  if (cast->statements == NULL) return NULL;
1064 
1065  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1066  if (vn == NULL) return NULL;
1067  if (void_node == NULL) void_node = vn;
1068 
1069  for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1070  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
1071  if (vn == NULL) {
1072  void_node = NULL;
1073  break;
1074  }
1075  if (void_node == NULL) {
1076  void_node = vn;
1077  }
1078  }
1079 
1080  if (cast->else_clause != NULL) {
1081  node = (pm_node_t *) cast->else_clause;
1082  } else {
1083  return void_node;
1084  }
1085  } else {
1086  node = (pm_node_t *) cast->statements;
1087  }
1088 
1089  break;
1090  }
1091  case PM_ENSURE_NODE: {
1092  pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1093  node = (pm_node_t *) cast->statements;
1094  break;
1095  }
1096  case PM_PARENTHESES_NODE: {
1098  node = (pm_node_t *) cast->body;
1099  break;
1100  }
1101  case PM_STATEMENTS_NODE: {
1102  pm_statements_node_t *cast = (pm_statements_node_t *) node;
1103  node = cast->body.nodes[cast->body.size - 1];
1104  break;
1105  }
1106  case PM_IF_NODE: {
1107  pm_if_node_t *cast = (pm_if_node_t *) node;
1108  if (cast->statements == NULL || cast->subsequent == NULL) {
1109  return NULL;
1110  }
1111  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1112  if (vn == NULL) {
1113  return NULL;
1114  }
1115  if (void_node == NULL) {
1116  void_node = vn;
1117  }
1118  node = cast->subsequent;
1119  break;
1120  }
1121  case PM_UNLESS_NODE: {
1122  pm_unless_node_t *cast = (pm_unless_node_t *) node;
1123  if (cast->statements == NULL || cast->else_clause == NULL) {
1124  return NULL;
1125  }
1126  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1127  if (vn == NULL) {
1128  return NULL;
1129  }
1130  if (void_node == NULL) {
1131  void_node = vn;
1132  }
1133  node = (pm_node_t *) cast->else_clause;
1134  break;
1135  }
1136  case PM_ELSE_NODE: {
1137  pm_else_node_t *cast = (pm_else_node_t *) node;
1138  node = (pm_node_t *) cast->statements;
1139  break;
1140  }
1141  case PM_AND_NODE: {
1142  pm_and_node_t *cast = (pm_and_node_t *) node;
1143  node = cast->left;
1144  break;
1145  }
1146  case PM_OR_NODE: {
1147  pm_or_node_t *cast = (pm_or_node_t *) node;
1148  node = cast->left;
1149  break;
1150  }
1153 
1154  pm_scope_t *scope = parser->current_scope;
1155  for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1156 
1157  pm_locals_read(&scope->locals, cast->name);
1158  return NULL;
1159  }
1160  default:
1161  return NULL;
1162  }
1163  }
1164 
1165  return NULL;
1166 }
1167 
1168 static inline void
1169 pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1170  pm_node_t *void_node = pm_check_value_expression(parser, node);
1171  if (void_node != NULL) {
1172  pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1173  }
1174 }
1175 
/**
 * Emit a PM_WARN_VOID_STATEMENT warning for the given node when it is one of
 * the node kinds recognised below. Each recognised kind assigns `type` (the
 * text describing the node) and `length`; both are handed to the warning,
 * presumably as a length-bounded string. When no case assigns `type`, nothing
 * is reported.
 *
 * NOTE(review): the numbering embedded in this listing skips inside this
 * function (1184 -> 1191, 1267 -> 1270, 1270 -> 1273 and 1286 -> 1288), so
 * some case labels and the range condition appear to be missing from this
 * text -- confirm against the upstream file.
 */
1179 static void
1180 pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1181  const char *type = NULL;
1182  int length = 0;
1183 
1184  switch (PM_NODE_TYPE(node)) {
// NOTE(review): the case labels that lead to "a variable" are not visible here.
1191  type = "a variable";
1192  length = 10;
1193  break;
1194  case PM_CALL_NODE: {
1195  const pm_call_node_t *cast = (const pm_call_node_t *) node;
// Calls that have a call operator location, or no message location, are
// never reported.
1196  if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1197 
// Look the method name up in the constant pool and only report it when it is
// one of the operator names matched below. For those, `type` points directly
// at the name bytes and `length` is the name's length.
1198  const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1199  switch (message->length) {
1200  case 1:
1201  switch (message->start[0]) {
1202  case '+':
1203  case '-':
1204  case '*':
1205  case '/':
1206  case '%':
1207  case '|':
1208  case '^':
1209  case '&':
1210  case '>':
1211  case '<':
1212  type = (const char *) message->start;
1213  length = 1;
1214  break;
1215  }
1216  break;
1217  case 2:
// Two-byte names are dispatched on their second byte: <=, >=, !=, == then
// +@, -@ and finally **.
1218  switch (message->start[1]) {
1219  case '=':
1220  if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1221  type = (const char *) message->start;
1222  length = 2;
1223  }
1224  break;
1225  case '@':
1226  if (message->start[0] == '+' || message->start[0] == '-') {
1227  type = (const char *) message->start;
1228  length = 2;
1229  }
1230  break;
1231  case '*':
1232  if (message->start[0] == '*') {
1233  type = (const char *) message->start;
1234  length = 2;
1235  }
1236  break;
1237  }
1238  break;
1239  case 3:
// The only three-byte operator name reported is <=>.
1240  if (memcmp(message->start, "<=>", 3) == 0) {
1241  type = "<=>";
1242  length = 3;
1243  }
1244  break;
1245  }
1246 
1247  break;
1248  }
1249  case PM_CONSTANT_PATH_NODE:
1250  type = "::";
1251  length = 2;
1252  break;
1253  case PM_CONSTANT_READ_NODE:
1254  type = "a constant";
1255  length = 10;
1256  break;
1257  case PM_DEFINED_NODE:
1258  type = "defined?";
1259  length = 8;
1260  break;
1261  case PM_FALSE_NODE:
1262  type = "false";
1263  length = 5;
1264  break;
// All of the node kinds in this group are reported as "a literal".
1265  case PM_FLOAT_NODE:
1266  case PM_IMAGINARY_NODE:
1267  case PM_INTEGER_NODE:
1270  case PM_RATIONAL_NODE:
1273  case PM_SOURCE_FILE_NODE:
1274  case PM_SOURCE_LINE_NODE:
1275  case PM_STRING_NODE:
1276  case PM_SYMBOL_NODE:
1277  type = "a literal";
1278  length = 9;
1279  break;
1280  case PM_NIL_NODE:
1281  type = "nil";
1282  length = 3;
1283  break;
1284  case PM_RANGE_NODE: {
1285  const pm_range_node_t *cast = (const pm_range_node_t *) node;
1286 
// NOTE(review): the condition choosing between "..." and ".." is not visible
// in this listing.
1288  type = "...";
1289  length = 3;
1290  } else {
1291  type = "..";
1292  length = 2;
1293  }
1294 
1295  break;
1296  }
1297  case PM_SELF_NODE:
1298  type = "self";
1299  length = 4;
1300  break;
1301  case PM_TRUE_NODE:
1302  type = "true";
1303  length = 4;
1304  break;
1305  default:
1306  break;
1307  }
1308 
// Only warn when one of the cases above recognised the node.
1309  if (type != NULL) {
1310  PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1311  }
1312 }
1313 
1318 static void
1319 pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1320  assert(node->body.size > 0);
1321  const size_t size = node->body.size - (last_value ? 1 : 0);
1322  for (size_t index = 0; index < size; index++) {
1323  pm_void_statement_check(parser, node->body.nodes[index]);
1324  }
1325 }
1326 
/**
 * The context in which a node is being checked as a conditional predicate. It
 * selects the wording used by pm_parser_warn_conditional_predicate_literal.
 */
typedef enum {
    /** Literal warnings describe the node as part of a "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,

    /** Literal warnings describe the node as part of a "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,

    /** No literal warning is emitted in this context. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT
} pm_conditional_predicate_type_t;
1337 
1341 static void
1342 pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1343  switch (type) {
1344  case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1345  PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1346  break;
1347  case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1348  PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1349  break;
1350  case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1351  break;
1352  }
1353 }
1354 
/**
 * Return true if the given node counts as a literal for the purposes of
 * pm_conditional_predicate_warn_write_literal. Arrays and hashes qualify when
 * they carry the static literal flag or when every element (for hashes: every
 * key and value of every assoc element) qualifies recursively. The scalar
 * node kinds listed at the end always qualify; everything else does not.
 *
 * NOTE(review): the numbering embedded in this listing skips from 1391 to
 * 1394 inside the final case list, so some case labels may be missing from
 * this text -- confirm against the upstream file.
 */
1359 static bool
1360 pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1361  switch (PM_NODE_TYPE(node)) {
1362  case PM_ARRAY_NODE: {
// A static literal array qualifies without inspecting its elements.
1363  if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1364 
1365  const pm_array_node_t *cast = (const pm_array_node_t *) node;
1366  for (size_t index = 0; index < cast->elements.size; index++) {
1367  if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1368  }
1369 
1370  return true;
1371  }
1372  case PM_HASH_NODE: {
1373  if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1374 
1375  const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1376  for (size_t index = 0; index < cast->elements.size; index++) {
1377  const pm_node_t *element = cast->elements.nodes[index];
// Any element that is not a plain assoc node disqualifies the hash.
1378  if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1379 
1380  const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1381  if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1382  }
1383 
1384  return true;
1385  }
1386  case PM_FALSE_NODE:
1387  case PM_FLOAT_NODE:
1388  case PM_IMAGINARY_NODE:
1389  case PM_INTEGER_NODE:
1390  case PM_NIL_NODE:
1391  case PM_RATIONAL_NODE:
1394  case PM_SOURCE_FILE_NODE:
1395  case PM_SOURCE_LINE_NODE:
1396  case PM_STRING_NODE:
1397  case PM_SYMBOL_NODE:
1398  case PM_TRUE_NODE:
1399  return true;
1400  default:
1401  return false;
1402  }
1403 }
1404 
1409 static inline void
1410 pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1411  if (pm_conditional_predicate_warn_write_literal_p(node)) {
1412  pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1413  }
1414 }
1415 
/**
 * Process a node that is being used as a conditional predicate. Depending on
 * the node kind this:
 *
 * - recurses into both operands of `and`/`or` nodes,
 * - recurses into the single statement of a parentheses or begin node,
 * - recurses into the endpoints of a range and retags the range node as a
 *   flip-flop node,
 * - retags regular expression nodes as match-last-line nodes,
 * - emits literal-in-condition warnings for literal nodes, and
 * - for write nodes, warns when the value being written is a literal.
 *
 * `type` says which context the node appears in and controls the wording of
 * (or suppresses) the literal warnings.
 *
 * NOTE(review): the numbering embedded in this listing skips in many places
 * inside this function (for example 1443 -> 1445, 1476 -> 1478, 1488 -> 1490,
 * 1492 -> 1495 and before each of the write-node calls near the end), so
 * several case labels and at least one declaration appear to be missing from
 * this text -- confirm against the upstream file.
 */
1428 static void
1429 pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1430  switch (PM_NODE_TYPE(node)) {
1431  case PM_AND_NODE: {
1432  pm_and_node_t *cast = (pm_and_node_t *) node;
// Both operands are checked as plain conditions, regardless of `type`.
1433  pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1434  pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1435  break;
1436  }
1437  case PM_OR_NODE: {
1438  pm_or_node_t *cast = (pm_or_node_t *) node;
1439  pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1440  pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1441  break;
1442  }
1443  case PM_PARENTHESES_NODE: {
// NOTE(review): the declaration of `cast` is not visible in this listing.
1445 
// Only look inside when the body is a statements node holding exactly one
// statement; the caller's `type` is passed through unchanged.
1446  if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1447  pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1448  if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1449  }
1450 
1451  break;
1452  }
1453  case PM_BEGIN_NODE: {
1454  pm_begin_node_t *cast = (pm_begin_node_t *) node;
1455  if (cast->statements != NULL) {
1456  pm_statements_node_t *statements = cast->statements;
1457  if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1458  }
1459  break;
1460  }
1461  case PM_RANGE_NODE: {
1462  pm_range_node_t *cast = (pm_range_node_t *) node;
1463 
// Each endpoint that is present is checked in flip-flop context.
1464  if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1465  if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1466 
1467  // Here we change the range node into a flip flop node. We can do
1468  // this since the nodes are exactly the same except for the type.
1469  // We're only asserting against the size when we should probably
1470  // assert against the entire layout, but we'll assume tests will
1471  // catch this.
1472  assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1473  node->type = PM_FLIP_FLOP_NODE;
1474 
1475  break;
1476  }
// NOTE(review): the case label for the regular expression branch below is not
// visible in this listing.
1478  // Here we change the regular expression node into a match last line
1479  // node. We can do this since the nodes are exactly the same except
1480  // for the type.
1481  assert(sizeof(pm_regular_expression_node_t) == sizeof(pm_match_last_line_node_t));
1482  node->type = PM_MATCH_LAST_LINE_NODE;
1483 
// The warning is skipped when PM_PARSER_COMMAND_LINE_OPTION_E holds for the
// parser.
1484  if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1485  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1486  }
1487 
1488  break;
// NOTE(review): the case label and the statements that perform the retagging
// described in the next comment are not visible in this listing.
1490  // Here we change the interpolated regular expression node into an
1491  // interpolated match last line node. We can do this since the nodes
1492  // are exactly the same except for the type.
1495 
1496  if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1497  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1498  }
1499 
1500  break;
1501  case PM_INTEGER_NODE:
// Integers get a dedicated warning inside a flip-flop and the generic literal
// warning (with an empty prefix) everywhere else.
1502  if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1503  if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1504  pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1505  }
1506  } else {
1507  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1508  }
1509  break;
1510  case PM_STRING_NODE:
1511  case PM_SOURCE_FILE_NODE:
1513  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1514  break;
1515  case PM_SYMBOL_NODE:
1517  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1518  break;
1519  case PM_SOURCE_LINE_NODE:
1521  case PM_FLOAT_NODE:
1522  case PM_RATIONAL_NODE:
1523  case PM_IMAGINARY_NODE:
1524  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1525  break;
// Each write-node branch below warns if the value being written is a literal.
// NOTE(review): the case labels for the first five branches are not visible
// in this listing; the casts indicate the node kinds involved.
1527  pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1528  break;
1530  pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1531  break;
1533  pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1534  break;
1536  pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1537  break;
1539  pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1540  break;
1541  case PM_MULTI_WRITE_NODE:
1542  pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1543  break;
1544  default:
1545  break;
1546  }
1547 }
1548 
1557 static inline pm_token_t
1558 not_provided(pm_parser_t *parser) {
1559  return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1560 }
1561 
/** A zero-width location anchored at the start of the parser's source. */
#define PM_LOCATION_NULL_VALUE(parser) \
    ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })

/** The location covered by the given token pointer. */
#define PM_LOCATION_TOKEN_VALUE(token) \
    ((pm_location_t) { .start = (token)->start, .end = (token)->end })

/** A copy of the location of a node reached through a pm_node_t-style pointer. */
#define PM_LOCATION_NODE_VALUE(node) \
    ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })

/** A copy of the location of a node reached through its `base` member. */
#define PM_LOCATION_NODE_BASE_VALUE(node) \
    ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })

/** The location used for an optional piece of syntax that is absent. */
#define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE \
    ((pm_location_t) { .start = NULL, .end = NULL })

/**
 * The location of the given token pointer, or the "not provided" location when
 * the token's type is PM_TOKEN_NOT_PROVIDED.
 */
#define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) \
    ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
1568 
/**
 * Describes an argument list; presumably the one attached to a call -- TODO
 * confirm.
 *
 * NOTE(review): the member declarations are not visible in this listing (the
 * embedded numbering skips between every line of the struct body). Other code
 * in this file reads the members `block`, `closing_loc` and `arguments`
 * through pointers to this type -- confirm the full member list against the
 * upstream file.
 */
1575 typedef struct {
1578 
1581 
1584 
1587 
1590 } pm_arguments_t;
1591 
1595 static inline const uint8_t *
1596 pm_arguments_end(pm_arguments_t *arguments) {
1597  if (arguments->block != NULL) {
1598  const uint8_t *end = arguments->block->location.end;
1599  if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1600  end = arguments->closing_loc.end;
1601  }
1602  return end;
1603  }
1604  if (arguments->closing_loc.start != NULL) {
1605  return arguments->closing_loc.end;
1606  }
1607  if (arguments->arguments != NULL) {
1608  return arguments->arguments->base.location.end;
1609  }
1610  return arguments->closing_loc.end;
1611 }
1612 
1617 static void
1618 pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1619  // First, check that we have arguments and that we don't have a closing
1620  // location for them.
1621  if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1622  return;
1623  }
1624 
1625  // Next, check that we don't have a single parentheses argument. This would
1626  // look like:
1627  //
1628  // foo (1) {}
1629  //
1630  // In this case, it's actually okay for the block to be attached to the
1631  // call, even though it looks like it's attached to the argument.
1632  if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1633  return;
1634  }
1635 
1636  // If we didn't hit a case before this check, then at this point we need to
1637  // add a syntax error.
1638  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1639 }
1640 
1641 /******************************************************************************/
1642 /* Basic character checks */
1643 /******************************************************************************/
1644 
1651 static inline size_t
1652 char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
1653  if (parser->encoding_changed) {
1654  size_t width;
1655  if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
1656  return width;
1657  } else if (*b == '_') {
1658  return 1;
1659  } else if (*b >= 0x80) {
1660  return parser->encoding->char_width(b, parser->end - b);
1661  } else {
1662  return 0;
1663  }
1664  } else if (*b < 0x80) {
1665  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1666  } else {
1667  return pm_encoding_utf_8_char_width(b, parser->end - b);
1668  }
1669 }
1670 
1675 static inline size_t
1676 char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
1677  if (*b < 0x80) {
1678  return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1679  } else {
1680  return pm_encoding_utf_8_char_width(b, end - b);
1681  }
1682 }
1683 
1689 static inline size_t
1690 char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
1691  if (parser->encoding_changed) {
1692  size_t width;
1693  if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
1694  return width;
1695  } else if (*b == '_') {
1696  return 1;
1697  } else if (*b >= 0x80) {
1698  return parser->encoding->char_width(b, parser->end - b);
1699  } else {
1700  return 0;
1701  }
1702  }
1703  return char_is_identifier_utf8(b, parser->end);
1704 }
1705 
// The punctuation characters that are allowed in global names are recorded in
// a small bit table covering the ASCII range 0x20-0x7e, 32 characters per
// word. It is used to quickly check whether the character after a $ is a
// valid character for a global name.
//
// PM_GLOBAL_PUNCT_BIT yields the bit for character c if c belongs to word idx
// of the table and 0 otherwise; PM_GLOBAL_PUNCT_WORD ors together the bits of
// every permitted character for word idx.
#define PM_GLOBAL_PUNCT_BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
#define PM_GLOBAL_PUNCT_WORD(idx) ( \
    PM_GLOBAL_PUNCT_BIT('~', idx) | PM_GLOBAL_PUNCT_BIT('*', idx) | \
    PM_GLOBAL_PUNCT_BIT('$', idx) | PM_GLOBAL_PUNCT_BIT('?', idx) | \
    PM_GLOBAL_PUNCT_BIT('!', idx) | PM_GLOBAL_PUNCT_BIT('@', idx) | \
    PM_GLOBAL_PUNCT_BIT('/', idx) | PM_GLOBAL_PUNCT_BIT('\\', idx) | \
    PM_GLOBAL_PUNCT_BIT(';', idx) | PM_GLOBAL_PUNCT_BIT(',', idx) | \
    PM_GLOBAL_PUNCT_BIT('.', idx) | PM_GLOBAL_PUNCT_BIT('=', idx) | \
    PM_GLOBAL_PUNCT_BIT(':', idx) | PM_GLOBAL_PUNCT_BIT('<', idx) | \
    PM_GLOBAL_PUNCT_BIT('>', idx) | PM_GLOBAL_PUNCT_BIT('\"', idx) | \
    PM_GLOBAL_PUNCT_BIT('&', idx) | PM_GLOBAL_PUNCT_BIT('`', idx) | \
    PM_GLOBAL_PUNCT_BIT('\'', idx) | PM_GLOBAL_PUNCT_BIT('+', idx) | \
    PM_GLOBAL_PUNCT_BIT('0', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
    PM_GLOBAL_PUNCT_WORD(0),
    PM_GLOBAL_PUNCT_WORD(1),
    PM_GLOBAL_PUNCT_WORD(2)
};

#undef PM_GLOBAL_PUNCT_BIT
#undef PM_GLOBAL_PUNCT_WORD

/**
 * Return true if the given byte is one of the punctuation characters recorded
 * in pm_global_name_punctuation_hash. Bytes at or below 0x20 and above 0x7e
 * are never accepted.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    const unsigned int code = (const unsigned int) b;

    const bool in_table = (code > 0x20) && (code <= 0x7e);
    if (!in_table) return false;

    const unsigned int word = pm_global_name_punctuation_hash[(code - 0x20) / 32];
    return ((word >> (code % 32)) & 1) != 0;
}
1730 
1731 static inline bool
1732 token_is_setter_name(pm_token_t *token) {
1733  return (
1734  (token->type == PM_TOKEN_IDENTIFIER) &&
1735  (token->end - token->start >= 2) &&
1736  (token->end[-1] == '=')
1737  );
1738 }
1739 
/**
 * Return true if the first `length` bytes of `source` spell one of the
 * keywords in the table below. The comparison is exact: a keyword only
 * matches when `length` equals the keyword's own length.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
    static const struct {
        size_t length;
        const char *text;
    } keywords[] = {
        { 2, "do" }, { 2, "if" }, { 2, "in" }, { 2, "or" },

        { 3, "and" }, { 3, "def" }, { 3, "end" }, { 3, "for" },
        { 3, "nil" }, { 3, "not" },

        { 4, "case" }, { 4, "else" }, { 4, "next" }, { 4, "redo" },
        { 4, "self" }, { 4, "then" }, { 4, "true" }, { 4, "when" },

        { 5, "alias" }, { 5, "begin" }, { 5, "break" }, { 5, "class" },
        { 5, "elsif" }, { 5, "false" }, { 5, "retry" }, { 5, "super" },
        { 5, "undef" }, { 5, "until" }, { 5, "while" }, { 5, "yield" },

        { 6, "ensure" }, { 6, "module" }, { 6, "rescue" }, { 6, "return" },
        { 6, "unless" },

        { 8, "__LINE__" }, { 8, "__FILE__" },

        { 12, "__ENCODING__" }
    };

    for (size_t index = 0; index < sizeof(keywords) / sizeof(keywords[0]); index++) {
        // Check the length first so that source is only read when it could match.
        if (keywords[index].length != length) continue;
        if (memcmp(source, keywords[index].text, length) == 0) return true;
    }

    return false;
}
1810 
1811 /******************************************************************************/
1812 /* Node flag handling functions */
1813 /******************************************************************************/
1814 
1818 static inline void
1819 pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1820  node->flags |= flag;
1821 }
1822 
1826 static inline void
1827 pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1828  node->flags &= (pm_node_flags_t) ~flag;
1829 }
1830 
1834 static inline void
1835 pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1836  assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1837  PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1838  PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1839  PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1840  PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1841  PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1842  PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1843  PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1844 
1845  pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1846 }
1847 
1848 /******************************************************************************/
1849 /* Node creation functions */
1850 /******************************************************************************/
1851 
1857 #define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1858 
1862 static inline pm_node_flags_t
1863 pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1864  pm_node_flags_t flags = 0;
1865 
1866  if (closing->type == PM_TOKEN_REGEXP_END) {
1867  pm_buffer_t unknown_flags = { 0 };
1868 
1869  for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1870  switch (*flag) {
1871  case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1872  case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1873  case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1874  case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1875 
1876  case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1877  case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1878  case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1879  case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1880 
1881  default: pm_buffer_append_byte(&unknown_flags, *flag);
1882  }
1883  }
1884 
1885  size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1886  if (unknown_flags_length != 0) {
1887  const char *word = unknown_flags_length >= 2 ? "options" : "option";
1888  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1889  }
1890  pm_buffer_free(&unknown_flags);
1891  }
1892 
1893  return flags;
1894 }
1895 
1896 #undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1897 
1898 static pm_statements_node_t *
1899 pm_statements_node_create(pm_parser_t *parser);
1900 
1901 static void
1902 pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1903 
1904 static size_t
1905 pm_statements_node_body_length(pm_statements_node_t *node);
1906 
1911 static inline void *
1912 pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1913  void *memory = xcalloc(1, size);
1914  if (memory == NULL) {
1915  fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1916  abort();
1917  }
1918  return memory;
1919 }
1920 
/**
 * Allocate memory for a node of the given type through pm_node_alloc and cast
 * the result to a pointer to that type. The expansion is fully parenthesized
 * so that it can be used inside larger expressions.
 */
#define PM_NODE_ALLOC(parser, type) ((type *) pm_node_alloc((parser), sizeof(type)))

/**
 * Increment the parser's node_id counter and yield the new value. The parser
 * argument is parenthesized so that any pointer expression (for example
 * `&local_parser`) can be passed.
 */
#define PM_NODE_IDENTIFY(parser) (++(parser)->node_id)
1923 
1927 static pm_missing_node_t *
1928 pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1929  pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1930 
1931  *node = (pm_missing_node_t) {{
1932  .type = PM_MISSING_NODE,
1933  .node_id = PM_NODE_IDENTIFY(parser),
1934  .location = { .start = start, .end = end }
1935  }};
1936 
1937  return node;
1938 }
1939 
1943 static pm_alias_global_variable_node_t *
1944 pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1945  assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1946  pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1947 
1948  *node = (pm_alias_global_variable_node_t) {
1949  {
1950  .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
1951  .node_id = PM_NODE_IDENTIFY(parser),
1952  .location = {
1953  .start = keyword->start,
1954  .end = old_name->location.end
1955  },
1956  },
1957  .new_name = new_name,
1958  .old_name = old_name,
1959  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1960  };
1961 
1962  return node;
1963 }
1964 
1968 static pm_alias_method_node_t *
1969 pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1970  assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1971  pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1972 
1973  *node = (pm_alias_method_node_t) {
1974  {
1975  .type = PM_ALIAS_METHOD_NODE,
1976  .node_id = PM_NODE_IDENTIFY(parser),
1977  .location = {
1978  .start = keyword->start,
1979  .end = old_name->location.end
1980  },
1981  },
1982  .new_name = new_name,
1983  .old_name = old_name,
1984  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1985  };
1986 
1987  return node;
1988 }
1989 
1993 static pm_alternation_pattern_node_t *
1994 pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
1995  pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
1996 
1997  *node = (pm_alternation_pattern_node_t) {
1998  {
1999  .type = PM_ALTERNATION_PATTERN_NODE,
2000  .node_id = PM_NODE_IDENTIFY(parser),
2001  .location = {
2002  .start = left->location.start,
2003  .end = right->location.end
2004  },
2005  },
2006  .left = left,
2007  .right = right,
2008  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2009  };
2010 
2011  return node;
2012 }
2013 
2017 static pm_and_node_t *
2018 pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2019  pm_assert_value_expression(parser, left);
2020 
2021  pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2022 
2023  *node = (pm_and_node_t) {
2024  {
2025  .type = PM_AND_NODE,
2026  .node_id = PM_NODE_IDENTIFY(parser),
2027  .location = {
2028  .start = left->location.start,
2029  .end = right->location.end
2030  },
2031  },
2032  .left = left,
2033  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2034  .right = right
2035  };
2036 
2037  return node;
2038 }
2039 
2043 static pm_arguments_node_t *
2044 pm_arguments_node_create(pm_parser_t *parser) {
2045  pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2046 
2047  *node = (pm_arguments_node_t) {
2048  {
2049  .type = PM_ARGUMENTS_NODE,
2050  .node_id = PM_NODE_IDENTIFY(parser),
2051  .location = PM_LOCATION_NULL_VALUE(parser)
2052  },
2053  .arguments = { 0 }
2054  };
2055 
2056  return node;
2057 }
2058 
2062 static size_t
2063 pm_arguments_node_size(pm_arguments_node_t *node) {
2064  return node->arguments.size;
2065 }
2066 
2070 static void
2071 pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2072  if (pm_arguments_node_size(node) == 0) {
2073  node->base.location.start = argument->location.start;
2074  }
2075 
2076  node->base.location.end = argument->location.end;
2077  pm_node_list_append(&node->arguments, argument);
2078 
2079  if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2080  if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2081  pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2082  } else {
2083  pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2084  }
2085  }
2086 }
2087 
2091 static pm_array_node_t *
2092 pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2093  pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2094 
2095  *node = (pm_array_node_t) {
2096  {
2097  .type = PM_ARRAY_NODE,
2098  .flags = PM_NODE_FLAG_STATIC_LITERAL,
2099  .node_id = PM_NODE_IDENTIFY(parser),
2100  .location = PM_LOCATION_TOKEN_VALUE(opening)
2101  },
2102  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2103  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2104  .elements = { 0 }
2105  };
2106 
2107  return node;
2108 }
2109 
2113 static inline void
2114 pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2115  if (!node->elements.size && !node->opening_loc.start) {
2116  node->base.location.start = element->location.start;
2117  }
2118 
2119  pm_node_list_append(&node->elements, element);
2120  node->base.location.end = element->location.end;
2121 
2122  // If the element is not a static literal, then the array is not a static
2123  // literal. Turn that flag off.
2124  if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2125  pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
2126  }
2127 
2128  if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2129  pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2130  }
2131 }
2132 
2136 static void
2137 pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2138  assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2139  node->base.location.end = closing->end;
2140  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2141 }
2142 
2147 static pm_array_pattern_node_t *
2148 pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2149  pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2150 
2151  *node = (pm_array_pattern_node_t) {
2152  {
2153  .type = PM_ARRAY_PATTERN_NODE,
2154  .node_id = PM_NODE_IDENTIFY(parser),
2155  .location = {
2156  .start = nodes->nodes[0]->location.start,
2157  .end = nodes->nodes[nodes->size - 1]->location.end
2158  },
2159  },
2160  .constant = NULL,
2161  .rest = NULL,
2162  .requireds = { 0 },
2163  .posts = { 0 },
2164  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2165  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2166  };
2167 
2168  // For now we're going to just copy over each pointer manually. This could be
2169  // much more efficient, as we could instead resize the node list.
2170  bool found_rest = false;
2171  pm_node_t *child;
2172 
2173  PM_NODE_LIST_FOREACH(nodes, index, child) {
2174  if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2175  node->rest = child;
2176  found_rest = true;
2177  } else if (found_rest) {
2178  pm_node_list_append(&node->posts, child);
2179  } else {
2180  pm_node_list_append(&node->requireds, child);
2181  }
2182  }
2183 
2184  return node;
2185 }
2186 
2190 static pm_array_pattern_node_t *
2191 pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2192  pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2193 
2194  *node = (pm_array_pattern_node_t) {
2195  {
2196  .type = PM_ARRAY_PATTERN_NODE,
2197  .node_id = PM_NODE_IDENTIFY(parser),
2198  .location = rest->location,
2199  },
2200  .constant = NULL,
2201  .rest = rest,
2202  .requireds = { 0 },
2203  .posts = { 0 },
2204  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2205  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2206  };
2207 
2208  return node;
2209 }
2210 
2215 static pm_array_pattern_node_t *
2216 pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2217  pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2218 
2219  *node = (pm_array_pattern_node_t) {
2220  {
2221  .type = PM_ARRAY_PATTERN_NODE,
2222  .node_id = PM_NODE_IDENTIFY(parser),
2223  .location = {
2224  .start = constant->location.start,
2225  .end = closing->end
2226  },
2227  },
2228  .constant = constant,
2229  .rest = NULL,
2230  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2231  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2232  .requireds = { 0 },
2233  .posts = { 0 }
2234  };
2235 
2236  return node;
2237 }
2238 
2243 static pm_array_pattern_node_t *
2244 pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2245  pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2246 
2247  *node = (pm_array_pattern_node_t) {
2248  {
2249  .type = PM_ARRAY_PATTERN_NODE,
2250  .node_id = PM_NODE_IDENTIFY(parser),
2251  .location = {
2252  .start = opening->start,
2253  .end = closing->end
2254  },
2255  },
2256  .constant = NULL,
2257  .rest = NULL,
2258  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2259  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2260  .requireds = { 0 },
2261  .posts = { 0 }
2262  };
2263 
2264  return node;
2265 }
2266 
2267 static inline void
2268 pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2269  pm_node_list_append(&node->requireds, inner);
2270 }
2271 
2275 static pm_assoc_node_t *
2276 pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2277  pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2278  const uint8_t *end;
2279 
2280  if (value != NULL && value->location.end > key->location.end) {
2281  end = value->location.end;
2282  } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2283  end = operator->end;
2284  } else {
2285  end = key->location.end;
2286  }
2287 
2288  // Hash string keys will be frozen, so we can mark them as frozen here so
2289  // that the compiler picks them up and also when we check for static literal
2290  // on the keys it gets factored in.
2291  if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2292  key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2293  }
2294 
2295  // If the key and value of this assoc node are both static literals, then
2296  // we can mark this node as a static literal.
2297  pm_node_flags_t flags = 0;
2298  if (
2299  !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2300  value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2301  ) {
2302  flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2303  }
2304 
2305  *node = (pm_assoc_node_t) {
2306  {
2307  .type = PM_ASSOC_NODE,
2308  .flags = flags,
2309  .node_id = PM_NODE_IDENTIFY(parser),
2310  .location = {
2311  .start = key->location.start,
2312  .end = end
2313  },
2314  },
2315  .key = key,
2316  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2317  .value = value
2318  };
2319 
2320  return node;
2321 }
2322 
2326 static pm_assoc_splat_node_t *
2327 pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2328  assert(operator->type == PM_TOKEN_USTAR_STAR);
2329  pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2330 
2331  *node = (pm_assoc_splat_node_t) {
2332  {
2333  .type = PM_ASSOC_SPLAT_NODE,
2334  .node_id = PM_NODE_IDENTIFY(parser),
2335  .location = {
2336  .start = operator->start,
2337  .end = value == NULL ? operator->end : value->location.end
2338  },
2339  },
2340  .value = value,
2341  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2342  };
2343 
2344  return node;
2345 }
2346 
2350 static pm_back_reference_read_node_t *
2351 pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2352  assert(name->type == PM_TOKEN_BACK_REFERENCE);
2353  pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2354 
2355  *node = (pm_back_reference_read_node_t) {
2356  {
2357  .type = PM_BACK_REFERENCE_READ_NODE,
2358  .node_id = PM_NODE_IDENTIFY(parser),
2359  .location = PM_LOCATION_TOKEN_VALUE(name),
2360  },
2361  .name = pm_parser_constant_id_token(parser, name)
2362  };
2363 
2364  return node;
2365 }
2366 
2370 static pm_begin_node_t *
2371 pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2372  pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2373 
2374  *node = (pm_begin_node_t) {
2375  {
2376  .type = PM_BEGIN_NODE,
2377  .node_id = PM_NODE_IDENTIFY(parser),
2378  .location = {
2379  .start = begin_keyword->start,
2380  .end = statements == NULL ? begin_keyword->end : statements->base.location.end
2381  },
2382  },
2383  .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2384  .statements = statements,
2385  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2386  };
2387 
2388  return node;
2389 }
2390 
2394 static void
2395 pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2396  // If the begin keyword doesn't exist, we set the start on the begin_node
2397  if (!node->begin_keyword_loc.start) {
2398  node->base.location.start = rescue_clause->base.location.start;
2399  }
2400  node->base.location.end = rescue_clause->base.location.end;
2401  node->rescue_clause = rescue_clause;
2402 }
2403 
2407 static void
2408 pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2409  node->base.location.end = else_clause->base.location.end;
2410  node->else_clause = else_clause;
2411 }
2412 
2416 static void
2417 pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2418  node->base.location.end = ensure_clause->base.location.end;
2419  node->ensure_clause = ensure_clause;
2420 }
2421 
2425 static void
2426 pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2427  assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2428 
2429  node->base.location.end = end_keyword->end;
2430  node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2431 }
2432 
2436 static pm_block_argument_node_t *
2437 pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2438  pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2439 
2440  *node = (pm_block_argument_node_t) {
2441  {
2442  .type = PM_BLOCK_ARGUMENT_NODE,
2443  .node_id = PM_NODE_IDENTIFY(parser),
2444  .location = {
2445  .start = operator->start,
2446  .end = expression == NULL ? operator->end : expression->location.end
2447  },
2448  },
2449  .expression = expression,
2450  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2451  };
2452 
2453  return node;
2454 }
2455 
2459 static pm_block_node_t *
2460 pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2461  pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2462 
2463  *node = (pm_block_node_t) {
2464  {
2465  .type = PM_BLOCK_NODE,
2466  .node_id = PM_NODE_IDENTIFY(parser),
2467  .location = { .start = opening->start, .end = closing->end },
2468  },
2469  .locals = *locals,
2470  .parameters = parameters,
2471  .body = body,
2472  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2473  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2474  };
2475 
2476  return node;
2477 }
2478 
2482 static pm_block_parameter_node_t *
2483 pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2484  assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2485  pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2486 
2487  *node = (pm_block_parameter_node_t) {
2488  {
2489  .type = PM_BLOCK_PARAMETER_NODE,
2490  .node_id = PM_NODE_IDENTIFY(parser),
2491  .location = {
2492  .start = operator->start,
2493  .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
2494  },
2495  },
2496  .name = pm_parser_optional_constant_id_token(parser, name),
2497  .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2498  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2499  };
2500 
2501  return node;
2502 }
2503 
2507 static pm_block_parameters_node_t *
2508 pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2509  pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2510 
2511  const uint8_t *start;
2512  if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2513  start = opening->start;
2514  } else if (parameters != NULL) {
2515  start = parameters->base.location.start;
2516  } else {
2517  start = NULL;
2518  }
2519 
2520  const uint8_t *end;
2521  if (parameters != NULL) {
2522  end = parameters->base.location.end;
2523  } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2524  end = opening->end;
2525  } else {
2526  end = NULL;
2527  }
2528 
2529  *node = (pm_block_parameters_node_t) {
2530  {
2531  .type = PM_BLOCK_PARAMETERS_NODE,
2532  .node_id = PM_NODE_IDENTIFY(parser),
2533  .location = {
2534  .start = start,
2535  .end = end
2536  }
2537  },
2538  .parameters = parameters,
2539  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2540  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2541  .locals = { 0 }
2542  };
2543 
2544  return node;
2545 }
2546 
2550 static void
2551 pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2552  assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2553 
2554  node->base.location.end = closing->end;
2555  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2556 }
2557 
2561 static pm_block_local_variable_node_t *
2562 pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2563  pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2564 
2565  *node = (pm_block_local_variable_node_t) {
2566  {
2567  .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
2568  .node_id = PM_NODE_IDENTIFY(parser),
2569  .location = PM_LOCATION_TOKEN_VALUE(name),
2570  },
2571  .name = pm_parser_constant_id_token(parser, name)
2572  };
2573 
2574  return node;
2575 }
2576 
2580 static void
2581 pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2582  pm_node_list_append(&node->locals, (pm_node_t *) local);
2583 
2584  if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2585  node->base.location.end = local->base.location.end;
2586 }
2587 
2591 static pm_break_node_t *
2592 pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2593  assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2594  pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2595 
2596  *node = (pm_break_node_t) {
2597  {
2598  .type = PM_BREAK_NODE,
2599  .node_id = PM_NODE_IDENTIFY(parser),
2600  .location = {
2601  .start = keyword->start,
2602  .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
2603  },
2604  },
2605  .arguments = arguments,
2606  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2607  };
2608 
2609  return node;
2610 }
2611 
2612 // There are certain flags that we want to use internally but don't want to
2613 // expose because they are not relevant beyond parsing. Therefore we'll define
2614 // them here and not define them in config.yml/a header file.
2615 static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
2616 static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
2617 static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
2618 static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
2619 
2625 static pm_call_node_t *
2626 pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2627  pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2628 
2629  *node = (pm_call_node_t) {
2630  {
2631  .type = PM_CALL_NODE,
2632  .flags = flags,
2633  .node_id = PM_NODE_IDENTIFY(parser),
2634  .location = PM_LOCATION_NULL_VALUE(parser),
2635  },
2636  .receiver = NULL,
2637  .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2638  .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2639  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2640  .arguments = NULL,
2641  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2642  .block = NULL,
2643  .name = 0
2644  };
2645 
2646  return node;
2647 }
2648 
2653 static inline pm_node_flags_t
2654 pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2655  return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2656 }
2657 
2662 static pm_call_node_t *
2663 pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2664  pm_assert_value_expression(parser, receiver);
2665 
2666  pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2667  if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2668  flags |= PM_CALL_NODE_FLAGS_INDEX;
2669  }
2670 
2671  pm_call_node_t *node = pm_call_node_create(parser, flags);
2672 
2673  node->base.location.start = receiver->location.start;
2674  node->base.location.end = pm_arguments_end(arguments);
2675 
2676  node->receiver = receiver;
2677  node->message_loc.start = arguments->opening_loc.start;
2678  node->message_loc.end = arguments->closing_loc.end;
2679 
2680  node->opening_loc = arguments->opening_loc;
2681  node->arguments = arguments->arguments;
2682  node->closing_loc = arguments->closing_loc;
2683  node->block = arguments->block;
2684 
2685  node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2686  return node;
2687 }
2688 
2692 static pm_call_node_t *
2693 pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2694  pm_assert_value_expression(parser, receiver);
2695  pm_assert_value_expression(parser, argument);
2696 
2697  pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2698 
2699  node->base.location.start = MIN(receiver->location.start, argument->location.start);
2700  node->base.location.end = MAX(receiver->location.end, argument->location.end);
2701 
2702  node->receiver = receiver;
2703  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2704 
2705  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2706  pm_arguments_node_arguments_append(arguments, argument);
2707  node->arguments = arguments;
2708 
2709  node->name = pm_parser_constant_id_token(parser, operator);
2710  return node;
2711 }
2712 
2716 static pm_call_node_t *
2717 pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2718  pm_assert_value_expression(parser, receiver);
2719 
2720  pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2721 
2722  node->base.location.start = receiver->location.start;
2723  const uint8_t *end = pm_arguments_end(arguments);
2724  if (end == NULL) {
2725  end = message->end;
2726  }
2727  node->base.location.end = end;
2728 
2729  node->receiver = receiver;
2730  node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2731  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2732  node->opening_loc = arguments->opening_loc;
2733  node->arguments = arguments->arguments;
2734  node->closing_loc = arguments->closing_loc;
2735  node->block = arguments->block;
2736 
2737  if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2738  pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2739  }
2740 
2741  node->name = pm_parser_constant_id_token(parser, message);
2742  return node;
2743 }
2744 
2748 static pm_call_node_t *
2749 pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2750  pm_call_node_t *node = pm_call_node_create(parser, 0);
2751  node->base.location.start = parser->start;
2752  node->base.location.end = parser->end;
2753 
2754  node->receiver = receiver;
2755  node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2756  node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2757  node->arguments = arguments;
2758 
2759  node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2760  return node;
2761 }
2762 
2767 static pm_call_node_t *
2768 pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2769  pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2770 
2771  node->base.location.start = message->start;
2772  node->base.location.end = pm_arguments_end(arguments);
2773 
2774  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2775  node->opening_loc = arguments->opening_loc;
2776  node->arguments = arguments->arguments;
2777  node->closing_loc = arguments->closing_loc;
2778  node->block = arguments->block;
2779 
2780  node->name = pm_parser_constant_id_token(parser, message);
2781  return node;
2782 }
2783 
2788 static pm_call_node_t *
2789 pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2790  pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2791 
2792  node->base.location = PM_LOCATION_NULL_VALUE(parser);
2793  node->arguments = arguments;
2794 
2795  node->name = name;
2796  return node;
2797 }
2798 
2802 static pm_call_node_t *
2803 pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2804  pm_assert_value_expression(parser, receiver);
2805  if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2806 
2807  pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2808 
2809  node->base.location.start = message->start;
2810  if (arguments->closing_loc.start != NULL) {
2811  node->base.location.end = arguments->closing_loc.end;
2812  } else {
2813  assert(receiver != NULL);
2814  node->base.location.end = receiver->location.end;
2815  }
2816 
2817  node->receiver = receiver;
2818  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2819  node->opening_loc = arguments->opening_loc;
2820  node->arguments = arguments->arguments;
2821  node->closing_loc = arguments->closing_loc;
2822 
2823  node->name = pm_parser_constant_id_constant(parser, "!", 1);
2824  return node;
2825 }
2826 
2830 static pm_call_node_t *
2831 pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2832  pm_assert_value_expression(parser, receiver);
2833 
2834  pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2835 
2836  node->base.location.start = receiver->location.start;
2837  node->base.location.end = pm_arguments_end(arguments);
2838 
2839  node->receiver = receiver;
2840  node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2841  node->opening_loc = arguments->opening_loc;
2842  node->arguments = arguments->arguments;
2843  node->closing_loc = arguments->closing_loc;
2844  node->block = arguments->block;
2845 
2846  if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2847  pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2848  }
2849 
2850  node->name = pm_parser_constant_id_constant(parser, "call", 4);
2851  return node;
2852 }
2853 
2857 static pm_call_node_t *
2858 pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2859  pm_assert_value_expression(parser, receiver);
2860 
2861  pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2862 
2863  node->base.location.start = operator->start;
2864  node->base.location.end = receiver->location.end;
2865 
2866  node->receiver = receiver;
2867  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2868 
2869  node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2870  return node;
2871 }
2872 
2877 static pm_call_node_t *
2878 pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2879  pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2880 
2881  node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2882  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2883 
2884  node->name = pm_parser_constant_id_token(parser, message);
2885  return node;
2886 }
2887 
2892 static inline bool
2893 pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2894  return (
2895  (node->message_loc.start != NULL) &&
2896  (node->message_loc.end[-1] != '!') &&
2897  (node->message_loc.end[-1] != '?') &&
2898  char_is_identifier_start(parser, node->message_loc.start) &&
2899  (node->opening_loc.start == NULL) &&
2900  (node->arguments == NULL) &&
2901  (node->block == NULL)
2902  );
2903 }
2904 
2908 static void
2909 pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2910  pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2911 
2912  if (write_constant->length > 0) {
2913  size_t length = write_constant->length - 1;
2914 
2915  void *memory = xmalloc(length);
2916  memcpy(memory, write_constant->start, length);
2917 
2918  *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2919  } else {
2920  // We can get here if the message was missing because of a syntax error.
2921  *read_name = pm_parser_constant_id_constant(parser, "", 0);
2922  }
2923 }
2924 
2928 static pm_call_and_write_node_t *
2929 pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2930  assert(target->block == NULL);
2931  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2932  pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2933 
2934  *node = (pm_call_and_write_node_t) {
2935  {
2936  .type = PM_CALL_AND_WRITE_NODE,
2937  .flags = target->base.flags,
2938  .node_id = PM_NODE_IDENTIFY(parser),
2939  .location = {
2940  .start = target->base.location.start,
2941  .end = value->location.end
2942  }
2943  },
2944  .receiver = target->receiver,
2945  .call_operator_loc = target->call_operator_loc,
2946  .message_loc = target->message_loc,
2947  .read_name = 0,
2948  .write_name = target->name,
2949  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2950  .value = value
2951  };
2952 
2953  pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2954 
2955  // Here we're going to free the target, since it is no longer necessary.
2956  // However, we don't want to call `pm_node_destroy` because we want to keep
2957  // around all of its children since we just reused them.
2958  xfree(target);
2959 
2960  return node;
2961 }
2962 
2967 static void
2968 pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2969  if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
2970  if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2971  pm_node_t *node;
2972  PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2973  if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2974  pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2975  break;
2976  }
2977  }
2978  }
2979 
2980  if (block != NULL) {
2981  pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2982  }
2983  }
2984 }
2985 
2989 static pm_index_and_write_node_t *
2990 pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2991  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2992  pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
2993 
2994  pm_index_arguments_check(parser, target->arguments, target->block);
2995 
2996  assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2997  *node = (pm_index_and_write_node_t) {
2998  {
2999  .type = PM_INDEX_AND_WRITE_NODE,
3000  .flags = target->base.flags,
3001  .node_id = PM_NODE_IDENTIFY(parser),
3002  .location = {
3003  .start = target->base.location.start,
3004  .end = value->location.end
3005  }
3006  },
3007  .receiver = target->receiver,
3008  .call_operator_loc = target->call_operator_loc,
3009  .opening_loc = target->opening_loc,
3010  .arguments = target->arguments,
3011  .closing_loc = target->closing_loc,
3012  .block = (pm_block_argument_node_t *) target->block,
3013  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3014  .value = value
3015  };
3016 
3017  // Here we're going to free the target, since it is no longer necessary.
3018  // However, we don't want to call `pm_node_destroy` because we want to keep
3019  // around all of its children since we just reused them.
3020  xfree(target);
3021 
3022  return node;
3023 }
3024 
3028 static pm_call_operator_write_node_t *
3029 pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3030  assert(target->block == NULL);
3031  pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
3032 
3033  *node = (pm_call_operator_write_node_t) {
3034  {
3035  .type = PM_CALL_OPERATOR_WRITE_NODE,
3036  .flags = target->base.flags,
3037  .node_id = PM_NODE_IDENTIFY(parser),
3038  .location = {
3039  .start = target->base.location.start,
3040  .end = value->location.end
3041  }
3042  },
3043  .receiver = target->receiver,
3044  .call_operator_loc = target->call_operator_loc,
3045  .message_loc = target->message_loc,
3046  .read_name = 0,
3047  .write_name = target->name,
3048  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3049  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3050  .value = value
3051  };
3052 
3053  pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3054 
3055  // Here we're going to free the target, since it is no longer necessary.
3056  // However, we don't want to call `pm_node_destroy` because we want to keep
3057  // around all of its children since we just reused them.
3058  xfree(target);
3059 
3060  return node;
3061 }
3062 
3066 static pm_index_operator_write_node_t *
3067 pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3068  pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
3069 
3070  pm_index_arguments_check(parser, target->arguments, target->block);
3071 
3072  assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3073  *node = (pm_index_operator_write_node_t) {
3074  {
3075  .type = PM_INDEX_OPERATOR_WRITE_NODE,
3076  .flags = target->base.flags,
3077  .node_id = PM_NODE_IDENTIFY(parser),
3078  .location = {
3079  .start = target->base.location.start,
3080  .end = value->location.end
3081  }
3082  },
3083  .receiver = target->receiver,
3084  .call_operator_loc = target->call_operator_loc,
3085  .opening_loc = target->opening_loc,
3086  .arguments = target->arguments,
3087  .closing_loc = target->closing_loc,
3088  .block = (pm_block_argument_node_t *) target->block,
3089  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3090  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3091  .value = value
3092  };
3093 
3094  // Here we're going to free the target, since it is no longer necessary.
3095  // However, we don't want to call `pm_node_destroy` because we want to keep
3096  // around all of its children since we just reused them.
3097  xfree(target);
3098 
3099  return node;
3100 }
3101 
3105 static pm_call_or_write_node_t *
3106 pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3107  assert(target->block == NULL);
3108  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3109  pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3110 
3111  *node = (pm_call_or_write_node_t) {
3112  {
3113  .type = PM_CALL_OR_WRITE_NODE,
3114  .flags = target->base.flags,
3115  .node_id = PM_NODE_IDENTIFY(parser),
3116  .location = {
3117  .start = target->base.location.start,
3118  .end = value->location.end
3119  }
3120  },
3121  .receiver = target->receiver,
3122  .call_operator_loc = target->call_operator_loc,
3123  .message_loc = target->message_loc,
3124  .read_name = 0,
3125  .write_name = target->name,
3126  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3127  .value = value
3128  };
3129 
3130  pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3131 
3132  // Here we're going to free the target, since it is no longer necessary.
3133  // However, we don't want to call `pm_node_destroy` because we want to keep
3134  // around all of its children since we just reused them.
3135  xfree(target);
3136 
3137  return node;
3138 }
3139 
3143 static pm_index_or_write_node_t *
3144 pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3145  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3146  pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3147 
3148  pm_index_arguments_check(parser, target->arguments, target->block);
3149 
3150  assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3151  *node = (pm_index_or_write_node_t) {
3152  {
3153  .type = PM_INDEX_OR_WRITE_NODE,
3154  .flags = target->base.flags,
3155  .node_id = PM_NODE_IDENTIFY(parser),
3156  .location = {
3157  .start = target->base.location.start,
3158  .end = value->location.end
3159  }
3160  },
3161  .receiver = target->receiver,
3162  .call_operator_loc = target->call_operator_loc,
3163  .opening_loc = target->opening_loc,
3164  .arguments = target->arguments,
3165  .closing_loc = target->closing_loc,
3166  .block = (pm_block_argument_node_t *) target->block,
3167  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3168  .value = value
3169  };
3170 
3171  // Here we're going to free the target, since it is no longer necessary.
3172  // However, we don't want to call `pm_node_destroy` because we want to keep
3173  // around all of its children since we just reused them.
3174  xfree(target);
3175 
3176  return node;
3177 }
3178 
3183 static pm_call_target_node_t *
3184 pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3185  pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3186 
3187  *node = (pm_call_target_node_t) {
3188  {
3189  .type = PM_CALL_TARGET_NODE,
3190  .flags = target->base.flags,
3191  .node_id = PM_NODE_IDENTIFY(parser),
3192  .location = target->base.location
3193  },
3194  .receiver = target->receiver,
3195  .call_operator_loc = target->call_operator_loc,
3196  .name = target->name,
3197  .message_loc = target->message_loc
3198  };
3199 
3200  // Here we're going to free the target, since it is no longer necessary.
3201  // However, we don't want to call `pm_node_destroy` because we want to keep
3202  // around all of its children since we just reused them.
3203  xfree(target);
3204 
3205  return node;
3206 }
3207 
3212 static pm_index_target_node_t *
3213 pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3214  pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3215  pm_node_flags_t flags = target->base.flags;
3216 
3217  pm_index_arguments_check(parser, target->arguments, target->block);
3218 
3219  assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3220  *node = (pm_index_target_node_t) {
3221  {
3222  .type = PM_INDEX_TARGET_NODE,
3223  .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3224  .node_id = PM_NODE_IDENTIFY(parser),
3225  .location = target->base.location
3226  },
3227  .receiver = target->receiver,
3228  .opening_loc = target->opening_loc,
3229  .arguments = target->arguments,
3230  .closing_loc = target->closing_loc,
3231  .block = (pm_block_argument_node_t *) target->block,
3232  };
3233 
3234  // Here we're going to free the target, since it is no longer necessary.
3235  // However, we don't want to call `pm_node_destroy` because we want to keep
3236  // around all of its children since we just reused them.
3237  xfree(target);
3238 
3239  return node;
3240 }
3241 
3245 static pm_capture_pattern_node_t *
3246 pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3247  pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3248 
3249  *node = (pm_capture_pattern_node_t) {
3250  {
3251  .type = PM_CAPTURE_PATTERN_NODE,
3252  .node_id = PM_NODE_IDENTIFY(parser),
3253  .location = {
3254  .start = value->location.start,
3255  .end = target->base.location.end
3256  },
3257  },
3258  .value = value,
3259  .target = target,
3260  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3261  };
3262 
3263  return node;
3264 }
3265 
3269 static pm_case_node_t *
3270 pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3271  pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3272 
3273  *node = (pm_case_node_t) {
3274  {
3275  .type = PM_CASE_NODE,
3276  .node_id = PM_NODE_IDENTIFY(parser),
3277  .location = {
3278  .start = case_keyword->start,
3279  .end = end_keyword->end
3280  },
3281  },
3282  .predicate = predicate,
3283  .else_clause = NULL,
3284  .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3285  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3286  .conditions = { 0 }
3287  };
3288 
3289  return node;
3290 }
3291 
3295 static void
3296 pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3297  assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3298 
3299  pm_node_list_append(&node->conditions, condition);
3300  node->base.location.end = condition->location.end;
3301 }
3302 
3306 static void
3307 pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3308  node->else_clause = else_clause;
3309  node->base.location.end = else_clause->base.location.end;
3310 }
3311 
3315 static void
3316 pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3317  node->base.location.end = end_keyword->end;
3318  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3319 }
3320 
3324 static pm_case_match_node_t *
3325 pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3326  pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3327 
3328  *node = (pm_case_match_node_t) {
3329  {
3330  .type = PM_CASE_MATCH_NODE,
3331  .node_id = PM_NODE_IDENTIFY(parser),
3332  .location = {
3333  .start = case_keyword->start,
3334  .end = end_keyword->end
3335  },
3336  },
3337  .predicate = predicate,
3338  .else_clause = NULL,
3339  .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3340  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3341  .conditions = { 0 }
3342  };
3343 
3344  return node;
3345 }
3346 
3350 static void
3351 pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3352  assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3353 
3354  pm_node_list_append(&node->conditions, condition);
3355  node->base.location.end = condition->location.end;
3356 }
3357 
3361 static void
3362 pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3363  node->else_clause = else_clause;
3364  node->base.location.end = else_clause->base.location.end;
3365 }
3366 
3370 static void
3371 pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3372  node->base.location.end = end_keyword->end;
3373  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3374 }
3375 
3379 static pm_class_node_t *
3380 pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3381  pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3382 
3383  *node = (pm_class_node_t) {
3384  {
3385  .type = PM_CLASS_NODE,
3386  .node_id = PM_NODE_IDENTIFY(parser),
3387  .location = { .start = class_keyword->start, .end = end_keyword->end },
3388  },
3389  .locals = *locals,
3390  .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3391  .constant_path = constant_path,
3392  .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3393  .superclass = superclass,
3394  .body = body,
3395  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3396  .name = pm_parser_constant_id_token(parser, name)
3397  };
3398 
3399  return node;
3400 }
3401 
3405 static pm_class_variable_and_write_node_t *
3406 pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3407  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3408  pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3409 
3410  *node = (pm_class_variable_and_write_node_t) {
3411  {
3412  .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
3413  .node_id = PM_NODE_IDENTIFY(parser),
3414  .location = {
3415  .start = target->base.location.start,
3416  .end = value->location.end
3417  }
3418  },
3419  .name = target->name,
3420  .name_loc = target->base.location,
3421  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3422  .value = value
3423  };
3424 
3425  return node;
3426 }
3427 
3431 static pm_class_variable_operator_write_node_t *
3432 pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3433  pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3434 
3435  *node = (pm_class_variable_operator_write_node_t) {
3436  {
3437  .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
3438  .node_id = PM_NODE_IDENTIFY(parser),
3439  .location = {
3440  .start = target->base.location.start,
3441  .end = value->location.end
3442  }
3443  },
3444  .name = target->name,
3445  .name_loc = target->base.location,
3446  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3447  .value = value,
3448  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3449  };
3450 
3451  return node;
3452 }
3453 
3457 static pm_class_variable_or_write_node_t *
3458 pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3459  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3460  pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3461 
3462  *node = (pm_class_variable_or_write_node_t) {
3463  {
3464  .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
3465  .node_id = PM_NODE_IDENTIFY(parser),
3466  .location = {
3467  .start = target->base.location.start,
3468  .end = value->location.end
3469  }
3470  },
3471  .name = target->name,
3472  .name_loc = target->base.location,
3473  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3474  .value = value
3475  };
3476 
3477  return node;
3478 }
3479 
3483 static pm_class_variable_read_node_t *
3484 pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3485  assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3486  pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3487 
3488  *node = (pm_class_variable_read_node_t) {
3489  {
3490  .type = PM_CLASS_VARIABLE_READ_NODE,
3491  .node_id = PM_NODE_IDENTIFY(parser),
3492  .location = PM_LOCATION_TOKEN_VALUE(token)
3493  },
3494  .name = pm_parser_constant_id_token(parser, token)
3495  };
3496 
3497  return node;
3498 }
3499 
3506 static inline pm_node_flags_t
3507 pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3508  if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3509  return flags;
3510  }
3511  return 0;
3512 }
3513 
3517 static pm_class_variable_write_node_t *
3518 pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3519  pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3520 
3521  *node = (pm_class_variable_write_node_t) {
3522  {
3523  .type = PM_CLASS_VARIABLE_WRITE_NODE,
3524  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3525  .node_id = PM_NODE_IDENTIFY(parser),
3526  .location = {
3527  .start = read_node->base.location.start,
3528  .end = value->location.end
3529  },
3530  },
3531  .name = read_node->name,
3532  .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
3533  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3534  .value = value
3535  };
3536 
3537  return node;
3538 }
3539 
3543 static pm_constant_path_and_write_node_t *
3544 pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3545  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3546  pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3547 
3548  *node = (pm_constant_path_and_write_node_t) {
3549  {
3550  .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
3551  .node_id = PM_NODE_IDENTIFY(parser),
3552  .location = {
3553  .start = target->base.location.start,
3554  .end = value->location.end
3555  }
3556  },
3557  .target = target,
3558  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3559  .value = value
3560  };
3561 
3562  return node;
3563 }
3564 
3568 static pm_constant_path_operator_write_node_t *
3569 pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3570  pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3571 
3572  *node = (pm_constant_path_operator_write_node_t) {
3573  {
3574  .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
3575  .node_id = PM_NODE_IDENTIFY(parser),
3576  .location = {
3577  .start = target->base.location.start,
3578  .end = value->location.end
3579  }
3580  },
3581  .target = target,
3582  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3583  .value = value,
3584  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3585  };
3586 
3587  return node;
3588 }
3589 
3593 static pm_constant_path_or_write_node_t *
3594 pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3595  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3596  pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3597 
3598  *node = (pm_constant_path_or_write_node_t) {
3599  {
3600  .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
3601  .node_id = PM_NODE_IDENTIFY(parser),
3602  .location = {
3603  .start = target->base.location.start,
3604  .end = value->location.end
3605  }
3606  },
3607  .target = target,
3608  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3609  .value = value
3610  };
3611 
3612  return node;
3613 }
3614 
3618 static pm_constant_path_node_t *
3619 pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3620  pm_assert_value_expression(parser, parent);
3621  pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3622 
3623  pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3624  if (name_token->type == PM_TOKEN_CONSTANT) {
3625  name = pm_parser_constant_id_token(parser, name_token);
3626  }
3627 
3628  *node = (pm_constant_path_node_t) {
3629  {
3630  .type = PM_CONSTANT_PATH_NODE,
3631  .node_id = PM_NODE_IDENTIFY(parser),
3632  .location = {
3633  .start = parent == NULL ? delimiter->start : parent->location.start,
3634  .end = name_token->end
3635  },
3636  },
3637  .parent = parent,
3638  .name = name,
3639  .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3640  .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3641  };
3642 
3643  return node;
3644 }
3645 
3649 static pm_constant_path_write_node_t *
3650 pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3651  pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3652 
3653  *node = (pm_constant_path_write_node_t) {
3654  {
3655  .type = PM_CONSTANT_PATH_WRITE_NODE,
3656  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3657  .node_id = PM_NODE_IDENTIFY(parser),
3658  .location = {
3659  .start = target->base.location.start,
3660  .end = value->location.end
3661  },
3662  },
3663  .target = target,
3664  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3665  .value = value
3666  };
3667 
3668  return node;
3669 }
3670 
3674 static pm_constant_and_write_node_t *
3675 pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3676  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3677  pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3678 
3679  *node = (pm_constant_and_write_node_t) {
3680  {
3681  .type = PM_CONSTANT_AND_WRITE_NODE,
3682  .node_id = PM_NODE_IDENTIFY(parser),
3683  .location = {
3684  .start = target->base.location.start,
3685  .end = value->location.end
3686  }
3687  },
3688  .name = target->name,
3689  .name_loc = target->base.location,
3690  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3691  .value = value
3692  };
3693 
3694  return node;
3695 }
3696 
3700 static pm_constant_operator_write_node_t *
3701 pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3702  pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3703 
3704  *node = (pm_constant_operator_write_node_t) {
3705  {
3706  .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
3707  .node_id = PM_NODE_IDENTIFY(parser),
3708  .location = {
3709  .start = target->base.location.start,
3710  .end = value->location.end
3711  }
3712  },
3713  .name = target->name,
3714  .name_loc = target->base.location,
3715  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3716  .value = value,
3717  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3718  };
3719 
3720  return node;
3721 }
3722 
3726 static pm_constant_or_write_node_t *
3727 pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3728  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3729  pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3730 
3731  *node = (pm_constant_or_write_node_t) {
3732  {
3733  .type = PM_CONSTANT_OR_WRITE_NODE,
3734  .node_id = PM_NODE_IDENTIFY(parser),
3735  .location = {
3736  .start = target->base.location.start,
3737  .end = value->location.end
3738  }
3739  },
3740  .name = target->name,
3741  .name_loc = target->base.location,
3742  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3743  .value = value
3744  };
3745 
3746  return node;
3747 }
3748 
3752 static pm_constant_read_node_t *
3753 pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3754  assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3755  pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3756 
3757  *node = (pm_constant_read_node_t) {
3758  {
3759  .type = PM_CONSTANT_READ_NODE,
3760  .node_id = PM_NODE_IDENTIFY(parser),
3761  .location = PM_LOCATION_TOKEN_VALUE(name)
3762  },
3763  .name = pm_parser_constant_id_token(parser, name)
3764  };
3765 
3766  return node;
3767 }
3768 
3772 static pm_constant_write_node_t *
3773 pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3774  pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3775 
3776  *node = (pm_constant_write_node_t) {
3777  {
3778  .type = PM_CONSTANT_WRITE_NODE,
3779  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3780  .node_id = PM_NODE_IDENTIFY(parser),
3781  .location = {
3782  .start = target->base.location.start,
3783  .end = value->location.end
3784  }
3785  },
3786  .name = target->name,
3787  .name_loc = target->base.location,
3788  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3789  .value = value
3790  };
3791 
3792  return node;
3793 }
3794 
3798 static void
3799 pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3800  switch (PM_NODE_TYPE(node)) {
3801  case PM_BEGIN_NODE: {
3802  const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3803  if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
3804  break;
3805  }
3806  case PM_PARENTHESES_NODE: {
3807  const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3808  if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3809  break;
3810  }
3811  case PM_STATEMENTS_NODE: {
3812  const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3813  pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3814  break;
3815  }
3816  case PM_ARRAY_NODE:
3817  case PM_FLOAT_NODE:
3818  case PM_IMAGINARY_NODE:
3819  case PM_INTEGER_NODE:
3820  case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3821  case PM_INTERPOLATED_STRING_NODE:
3822  case PM_INTERPOLATED_SYMBOL_NODE:
3823  case PM_INTERPOLATED_X_STRING_NODE:
3824  case PM_RATIONAL_NODE:
3825  case PM_REGULAR_EXPRESSION_NODE:
3826  case PM_SOURCE_ENCODING_NODE:
3827  case PM_SOURCE_FILE_NODE:
3828  case PM_SOURCE_LINE_NODE:
3829  case PM_STRING_NODE:
3830  case PM_SYMBOL_NODE:
3831  case PM_X_STRING_NODE:
3832  pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3833  break;
3834  default:
3835  break;
3836  }
3837 }
3838 
3842 static pm_def_node_t *
3843 pm_def_node_create(
3844  pm_parser_t *parser,
3845  pm_constant_id_t name,
3846  const pm_token_t *name_loc,
3847  pm_node_t *receiver,
3848  pm_parameters_node_t *parameters,
3849  pm_node_t *body,
3850  pm_constant_id_list_t *locals,
3851  const pm_token_t *def_keyword,
3852  const pm_token_t *operator,
3853  const pm_token_t *lparen,
3854  const pm_token_t *rparen,
3855  const pm_token_t *equal,
3856  const pm_token_t *end_keyword
3857 ) {
3858  pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3859  const uint8_t *end;
3860 
3861  if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
3862  end = body->location.end;
3863  } else {
3864  end = end_keyword->end;
3865  }
3866 
3867  if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
3868  pm_def_node_receiver_check(parser, receiver);
3869  }
3870 
3871  *node = (pm_def_node_t) {
3872  {
3873  .type = PM_DEF_NODE,
3874  .node_id = PM_NODE_IDENTIFY(parser),
3875  .location = { .start = def_keyword->start, .end = end },
3876  },
3877  .name = name,
3878  .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3879  .receiver = receiver,
3880  .parameters = parameters,
3881  .body = body,
3882  .locals = *locals,
3883  .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3884  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3885  .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3886  .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3887  .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3888  .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3889  };
3890 
3891  return node;
3892 }
3893 
3897 static pm_defined_node_t *
3898 pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
3899  pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3900 
3901  *node = (pm_defined_node_t) {
3902  {
3903  .type = PM_DEFINED_NODE,
3904  .node_id = PM_NODE_IDENTIFY(parser),
3905  .location = {
3906  .start = keyword_loc->start,
3907  .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
3908  },
3909  },
3910  .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3911  .value = value,
3912  .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3913  .keyword_loc = *keyword_loc
3914  };
3915 
3916  return node;
3917 }
3918 
3922 static pm_else_node_t *
3923 pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3924  pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3925  const uint8_t *end = NULL;
3926  if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
3927  end = statements->base.location.end;
3928  } else {
3929  end = end_keyword->end;
3930  }
3931 
3932  *node = (pm_else_node_t) {
3933  {
3934  .type = PM_ELSE_NODE,
3935  .node_id = PM_NODE_IDENTIFY(parser),
3936  .location = {
3937  .start = else_keyword->start,
3938  .end = end,
3939  },
3940  },
3941  .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3942  .statements = statements,
3943  .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3944  };
3945 
3946  return node;
3947 }
3948 
3952 static pm_embedded_statements_node_t *
3953 pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3954  pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3955 
3956  *node = (pm_embedded_statements_node_t) {
3957  {
3958  .type = PM_EMBEDDED_STATEMENTS_NODE,
3959  .node_id = PM_NODE_IDENTIFY(parser),
3960  .location = {
3961  .start = opening->start,
3962  .end = closing->end
3963  }
3964  },
3965  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3966  .statements = statements,
3967  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3968  };
3969 
3970  return node;
3971 }
3972 
3976 static pm_embedded_variable_node_t *
3977 pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3978  pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3979 
3980  *node = (pm_embedded_variable_node_t) {
3981  {
3982  .type = PM_EMBEDDED_VARIABLE_NODE,
3983  .node_id = PM_NODE_IDENTIFY(parser),
3984  .location = {
3985  .start = operator->start,
3986  .end = variable->location.end
3987  }
3988  },
3989  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3990  .variable = variable
3991  };
3992 
3993  return node;
3994 }
3995 
3999 static pm_ensure_node_t *
4000 pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4001  pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
4002 
4003  *node = (pm_ensure_node_t) {
4004  {
4005  .type = PM_ENSURE_NODE,
4006  .node_id = PM_NODE_IDENTIFY(parser),
4007  .location = {
4008  .start = ensure_keyword->start,
4009  .end = end_keyword->end
4010  },
4011  },
4012  .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
4013  .statements = statements,
4014  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4015  };
4016 
4017  return node;
4018 }
4019 
4023 static pm_false_node_t *
4024 pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4025  assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4026  pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
4027 
4028  *node = (pm_false_node_t) {{
4029  .type = PM_FALSE_NODE,
4030  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4031  .node_id = PM_NODE_IDENTIFY(parser),
4032  .location = PM_LOCATION_TOKEN_VALUE(token)
4033  }};
4034 
4035  return node;
4036 }
4037 
4042 static pm_find_pattern_node_t *
4043 pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4044  pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
4045 
4046  pm_node_t *left = nodes->nodes[0];
4047  assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4048  pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
4049 
4050  pm_node_t *right;
4051 
4052  if (nodes->size == 1) {
4053  right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
4054  } else {
4055  right = nodes->nodes[nodes->size - 1];
4056  assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4057  }
4058 
4059 #if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
4060  // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
4061  // The resulting AST will anyway be ignored, but this file still needs to compile.
4062  pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
4063 #else
4064  pm_node_t *right_splat_node = right;
4065 #endif
4066  *node = (pm_find_pattern_node_t) {
4067  {
4068  .type = PM_FIND_PATTERN_NODE,
4069  .node_id = PM_NODE_IDENTIFY(parser),
4070  .location = {
4071  .start = left->location.start,
4072  .end = right->location.end,
4073  },
4074  },
4075  .constant = NULL,
4076  .left = left_splat_node,
4077  .right = right_splat_node,
4078  .requireds = { 0 },
4079  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4080  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4081  };
4082 
4083  // For now we're going to just copy over each pointer manually. This could be
4084  // much more efficient, as we could instead resize the node list to only point
4085  // to 1...-1.
4086  for (size_t index = 1; index < nodes->size - 1; index++) {
4087  pm_node_list_append(&node->requireds, nodes->nodes[index]);
4088  }
4089 
4090  return node;
4091 }
4092 
4097 static double
4098 pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4099  ptrdiff_t diff = token->end - token->start;
4100  if (diff <= 0) return 0.0;
4101 
4102  // First, get a buffer of the content.
4103  size_t length = (size_t) diff;
4104  char *buffer = xmalloc(sizeof(char) * (length + 1));
4105  memcpy((void *) buffer, token->start, length);
4106 
4107  // Next, determine if we need to replace the decimal point because of
4108  // locale-specific options, and then normalize them if we have to.
4109  char decimal_point = *localeconv()->decimal_point;
4110  if (decimal_point != '.') {
4111  for (size_t index = 0; index < length; index++) {
4112  if (buffer[index] == '.') buffer[index] = decimal_point;
4113  }
4114  }
4115 
4116  // Next, handle underscores by removing them from the buffer.
4117  for (size_t index = 0; index < length; index++) {
4118  if (buffer[index] == '_') {
4119  memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4120  length--;
4121  }
4122  }
4123 
4124  // Null-terminate the buffer so that strtod cannot read off the end.
4125  buffer[length] = '\0';
4126 
4127  // Now, call strtod to parse the value. Note that CRuby has their own
4128  // version of strtod which avoids locales. We're okay using the locale-aware
4129  // version because we've already validated through the parser that the token
4130  // is in a valid format.
4131  errno = 0;
4132  char *eptr;
4133  double value = strtod(buffer, &eptr);
4134 
4135  // This should never happen, because we've already checked that the token
4136  // is in a valid format. However it's good to be safe.
4137  if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4138  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
4139  xfree((void *) buffer);
4140  return 0.0;
4141  }
4142 
4143  // If errno is set, then it should only be ERANGE. At this point we need to
4144  // check if it's infinity (it should be).
4145  if (errno == ERANGE && isinf(value)) {
4146  int warn_width;
4147  const char *ellipsis;
4148 
4149  if (length > 20) {
4150  warn_width = 20;
4151  ellipsis = "...";
4152  } else {
4153  warn_width = (int) length;
4154  ellipsis = "";
4155  }
4156 
4157  pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4158  value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4159  }
4160 
4161  // Finally we can free the buffer and return the value.
4162  xfree((void *) buffer);
4163  return value;
4164 }
4165 
4169 static pm_float_node_t *
4170 pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4171  assert(token->type == PM_TOKEN_FLOAT);
4172  pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
4173 
4174  *node = (pm_float_node_t) {
4175  {
4176  .type = PM_FLOAT_NODE,
4177  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4178  .node_id = PM_NODE_IDENTIFY(parser),
4179  .location = PM_LOCATION_TOKEN_VALUE(token)
4180  },
4181  .value = pm_double_parse(parser, token)
4182  };
4183 
4184  return node;
4185 }
4186 
4190 static pm_imaginary_node_t *
4191 pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4192  assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4193 
4194  pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4195  *node = (pm_imaginary_node_t) {
4196  {
4197  .type = PM_IMAGINARY_NODE,
4198  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4199  .node_id = PM_NODE_IDENTIFY(parser),
4200  .location = PM_LOCATION_TOKEN_VALUE(token)
4201  },
4202  .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4203  .type = PM_TOKEN_FLOAT,
4204  .start = token->start,
4205  .end = token->end - 1
4206  }))
4207  };
4208 
4209  return node;
4210 }
4211 
4215 static pm_rational_node_t *
4216 pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4217  assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4218 
4219  pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4220  *node = (pm_rational_node_t) {
4221  {
4222  .type = PM_RATIONAL_NODE,
4223  .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4224  .node_id = PM_NODE_IDENTIFY(parser),
4225  .location = PM_LOCATION_TOKEN_VALUE(token)
4226  },
4227  .numerator = { 0 },
4228  .denominator = { 0 }
4229  };
4230 
4231  const uint8_t *start = token->start;
4232  const uint8_t *end = token->end - 1; // r
4233 
4234  while (start < end && *start == '0') start++; // 0.1 -> .1
4235  while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4236 
4237  size_t length = (size_t) (end - start);
4238  if (length == 1) {
4239  node->denominator.value = 1;
4240  return node;
4241  }
4242 
4243  const uint8_t *point = memchr(start, '.', length);
4244  assert(point && "should have a decimal point");
4245 
4246  uint8_t *digits = malloc(length);
4247  if (digits == NULL) {
4248  fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4249  abort();
4250  }
4251 
4252  memcpy(digits, start, (unsigned long) (point - start));
4253  memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4254  pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4255 
4256  digits[0] = '1';
4257  if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4258  pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4259  free(digits);
4260 
4261  pm_integers_reduce(&node->numerator, &node->denominator);
4262  return node;
4263 }
4264 
4269 static pm_imaginary_node_t *
4270 pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4271  assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4272 
4273  pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4274  *node = (pm_imaginary_node_t) {
4275  {
4276  .type = PM_IMAGINARY_NODE,
4277  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4278  .node_id = PM_NODE_IDENTIFY(parser),
4279  .location = PM_LOCATION_TOKEN_VALUE(token)
4280  },
4281  .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
4282  .type = PM_TOKEN_FLOAT_RATIONAL,
4283  .start = token->start,
4284  .end = token->end - 1
4285  }))
4286  };
4287 
4288  return node;
4289 }
4290 
4294 static pm_for_node_t *
4295 pm_for_node_create(
4296  pm_parser_t *parser,
4297  pm_node_t *index,
4298  pm_node_t *collection,
4299  pm_statements_node_t *statements,
4300  const pm_token_t *for_keyword,
4301  const pm_token_t *in_keyword,
4302  const pm_token_t *do_keyword,
4303  const pm_token_t *end_keyword
4304 ) {
4305  pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4306 
4307  *node = (pm_for_node_t) {
4308  {
4309  .type = PM_FOR_NODE,
4310  .node_id = PM_NODE_IDENTIFY(parser),
4311  .location = {
4312  .start = for_keyword->start,
4313  .end = end_keyword->end
4314  },
4315  },
4316  .index = index,
4317  .collection = collection,
4318  .statements = statements,
4319  .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4320  .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4321  .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4322  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4323  };
4324 
4325  return node;
4326 }
4327 
4331 static pm_forwarding_arguments_node_t *
4332 pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4333  assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4334  pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4335 
4336  *node = (pm_forwarding_arguments_node_t) {{
4337  .type = PM_FORWARDING_ARGUMENTS_NODE,
4338  .node_id = PM_NODE_IDENTIFY(parser),
4339  .location = PM_LOCATION_TOKEN_VALUE(token)
4340  }};
4341 
4342  return node;
4343 }
4344 
4348 static pm_forwarding_parameter_node_t *
4349 pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4350  assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4351  pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4352 
4353  *node = (pm_forwarding_parameter_node_t) {{
4354  .type = PM_FORWARDING_PARAMETER_NODE,
4355  .node_id = PM_NODE_IDENTIFY(parser),
4356  .location = PM_LOCATION_TOKEN_VALUE(token)
4357  }};
4358 
4359  return node;
4360 }
4361 
4365 static pm_forwarding_super_node_t *
4366 pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4367  assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4368  assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4369  pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4370 
4371  pm_block_node_t *block = NULL;
4372  if (arguments->block != NULL) {
4373  block = (pm_block_node_t *) arguments->block;
4374  }
4375 
4376  *node = (pm_forwarding_super_node_t) {
4377  {
4378  .type = PM_FORWARDING_SUPER_NODE,
4379  .node_id = PM_NODE_IDENTIFY(parser),
4380  .location = {
4381  .start = token->start,
4382  .end = block != NULL ? block->base.location.end : token->end
4383  },
4384  },
4385  .block = block
4386  };
4387 
4388  return node;
4389 }
4390 
4395 static pm_hash_pattern_node_t *
4396 pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4397  pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4398 
4399  *node = (pm_hash_pattern_node_t) {
4400  {
4401  .type = PM_HASH_PATTERN_NODE,
4402  .node_id = PM_NODE_IDENTIFY(parser),
4403  .location = {
4404  .start = opening->start,
4405  .end = closing->end
4406  },
4407  },
4408  .constant = NULL,
4409  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4410  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4411  .elements = { 0 },
4412  .rest = NULL
4413  };
4414 
4415  return node;
4416 }
4417 
4421 static pm_hash_pattern_node_t *
4422 pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4423  pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4424 
4425  const uint8_t *start;
4426  const uint8_t *end;
4427 
4428  if (elements->size > 0) {
4429  if (rest) {
4430  start = elements->nodes[0]->location.start;
4431  end = rest->location.end;
4432  } else {
4433  start = elements->nodes[0]->location.start;
4434  end = elements->nodes[elements->size - 1]->location.end;
4435  }
4436  } else {
4437  assert(rest != NULL);
4438  start = rest->location.start;
4439  end = rest->location.end;
4440  }
4441 
4442  *node = (pm_hash_pattern_node_t) {
4443  {
4444  .type = PM_HASH_PATTERN_NODE,
4445  .node_id = PM_NODE_IDENTIFY(parser),
4446  .location = {
4447  .start = start,
4448  .end = end
4449  },
4450  },
4451  .constant = NULL,
4452  .elements = { 0 },
4453  .rest = rest,
4454  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4455  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4456  };
4457 
4458  pm_node_t *element;
4459  PM_NODE_LIST_FOREACH(elements, index, element) {
4460  pm_node_list_append(&node->elements, element);
4461  }
4462 
4463  return node;
4464 }
4465 
4469 static pm_constant_id_t
4470 pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4471  switch (PM_NODE_TYPE(target)) {
4472  case PM_GLOBAL_VARIABLE_READ_NODE:
4473  return ((pm_global_variable_read_node_t *) target)->name;
4474  case PM_BACK_REFERENCE_READ_NODE:
4475  return ((pm_back_reference_read_node_t *) target)->name;
4476  case PM_NUMBERED_REFERENCE_READ_NODE:
4477  // This will only ever happen in the event of a syntax error, but we
4478  // still need to provide something for the node.
4479  return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4480  default:
4481  assert(false && "unreachable");
4482  return (pm_constant_id_t) -1;
4483  }
4484 }
4485 
4489 static pm_global_variable_and_write_node_t *
4490 pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4491  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4492  pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4493 
4494  *node = (pm_global_variable_and_write_node_t) {
4495  {
4496  .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
4497  .node_id = PM_NODE_IDENTIFY(parser),
4498  .location = {
4499  .start = target->location.start,
4500  .end = value->location.end
4501  }
4502  },
4503  .name = pm_global_variable_write_name(parser, target),
4504  .name_loc = target->location,
4505  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4506  .value = value
4507  };
4508 
4509  return node;
4510 }
4511 
4515 static pm_global_variable_operator_write_node_t *
4516 pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4517  pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4518 
4519  *node = (pm_global_variable_operator_write_node_t) {
4520  {
4521  .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
4522  .node_id = PM_NODE_IDENTIFY(parser),
4523  .location = {
4524  .start = target->location.start,
4525  .end = value->location.end
4526  }
4527  },
4528  .name = pm_global_variable_write_name(parser, target),
4529  .name_loc = target->location,
4530  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4531  .value = value,
4532  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4533  };
4534 
4535  return node;
4536 }
4537 
4541 static pm_global_variable_or_write_node_t *
4542 pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4543  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4544  pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4545 
4546  *node = (pm_global_variable_or_write_node_t) {
4547  {
4548  .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
4549  .node_id = PM_NODE_IDENTIFY(parser),
4550  .location = {
4551  .start = target->location.start,
4552  .end = value->location.end
4553  }
4554  },
4555  .name = pm_global_variable_write_name(parser, target),
4556  .name_loc = target->location,
4557  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4558  .value = value
4559  };
4560 
4561  return node;
4562 }
4563 
4567 static pm_global_variable_read_node_t *
4568 pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4569  pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4570 
4571  *node = (pm_global_variable_read_node_t) {
4572  {
4573  .type = PM_GLOBAL_VARIABLE_READ_NODE,
4574  .node_id = PM_NODE_IDENTIFY(parser),
4575  .location = PM_LOCATION_TOKEN_VALUE(name),
4576  },
4577  .name = pm_parser_constant_id_token(parser, name)
4578  };
4579 
4580  return node;
4581 }
4582 
4586 static pm_global_variable_read_node_t *
4587 pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4588  pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4589 
4590  *node = (pm_global_variable_read_node_t) {
4591  {
4592  .type = PM_GLOBAL_VARIABLE_READ_NODE,
4593  .node_id = PM_NODE_IDENTIFY(parser),
4594  .location = PM_LOCATION_NULL_VALUE(parser)
4595  },
4596  .name = name
4597  };
4598 
4599  return node;
4600 }
4601 
4605 static pm_global_variable_write_node_t *
4606 pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4607  pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4608 
4609  *node = (pm_global_variable_write_node_t) {
4610  {
4611  .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4612  .node_id = PM_NODE_IDENTIFY(parser),
4613  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4614  .location = {
4615  .start = target->location.start,
4616  .end = value->location.end
4617  },
4618  },
4619  .name = pm_global_variable_write_name(parser, target),
4620  .name_loc = PM_LOCATION_NODE_VALUE(target),
4621  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4622  .value = value
4623  };
4624 
4625  return node;
4626 }
4627 
4631 static pm_global_variable_write_node_t *
4632 pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4633  pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4634 
4635  *node = (pm_global_variable_write_node_t) {
4636  {
4637  .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4638  .node_id = PM_NODE_IDENTIFY(parser),
4639  .location = PM_LOCATION_NULL_VALUE(parser)
4640  },
4641  .name = name,
4642  .name_loc = PM_LOCATION_NULL_VALUE(parser),
4643  .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4644  .value = value
4645  };
4646 
4647  return node;
4648 }
4649 
4653 static pm_hash_node_t *
4654 pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4655  assert(opening != NULL);
4656  pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4657 
4658  *node = (pm_hash_node_t) {
4659  {
4660  .type = PM_HASH_NODE,
4661  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4662  .node_id = PM_NODE_IDENTIFY(parser),
4663  .location = PM_LOCATION_TOKEN_VALUE(opening)
4664  },
4665  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4666  .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4667  .elements = { 0 }
4668  };
4669 
4670  return node;
4671 }
4672 
4676 static inline void
4677 pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4678  pm_node_list_append(&hash->elements, element);
4679 
4680  bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4681  if (static_literal) {
4682  pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4683  static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4684  static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4685  static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4686  }
4687 
4688  if (!static_literal) {
4689  pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
4690  }
4691 }
4692 
4693 static inline void
4694 pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4695  hash->base.location.end = token->end;
4696  hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4697 }
4698 
4702 static pm_if_node_t *
4703 pm_if_node_create(pm_parser_t *parser,
4704  const pm_token_t *if_keyword,
4705  pm_node_t *predicate,
4706  const pm_token_t *then_keyword,
4707  pm_statements_node_t *statements,
4708  pm_node_t *subsequent,
4709  const pm_token_t *end_keyword
4710 ) {
4711  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4712  pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4713 
4714  const uint8_t *end;
4715  if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4716  end = end_keyword->end;
4717  } else if (subsequent != NULL) {
4718  end = subsequent->location.end;
4719  } else if (pm_statements_node_body_length(statements) != 0) {
4720  end = statements->base.location.end;
4721  } else {
4722  end = predicate->location.end;
4723  }
4724 
4725  *node = (pm_if_node_t) {
4726  {
4727  .type = PM_IF_NODE,
4728  .flags = PM_NODE_FLAG_NEWLINE,
4729  .node_id = PM_NODE_IDENTIFY(parser),
4730  .location = {
4731  .start = if_keyword->start,
4732  .end = end
4733  },
4734  },
4735  .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4736  .predicate = predicate,
4737  .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4738  .statements = statements,
4739  .subsequent = subsequent,
4740  .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4741  };
4742 
4743  return node;
4744 }
4745 
4749 static pm_if_node_t *
4750 pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4751  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4752  pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4753 
4754  pm_statements_node_t *statements = pm_statements_node_create(parser);
4755  pm_statements_node_body_append(parser, statements, statement, true);
4756 
4757  *node = (pm_if_node_t) {
4758  {
4759  .type = PM_IF_NODE,
4760  .flags = PM_NODE_FLAG_NEWLINE,
4761  .node_id = PM_NODE_IDENTIFY(parser),
4762  .location = {
4763  .start = statement->location.start,
4764  .end = predicate->location.end
4765  },
4766  },
4767  .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4768  .predicate = predicate,
4769  .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4770  .statements = statements,
4771  .subsequent = NULL,
4772  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4773  };
4774 
4775  return node;
4776 }
4777 
4781 static pm_if_node_t *
4782 pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4783  pm_assert_value_expression(parser, predicate);
4784  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4785 
4786  pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4787  pm_statements_node_body_append(parser, if_statements, true_expression, true);
4788 
4789  pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4790  pm_statements_node_body_append(parser, else_statements, false_expression, true);
4791 
4792  pm_token_t end_keyword = not_provided(parser);
4793  pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4794 
4795  pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4796 
4797  *node = (pm_if_node_t) {
4798  {
4799  .type = PM_IF_NODE,
4800  .flags = PM_NODE_FLAG_NEWLINE,
4801  .node_id = PM_NODE_IDENTIFY(parser),
4802  .location = {
4803  .start = predicate->location.start,
4804  .end = false_expression->location.end,
4805  },
4806  },
4807  .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4808  .predicate = predicate,
4809  .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4810  .statements = if_statements,
4811  .subsequent = (pm_node_t *) else_node,
4812  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4813  };
4814 
4815  return node;
4816 
4817 }
4818 
4819 static inline void
4820 pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4821  node->base.location.end = keyword->end;
4822  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4823 }
4824 
4825 static inline void
4826 pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4827  node->base.location.end = keyword->end;
4828  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4829 }
4830 
4834 static pm_implicit_node_t *
4835 pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4836  pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4837 
4838  *node = (pm_implicit_node_t) {
4839  {
4840  .type = PM_IMPLICIT_NODE,
4841  .node_id = PM_NODE_IDENTIFY(parser),
4842  .location = value->location
4843  },
4844  .value = value
4845  };
4846 
4847  return node;
4848 }
4849 
4853 static pm_implicit_rest_node_t *
4854 pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4855  assert(token->type == PM_TOKEN_COMMA);
4856 
4857  pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4858 
4859  *node = (pm_implicit_rest_node_t) {
4860  {
4861  .type = PM_IMPLICIT_REST_NODE,
4862  .node_id = PM_NODE_IDENTIFY(parser),
4863  .location = PM_LOCATION_TOKEN_VALUE(token)
4864  }
4865  };
4866 
4867  return node;
4868 }
4869 
4873 static pm_integer_node_t *
4874 pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4875  assert(token->type == PM_TOKEN_INTEGER);
4876  pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4877 
4878  *node = (pm_integer_node_t) {
4879  {
4880  .type = PM_INTEGER_NODE,
4881  .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4882  .node_id = PM_NODE_IDENTIFY(parser),
4883  .location = PM_LOCATION_TOKEN_VALUE(token)
4884  },
4885  .value = { 0 }
4886  };
4887 
4888  pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4889  switch (base) {
4890  case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4891  case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4892  case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4893  case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4894  default: assert(false && "unreachable"); break;
4895  }
4896 
4897  pm_integer_parse(&node->value, integer_base, token->start, token->end);
4898  return node;
4899 }
4900 
4905 static pm_imaginary_node_t *
4906 pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4907  assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4908 
4909  pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4910  *node = (pm_imaginary_node_t) {
4911  {
4912  .type = PM_IMAGINARY_NODE,
4913  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4914  .node_id = PM_NODE_IDENTIFY(parser),
4915  .location = PM_LOCATION_TOKEN_VALUE(token)
4916  },
4917  .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4918  .type = PM_TOKEN_INTEGER,
4919  .start = token->start,
4920  .end = token->end - 1
4921  }))
4922  };
4923 
4924  return node;
4925 }
4926 
4931 static pm_rational_node_t *
4932 pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4933  assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4934 
4935  pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4936  *node = (pm_rational_node_t) {
4937  {
4938  .type = PM_RATIONAL_NODE,
4939  .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4940  .node_id = PM_NODE_IDENTIFY(parser),
4941  .location = PM_LOCATION_TOKEN_VALUE(token)
4942  },
4943  .numerator = { 0 },
4944  .denominator = { .value = 1, 0 }
4945  };
4946 
4947  pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4948  switch (base) {
4949  case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4950  case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4951  case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4952  case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4953  default: assert(false && "unreachable"); break;
4954  }
4955 
4956  pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4957 
4958  return node;
4959 }
4960 
4965 static pm_imaginary_node_t *
4966 pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4967  assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4968 
4969  pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4970  *node = (pm_imaginary_node_t) {
4971  {
4972  .type = PM_IMAGINARY_NODE,
4973  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4974  .node_id = PM_NODE_IDENTIFY(parser),
4975  .location = PM_LOCATION_TOKEN_VALUE(token)
4976  },
4977  .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4978  .type = PM_TOKEN_INTEGER_RATIONAL,
4979  .start = token->start,
4980  .end = token->end - 1
4981  }))
4982  };
4983 
4984  return node;
4985 }
4986 
4990 static pm_in_node_t *
4991 pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4992  pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
4993 
4994  const uint8_t *end;
4995  if (statements != NULL) {
4996  end = statements->base.location.end;
4997  } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4998  end = then_keyword->end;
4999  } else {
5000  end = pattern->location.end;
5001  }
5002 
5003  *node = (pm_in_node_t) {
5004  {
5005  .type = PM_IN_NODE,
5006  .node_id = PM_NODE_IDENTIFY(parser),
5007  .location = {
5008  .start = in_keyword->start,
5009  .end = end
5010  },
5011  },
5012  .pattern = pattern,
5013  .statements = statements,
5014  .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
5015  .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
5016  };
5017 
5018  return node;
5019 }
5020 
5024 static pm_instance_variable_and_write_node_t *
5025 pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5026  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5027  pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
5028 
5029  *node = (pm_instance_variable_and_write_node_t) {
5030  {
5031  .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
5032  .node_id = PM_NODE_IDENTIFY(parser),
5033  .location = {
5034  .start = target->base.location.start,
5035  .end = value->location.end
5036  }
5037  },
5038  .name = target->name,
5039  .name_loc = target->base.location,
5040  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5041  .value = value
5042  };
5043 
5044  return node;
5045 }
5046 
5050 static pm_instance_variable_operator_write_node_t *
5051 pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5052  pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
5053 
5054  *node = (pm_instance_variable_operator_write_node_t) {
5055  {
5056  .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
5057  .node_id = PM_NODE_IDENTIFY(parser),
5058  .location = {
5059  .start = target->base.location.start,
5060  .end = value->location.end
5061  }
5062  },
5063  .name = target->name,
5064  .name_loc = target->base.location,
5065  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5066  .value = value,
5067  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5068  };
5069 
5070  return node;
5071 }
5072 
5076 static pm_instance_variable_or_write_node_t *
5077 pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5078  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5079  pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
5080 
5081  *node = (pm_instance_variable_or_write_node_t) {
5082  {
5083  .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
5084  .node_id = PM_NODE_IDENTIFY(parser),
5085  .location = {
5086  .start = target->base.location.start,
5087  .end = value->location.end
5088  }
5089  },
5090  .name = target->name,
5091  .name_loc = target->base.location,
5092  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5093  .value = value
5094  };
5095 
5096  return node;
5097 }
5098 
5102 static pm_instance_variable_read_node_t *
5103 pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
5104  assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
5105  pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
5106 
5107  *node = (pm_instance_variable_read_node_t) {
5108  {
5109  .type = PM_INSTANCE_VARIABLE_READ_NODE,
5110  .node_id = PM_NODE_IDENTIFY(parser),
5111  .location = PM_LOCATION_TOKEN_VALUE(token)
5112  },
5113  .name = pm_parser_constant_id_token(parser, token)
5114  };
5115 
5116  return node;
5117 }
5118 
5123 static pm_instance_variable_write_node_t *
5124 pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
5125  pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
5126  *node = (pm_instance_variable_write_node_t) {
5127  {
5128  .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
5129  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5130  .node_id = PM_NODE_IDENTIFY(parser),
5131  .location = {
5132  .start = read_node->base.location.start,
5133  .end = value->location.end
5134  }
5135  },
5136  .name = read_node->name,
5137  .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
5138  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
5139  .value = value
5140  };
5141 
5142  return node;
5143 }
5144 
5150 static void
5151 pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5152  switch (PM_NODE_TYPE(part)) {
5153  case PM_STRING_NODE:
5154  pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5155  break;
5156  case PM_EMBEDDED_STATEMENTS_NODE: {
5157  pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5158  pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5159 
5160  if (embedded == NULL) {
5161  // If there are no statements or more than one statement, then
5162  // we lose the static literal flag.
5163  pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5164  } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5165  // If the embedded statement is a string, then we can keep the
5166  // static literal flag and mark the string as frozen.
5167  pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5168  } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5169  // If the embedded statement is an interpolated string and it's
5170  // a static literal, then we can keep the static literal flag.
5171  } else {
5172  // Otherwise we lose the static literal flag.
5173  pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5174  }
5175 
5176  break;
5177  }
5178  case PM_EMBEDDED_VARIABLE_NODE:
5179  pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5180  break;
5181  default:
5182  assert(false && "unexpected node type");
5183  break;
5184  }
5185 
5186  pm_node_list_append(parts, part);
5187 }
5188 
5192 static pm_interpolated_regular_expression_node_t *
5193 pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5194  pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
5195 
5196  *node = (pm_interpolated_regular_expression_node_t) {
5197  {
5198  .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
5199  .flags = PM_NODE_FLAG_STATIC_LITERAL,
5200  .node_id = PM_NODE_IDENTIFY(parser),
5201  .location = {
5202  .start = opening->start,
5203  .end = NULL,
5204  },
5205  },
5206  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5207  .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
5208  .parts = { 0 }
5209  };
5210 
5211  return node;
5212 }
5213 
5214 static inline void
5215 pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5216  if (node->base.location.start > part->location.start) {
5217  node->base.location.start = part->location.start;
5218  }
5219  if (node->base.location.end < part->location.end) {
5220  node->base.location.end = part->location.end;
5221  }
5222 
5223  pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5224 }
5225 
5226 static inline void
5227 pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5228  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
5229  node->base.location.end = closing->end;
5230  pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
5231 }
5232 
5256 static inline void
5257 pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5258 #define CLEAR_FLAGS(node) \
5259  node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5260 
5261 #define MUTABLE_FLAGS(node) \
5262  node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5263 
5264  if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5265  node->base.location.start = part->location.start;
5266  }
5267 
5268  node->base.location.end = MAX(node->base.location.end, part->location.end);
5269 
5270  switch (PM_NODE_TYPE(part)) {
5271  case PM_STRING_NODE:
5272  part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5273  break;
5274  case PM_INTERPOLATED_STRING_NODE:
5275  if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5276  // If the string that we're concatenating is a static literal,
5277  // then we can keep the static literal flag for this string.
5278  } else {
5279  // Otherwise, we lose the static literal flag here and we should
5280  // also clear the mutability flags.
5281  CLEAR_FLAGS(node);
5282  }
5283  break;
5284  case PM_EMBEDDED_STATEMENTS_NODE: {
5285  pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5286  pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5287 
5288  if (embedded == NULL) {
5289  // If we're embedding multiple statements or no statements, then
5290  // the string is not longer a static literal.
5291  CLEAR_FLAGS(node);
5292  } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5293  // If the embedded statement is a string, then we can make that
5294  // string as frozen and static literal, and not touch the static
5295  // literal status of this string.
5296  embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5297 
5298  if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5299  MUTABLE_FLAGS(node);
5300  }
5301  } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5302  // If the embedded statement is an interpolated string, but that
5303  // string is marked as static literal, then we can keep our
5304  // static literal status for this string.
5305  if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5306  MUTABLE_FLAGS(node);
5307  }
5308  } else {
5309  // In all other cases, we lose the static literal flag here and
5310  // become mutable.
5311  CLEAR_FLAGS(node);
5312  }
5313 
5314  break;
5315  }
5316  case PM_EMBEDDED_VARIABLE_NODE:
5317  // Embedded variables clear static literal, which means we also
5318  // should clear the mutability flags.
5319  CLEAR_FLAGS(node);
5320  break;
5321  default:
5322  assert(false && "unexpected node type");
5323  break;
5324  }
5325 
5326  pm_node_list_append(&node->parts, part);
5327 
5328 #undef CLEAR_FLAGS
5329 #undef MUTABLE_FLAGS
5330 }
5331 
5335 static pm_interpolated_string_node_t *
5336 pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5337  pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
5338  pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5339 
5340  switch (parser->frozen_string_literal) {
5341  case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5342  flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5343  break;
5344  case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5345  flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5346  break;
5347  }
5348 
5349  *node = (pm_interpolated_string_node_t) {
5350  {
5351  .type = PM_INTERPOLATED_STRING_NODE,
5352  .flags = flags,
5353  .node_id = PM_NODE_IDENTIFY(parser),
5354  .location = {
5355  .start = opening->start,
5356  .end = closing->end,
5357  },
5358  },
5359  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5360  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5361  .parts = { 0 }
5362  };
5363 
5364  if (parts != NULL) {
5365  pm_node_t *part;
5366  PM_NODE_LIST_FOREACH(parts, index, part) {
5367  pm_interpolated_string_node_append(node, part);
5368  }
5369  }
5370 
5371  return node;
5372 }
5373 
5377 static void
5378 pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5379  node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5380  node->base.location.end = closing->end;
5381 }
5382 
5383 static void
5384 pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5385  if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5386  node->base.location.start = part->location.start;
5387  }
5388 
5389  pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5390  node->base.location.end = MAX(node->base.location.end, part->location.end);
5391 }
5392 
5393 static void
5394 pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5395  node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5396  node->base.location.end = closing->end;
5397 }
5398 
5402 static pm_interpolated_symbol_node_t *
5403 pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5404  pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
5405 
5406  *node = (pm_interpolated_symbol_node_t) {
5407  {
5408  .type = PM_INTERPOLATED_SYMBOL_NODE,
5409  .flags = PM_NODE_FLAG_STATIC_LITERAL,
5410  .node_id = PM_NODE_IDENTIFY(parser),
5411  .location = {
5412  .start = opening->start,
5413  .end = closing->end,
5414  },
5415  },
5416  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5417  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5418  .parts = { 0 }
5419  };
5420 
5421  if (parts != NULL) {
5422  pm_node_t *part;
5423  PM_NODE_LIST_FOREACH(parts, index, part) {
5424  pm_interpolated_symbol_node_append(node, part);
5425  }
5426  }
5427 
5428  return node;
5429 }
5430 
5434 static pm_interpolated_x_string_node_t *
5435 pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5436  pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
5437 
5438  *node = (pm_interpolated_x_string_node_t) {
5439  {
5440  .type = PM_INTERPOLATED_X_STRING_NODE,
5441  .node_id = PM_NODE_IDENTIFY(parser),
5442  .location = {
5443  .start = opening->start,
5444  .end = closing->end
5445  },
5446  },
5447  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5448  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5449  .parts = { 0 }
5450  };
5451 
5452  return node;
5453 }
5454 
5455 static inline void
5456 pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5457  pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5458  node->base.location.end = part->location.end;
5459 }
5460 
5461 static inline void
5462 pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5463  node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5464  node->base.location.end = closing->end;
5465 }
5466 
5470 static pm_it_local_variable_read_node_t *
5471 pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5472  pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
5473 
5474  *node = (pm_it_local_variable_read_node_t) {
5475  {
5476  .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5477  .node_id = PM_NODE_IDENTIFY(parser),
5478  .location = PM_LOCATION_TOKEN_VALUE(name)
5479  }
5480  };
5481 
5482  return node;
5483 }
5484 
5488 static pm_it_parameters_node_t *
5489 pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5490  pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5491 
5492  *node = (pm_it_parameters_node_t) {
5493  {
5494  .type = PM_IT_PARAMETERS_NODE,
5495  .node_id = PM_NODE_IDENTIFY(parser),
5496  .location = {
5497  .start = opening->start,
5498  .end = closing->end
5499  }
5500  }
5501  };
5502 
5503  return node;
5504 }
5505 
5509 static pm_keyword_hash_node_t *
5510 pm_keyword_hash_node_create(pm_parser_t *parser) {
5511  pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5512 
5513  *node = (pm_keyword_hash_node_t) {
5514  .base = {
5515  .type = PM_KEYWORD_HASH_NODE,
5516  .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5517  .node_id = PM_NODE_IDENTIFY(parser),
5518  .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5519  },
5520  .elements = { 0 }
5521  };
5522 
5523  return node;
5524 }
5525 
5529 static void
5530 pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5531  // If the element being added is not an AssocNode or does not have a symbol
5532  // key, then we want to turn the SYMBOL_KEYS flag off.
5533  if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5534  pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5535  }
5536 
5537  pm_node_list_append(&hash->elements, element);
5538  if (hash->base.location.start == NULL) {
5539  hash->base.location.start = element->location.start;
5540  }
5541  hash->base.location.end = element->location.end;
5542 }
5543 
5547 static pm_required_keyword_parameter_node_t *
5548 pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5549  pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5550 
5551  *node = (pm_required_keyword_parameter_node_t) {
5552  {
5553  .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
5554  .node_id = PM_NODE_IDENTIFY(parser),
5555  .location = {
5556  .start = name->start,
5557  .end = name->end
5558  },
5559  },
5560  .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5561  .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5562  };
5563 
5564  return node;
5565 }
5566 
5570 static pm_optional_keyword_parameter_node_t *
5571 pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5572  pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5573 
5574  *node = (pm_optional_keyword_parameter_node_t) {
5575  {
5576  .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
5577  .node_id = PM_NODE_IDENTIFY(parser),
5578  .location = {
5579  .start = name->start,
5580  .end = value->location.end
5581  },
5582  },
5583  .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5584  .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5585  .value = value
5586  };
5587 
5588  return node;
5589 }
5590 
5594 static pm_keyword_rest_parameter_node_t *
5595 pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5596  pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5597 
5598  *node = (pm_keyword_rest_parameter_node_t) {
5599  {
5600  .type = PM_KEYWORD_REST_PARAMETER_NODE,
5601  .node_id = PM_NODE_IDENTIFY(parser),
5602  .location = {
5603  .start = operator->start,
5604  .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
5605  },
5606  },
5607  .name = pm_parser_optional_constant_id_token(parser, name),
5608  .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5609  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5610  };
5611 
5612  return node;
5613 }
5614 
5618 static pm_lambda_node_t *
5619 pm_lambda_node_create(
5620  pm_parser_t *parser,
5621  pm_constant_id_list_t *locals,
5622  const pm_token_t *operator,
5623  const pm_token_t *opening,
5624  const pm_token_t *closing,
5625  pm_node_t *parameters,
5626  pm_node_t *body
5627 ) {
5628  pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5629 
5630  *node = (pm_lambda_node_t) {
5631  {
5632  .type = PM_LAMBDA_NODE,
5633  .node_id = PM_NODE_IDENTIFY(parser),
5634  .location = {
5635  .start = operator->start,
5636  .end = closing->end
5637  },
5638  },
5639  .locals = *locals,
5640  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5641  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5642  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5643  .parameters = parameters,
5644  .body = body
5645  };
5646 
5647  return node;
5648 }
5649 
5653 static pm_local_variable_and_write_node_t *
5654 pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5655  assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5656  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5657  pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5658 
5659  *node = (pm_local_variable_and_write_node_t) {
5660  {
5661  .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
5662  .node_id = PM_NODE_IDENTIFY(parser),
5663  .location = {
5664  .start = target->location.start,
5665  .end = value->location.end
5666  }
5667  },
5668  .name_loc = target->location,
5669  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5670  .value = value,
5671  .name = name,
5672  .depth = depth
5673  };
5674 
5675  return node;
5676 }
5677 
5681 static pm_local_variable_operator_write_node_t *
5682 pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5683  pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5684 
5685  *node = (pm_local_variable_operator_write_node_t) {
5686  {
5687  .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
5688  .node_id = PM_NODE_IDENTIFY(parser),
5689  .location = {
5690  .start = target->location.start,
5691  .end = value->location.end
5692  }
5693  },
5694  .name_loc = target->location,
5695  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5696  .value = value,
5697  .name = name,
5698  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5699  .depth = depth
5700  };
5701 
5702  return node;
5703 }
5704 
5708 static pm_local_variable_or_write_node_t *
5709 pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5710  assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5711  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5712  pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5713 
5714  *node = (pm_local_variable_or_write_node_t) {
5715  {
5716  .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
5717  .node_id = PM_NODE_IDENTIFY(parser),
5718  .location = {
5719  .start = target->location.start,
5720  .end = value->location.end
5721  }
5722  },
5723  .name_loc = target->location,
5724  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5725  .value = value,
5726  .name = name,
5727  .depth = depth
5728  };
5729 
5730  return node;
5731 }
5732 
5736 static pm_local_variable_read_node_t *
5737 pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5738  if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5739 
5740  pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5741 
5742  *node = (pm_local_variable_read_node_t) {
5743  {
5744  .type = PM_LOCAL_VARIABLE_READ_NODE,
5745  .node_id = PM_NODE_IDENTIFY(parser),
5746  .location = PM_LOCATION_TOKEN_VALUE(name)
5747  },
5748  .name = name_id,
5749  .depth = depth
5750  };
5751 
5752  return node;
5753 }
5754 
5758 static pm_local_variable_read_node_t *
5759 pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5760  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5761  return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5762 }
5763 
5768 static pm_local_variable_read_node_t *
5769 pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5770  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5771  return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5772 }
5773 
5777 static pm_local_variable_write_node_t *
5778 pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5779  pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5780 
5781  *node = (pm_local_variable_write_node_t) {
5782  {
5783  .type = PM_LOCAL_VARIABLE_WRITE_NODE,
5784  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5785  .node_id = PM_NODE_IDENTIFY(parser),
5786  .location = {
5787  .start = name_loc->start,
5788  .end = value->location.end
5789  }
5790  },
5791  .name = name,
5792  .depth = depth,
5793  .value = value,
5794  .name_loc = *name_loc,
5795  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5796  };
5797 
5798  return node;
5799 }
5800 
/**
 * Returns true if the bytes in the range [start, end) are exactly the two
 * bytes "it".
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;
    if (start[0] != 'i') return false;

    return start[1] == 't';
}
5808 
/**
 * Returns true if the bytes in the range [start, end) are exactly two bytes
 * long, begin with an underscore, and end with a byte other than '0' that
 * pm_char_is_decimal_digit accepts.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;
    if (start[0] != '_') return false;
    if (start[1] == '0') return false;

    return pm_char_is_decimal_digit(start[1]);
}
5817 
5822 static inline void
5823 pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5824  if (pm_token_is_numbered_parameter(start, end)) {
5825  PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5826  }
5827 }
5828 
5833 static pm_local_variable_target_node_t *
5834 pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5835  pm_refute_numbered_parameter(parser, location->start, location->end);
5836  pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5837 
5838  *node = (pm_local_variable_target_node_t) {
5839  {
5840  .type = PM_LOCAL_VARIABLE_TARGET_NODE,
5841  .node_id = PM_NODE_IDENTIFY(parser),
5842  .location = *location
5843  },
5844  .name = name,
5845  .depth = depth
5846  };
5847 
5848  return node;
5849 }
5850 
5854 static pm_match_predicate_node_t *
5855 pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5856  pm_assert_value_expression(parser, value);
5857 
5858  pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5859 
5860  *node = (pm_match_predicate_node_t) {
5861  {
5862  .type = PM_MATCH_PREDICATE_NODE,
5863  .node_id = PM_NODE_IDENTIFY(parser),
5864  .location = {
5865  .start = value->location.start,
5866  .end = pattern->location.end
5867  }
5868  },
5869  .value = value,
5870  .pattern = pattern,
5871  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5872  };
5873 
5874  return node;
5875 }
5876 
5880 static pm_match_required_node_t *
5881 pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5882  pm_assert_value_expression(parser, value);
5883 
5884  pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5885 
5886  *node = (pm_match_required_node_t) {
5887  {
5888  .type = PM_MATCH_REQUIRED_NODE,
5889  .node_id = PM_NODE_IDENTIFY(parser),
5890  .location = {
5891  .start = value->location.start,
5892  .end = pattern->location.end
5893  }
5894  },
5895  .value = value,
5896  .pattern = pattern,
5897  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5898  };
5899 
5900  return node;
5901 }
5902 
5906 static pm_match_write_node_t *
5907 pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5908  pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5909 
5910  *node = (pm_match_write_node_t) {
5911  {
5912  .type = PM_MATCH_WRITE_NODE,
5913  .node_id = PM_NODE_IDENTIFY(parser),
5914  .location = call->base.location
5915  },
5916  .call = call,
5917  .targets = { 0 }
5918  };
5919 
5920  return node;
5921 }
5922 
5926 static pm_module_node_t *
5927 pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5928  pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5929 
5930  *node = (pm_module_node_t) {
5931  {
5932  .type = PM_MODULE_NODE,
5933  .node_id = PM_NODE_IDENTIFY(parser),
5934  .location = {
5935  .start = module_keyword->start,
5936  .end = end_keyword->end
5937  }
5938  },
5939  .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5940  .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5941  .constant_path = constant_path,
5942  .body = body,
5943  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5944  .name = pm_parser_constant_id_token(parser, name)
5945  };
5946 
5947  return node;
5948 }
5949 
5953 static pm_multi_target_node_t *
5954 pm_multi_target_node_create(pm_parser_t *parser) {
5955  pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5956 
5957  *node = (pm_multi_target_node_t) {
5958  {
5959  .type = PM_MULTI_TARGET_NODE,
5960  .node_id = PM_NODE_IDENTIFY(parser),
5961  .location = { .start = NULL, .end = NULL }
5962  },
5963  .lefts = { 0 },
5964  .rest = NULL,
5965  .rights = { 0 },
5966  .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5967  .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5968  };
5969 
5970  return node;
5971 }
5972 
5976 static void
5977 pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5978  if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5979  if (node->rest == NULL) {
5980  node->rest = target;
5981  } else {
5982  pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5983  pm_node_list_append(&node->rights, target);
5984  }
5985  } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5986  if (node->rest == NULL) {
5987  node->rest = target;
5988  } else {
5989  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5990  pm_node_list_append(&node->rights, target);
5991  }
5992  } else if (node->rest == NULL) {
5993  pm_node_list_append(&node->lefts, target);
5994  } else {
5995  pm_node_list_append(&node->rights, target);
5996  }
5997 
5998  if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
5999  node->base.location.start = target->location.start;
6000  }
6001 
6002  if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
6003  node->base.location.end = target->location.end;
6004  }
6005 }
6006 
6010 static void
6011 pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
6012  node->base.location.start = lparen->start;
6013  node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
6014 }
6015 
6019 static void
6020 pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
6021  node->base.location.end = rparen->end;
6022  node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
6023 }
6024 
6028 static pm_multi_write_node_t *
6029 pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
6030  pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
6031 
6032  *node = (pm_multi_write_node_t) {
6033  {
6034  .type = PM_MULTI_WRITE_NODE,
6035  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
6036  .node_id = PM_NODE_IDENTIFY(parser),
6037  .location = {
6038  .start = target->base.location.start,
6039  .end = value->location.end
6040  }
6041  },
6042  .lefts = target->lefts,
6043  .rest = target->rest,
6044  .rights = target->rights,
6045  .lparen_loc = target->lparen_loc,
6046  .rparen_loc = target->rparen_loc,
6047  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6048  .value = value
6049  };
6050 
6051  // Explicitly do not call pm_node_destroy here because we want to keep
6052  // around all of the information within the MultiWriteNode node.
6053  xfree(target);
6054 
6055  return node;
6056 }
6057 
6061 static pm_next_node_t *
6062 pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6063  assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
6064  pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
6065 
6066  *node = (pm_next_node_t) {
6067  {
6068  .type = PM_NEXT_NODE,
6069  .node_id = PM_NODE_IDENTIFY(parser),
6070  .location = {
6071  .start = keyword->start,
6072  .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6073  }
6074  },
6075  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6076  .arguments = arguments
6077  };
6078 
6079  return node;
6080 }
6081 
6085 static pm_nil_node_t *
6086 pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
6087  assert(token->type == PM_TOKEN_KEYWORD_NIL);
6088  pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
6089 
6090  *node = (pm_nil_node_t) {{
6091  .type = PM_NIL_NODE,
6092  .flags = PM_NODE_FLAG_STATIC_LITERAL,
6093  .node_id = PM_NODE_IDENTIFY(parser),
6094  .location = PM_LOCATION_TOKEN_VALUE(token)
6095  }};
6096 
6097  return node;
6098 }
6099 
6103 static pm_no_keywords_parameter_node_t *
6104 pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
6105  assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
6106  assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
6107  pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
6108 
6109  *node = (pm_no_keywords_parameter_node_t) {
6110  {
6111  .type = PM_NO_KEYWORDS_PARAMETER_NODE,
6112  .node_id = PM_NODE_IDENTIFY(parser),
6113  .location = {
6114  .start = operator->start,
6115  .end = keyword->end
6116  }
6117  },
6118  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6119  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
6120  };
6121 
6122  return node;
6123 }
6124 
6128 static pm_numbered_parameters_node_t *
6129 pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
6130  pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
6131 
6132  *node = (pm_numbered_parameters_node_t) {
6133  {
6134  .type = PM_NUMBERED_PARAMETERS_NODE,
6135  .node_id = PM_NODE_IDENTIFY(parser),
6136  .location = *location
6137  },
6138  .maximum = maximum
6139  };
6140 
6141  return node;
6142 }
6143 
6148 #define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
6149 
6156 static uint32_t
6157 pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
6158  const uint8_t *start = token->start + 1;
6159  const uint8_t *end = token->end;
6160 
6161  ptrdiff_t diff = end - start;
6162  assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
6163  size_t length = (size_t) diff;
6164 
6165  char *digits = xcalloc(length + 1, sizeof(char));
6166  memcpy(digits, start, length);
6167  digits[length] = '\0';
6168 
6169  char *endptr;
6170  errno = 0;
6171  unsigned long value = strtoul(digits, &endptr, 10);
6172 
6173  if ((digits == endptr) || (*endptr != '\0')) {
6174  pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
6175  value = 0;
6176  }
6177 
6178  xfree(digits);
6179 
6180  if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
6181  PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
6182  value = 0;
6183  }
6184 
6185  return (uint32_t) value;
6186 }
6187 
6188 #undef NTH_REF_MAX
6189 
6193 static pm_numbered_reference_read_node_t *
6194 pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
6195  assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
6196  pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
6197 
6198  *node = (pm_numbered_reference_read_node_t) {
6199  {
6200  .type = PM_NUMBERED_REFERENCE_READ_NODE,
6201  .node_id = PM_NODE_IDENTIFY(parser),
6202  .location = PM_LOCATION_TOKEN_VALUE(name),
6203  },
6204  .number = pm_numbered_reference_read_node_number(parser, name)
6205  };
6206 
6207  return node;
6208 }
6209 
6213 static pm_optional_parameter_node_t *
6214 pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
6215  pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
6216 
6217  *node = (pm_optional_parameter_node_t) {
6218  {
6219  .type = PM_OPTIONAL_PARAMETER_NODE,
6220  .node_id = PM_NODE_IDENTIFY(parser),
6221  .location = {
6222  .start = name->start,
6223  .end = value->location.end
6224  }
6225  },
6226  .name = pm_parser_constant_id_token(parser, name),
6227  .name_loc = PM_LOCATION_TOKEN_VALUE(name),
6228  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6229  .value = value
6230  };
6231 
6232  return node;
6233 }
6234 
6238 static pm_or_node_t *
6239 pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6240  pm_assert_value_expression(parser, left);
6241 
6242  pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
6243 
6244  *node = (pm_or_node_t) {
6245  {
6246  .type = PM_OR_NODE,
6247  .node_id = PM_NODE_IDENTIFY(parser),
6248  .location = {
6249  .start = left->location.start,
6250  .end = right->location.end
6251  }
6252  },
6253  .left = left,
6254  .right = right,
6255  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6256  };
6257 
6258  return node;
6259 }
6260 
6264 static pm_parameters_node_t *
6265 pm_parameters_node_create(pm_parser_t *parser) {
6266  pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
6267 
6268  *node = (pm_parameters_node_t) {
6269  {
6270  .type = PM_PARAMETERS_NODE,
6271  .node_id = PM_NODE_IDENTIFY(parser),
6272  .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
6273  },
6274  .rest = NULL,
6275  .keyword_rest = NULL,
6276  .block = NULL,
6277  .requireds = { 0 },
6278  .optionals = { 0 },
6279  .posts = { 0 },
6280  .keywords = { 0 }
6281  };
6282 
6283  return node;
6284 }
6285 
6289 static void
6290 pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
6291  if (params->base.location.start == NULL) {
6292  params->base.location.start = param->location.start;
6293  } else {
6294  params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
6295  }
6296 
6297  if (params->base.location.end == NULL) {
6298  params->base.location.end = param->location.end;
6299  } else {
6300  params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
6301  }
6302 }
6303 
6307 static void
6308 pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
6309  pm_parameters_node_location_set(params, param);
6310  pm_node_list_append(&params->requireds, param);
6311 }
6312 
6316 static void
6317 pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6318  pm_parameters_node_location_set(params, (pm_node_t *) param);
6319  pm_node_list_append(&params->optionals, (pm_node_t *) param);
6320 }
6321 
6325 static void
6326 pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
6327  pm_parameters_node_location_set(params, param);
6328  pm_node_list_append(&params->posts, param);
6329 }
6330 
6334 static void
6335 pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6336  pm_parameters_node_location_set(params, param);
6337  params->rest = param;
6338 }
6339 
6343 static void
6344 pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
6345  pm_parameters_node_location_set(params, param);
6346  pm_node_list_append(&params->keywords, param);
6347 }
6348 
6352 static void
6353 pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6354  assert(params->keyword_rest == NULL);
6355  pm_parameters_node_location_set(params, param);
6356  params->keyword_rest = param;
6357 }
6358 
6362 static void
6363 pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
6364  assert(params->block == NULL);
6365  pm_parameters_node_location_set(params, (pm_node_t *) param);
6366  params->block = param;
6367 }
6368 
6372 static pm_program_node_t *
6373 pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6374  pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
6375 
6376  *node = (pm_program_node_t) {
6377  {
6378  .type = PM_PROGRAM_NODE,
6379  .node_id = PM_NODE_IDENTIFY(parser),
6380  .location = {
6381  .start = statements == NULL ? parser->start : statements->base.location.start,
6382  .end = statements == NULL ? parser->end : statements->base.location.end
6383  }
6384  },
6385  .locals = *locals,
6386  .statements = statements
6387  };
6388 
6389  return node;
6390 }
6391 
6395 static pm_parentheses_node_t *
6396 pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing) {
6397  pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
6398 
6399  *node = (pm_parentheses_node_t) {
6400  {
6401  .type = PM_PARENTHESES_NODE,
6402  .node_id = PM_NODE_IDENTIFY(parser),
6403  .location = {
6404  .start = opening->start,
6405  .end = closing->end
6406  }
6407  },
6408  .body = body,
6409  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6410  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6411  };
6412 
6413  return node;
6414 }
6415 
6419 static pm_pinned_expression_node_t *
6420 pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6421  pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
6422 
6423  *node = (pm_pinned_expression_node_t) {
6424  {
6425  .type = PM_PINNED_EXPRESSION_NODE,
6426  .node_id = PM_NODE_IDENTIFY(parser),
6427  .location = {
6428  .start = operator->start,
6429  .end = rparen->end
6430  }
6431  },
6432  .expression = expression,
6433  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6434  .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
6435  .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
6436  };
6437 
6438  return node;
6439 }
6440 
6444 static pm_pinned_variable_node_t *
6445 pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6446  pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
6447 
6448  *node = (pm_pinned_variable_node_t) {
6449  {
6450  .type = PM_PINNED_VARIABLE_NODE,
6451  .node_id = PM_NODE_IDENTIFY(parser),
6452  .location = {
6453  .start = operator->start,
6454  .end = variable->location.end
6455  }
6456  },
6457  .variable = variable,
6458  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6459  };
6460 
6461  return node;
6462 }
6463 
6467 static pm_post_execution_node_t *
6468 pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6469  pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
6470 
6471  *node = (pm_post_execution_node_t) {
6472  {
6473  .type = PM_POST_EXECUTION_NODE,
6474  .node_id = PM_NODE_IDENTIFY(parser),
6475  .location = {
6476  .start = keyword->start,
6477  .end = closing->end
6478  }
6479  },
6480  .statements = statements,
6481  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6482  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6483  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6484  };
6485 
6486  return node;
6487 }
6488 
6492 static pm_pre_execution_node_t *
6493 pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6494  pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
6495 
6496  *node = (pm_pre_execution_node_t) {
6497  {
6498  .type = PM_PRE_EXECUTION_NODE,
6499  .node_id = PM_NODE_IDENTIFY(parser),
6500  .location = {
6501  .start = keyword->start,
6502  .end = closing->end
6503  }
6504  },
6505  .statements = statements,
6506  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6507  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6508  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6509  };
6510 
6511  return node;
6512 }
6513 
6517 static pm_range_node_t *
6518 pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6519  pm_assert_value_expression(parser, left);
6520  pm_assert_value_expression(parser, right);
6521 
6522  pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
6523  pm_node_flags_t flags = 0;
6524 
6525  // Indicate that this node is an exclusive range if the operator is `...`.
6526  if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6527  flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6528  }
6529 
6530  // Indicate that this node is a static literal (i.e., can be compiled with
6531  // a putobject in CRuby) if the left and right are implicit nil, explicit
6532  // nil, or integers.
6533  if (
6534  (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6535  (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6536  ) {
6537  flags |= PM_NODE_FLAG_STATIC_LITERAL;
6538  }
6539 
6540  *node = (pm_range_node_t) {
6541  {
6542  .type = PM_RANGE_NODE,
6543  .flags = flags,
6544  .node_id = PM_NODE_IDENTIFY(parser),
6545  .location = {
6546  .start = (left == NULL ? operator->start : left->location.start),
6547  .end = (right == NULL ? operator->end : right->location.end)
6548  }
6549  },
6550  .left = left,
6551  .right = right,
6552  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6553  };
6554 
6555  return node;
6556 }
6557 
6561 static pm_redo_node_t *
6562 pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6563  assert(token->type == PM_TOKEN_KEYWORD_REDO);
6564  pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
6565 
6566  *node = (pm_redo_node_t) {{
6567  .type = PM_REDO_NODE,
6568  .node_id = PM_NODE_IDENTIFY(parser),
6569  .location = PM_LOCATION_TOKEN_VALUE(token)
6570  }};
6571 
6572  return node;
6573 }
6574 
6579 static pm_regular_expression_node_t *
6580 pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6581  pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
6582 
6583  *node = (pm_regular_expression_node_t) {
6584  {
6585  .type = PM_REGULAR_EXPRESSION_NODE,
6586  .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6587  .node_id = PM_NODE_IDENTIFY(parser),
6588  .location = {
6589  .start = MIN(opening->start, closing->start),
6590  .end = MAX(opening->end, closing->end)
6591  }
6592  },
6593  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6594  .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6595  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
6596  .unescaped = *unescaped
6597  };
6598 
6599  return node;
6600 }
6601 
6605 static inline pm_regular_expression_node_t *
6606 pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6607  return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6608 }
6609 
6613 static pm_required_parameter_node_t *
6614 pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6615  pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
6616 
6617  *node = (pm_required_parameter_node_t) {
6618  {
6619  .type = PM_REQUIRED_PARAMETER_NODE,
6620  .node_id = PM_NODE_IDENTIFY(parser),
6621  .location = PM_LOCATION_TOKEN_VALUE(token)
6622  },
6623  .name = pm_parser_constant_id_token(parser, token)
6624  };
6625 
6626  return node;
6627 }
6628 
6632 static pm_rescue_modifier_node_t *
6633 pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6634  pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
6635 
6636  *node = (pm_rescue_modifier_node_t) {
6637  {
6638  .type = PM_RESCUE_MODIFIER_NODE,
6639  .node_id = PM_NODE_IDENTIFY(parser),
6640  .location = {
6641  .start = expression->location.start,
6642  .end = rescue_expression->location.end
6643  }
6644  },
6645  .expression = expression,
6646  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6647  .rescue_expression = rescue_expression
6648  };
6649 
6650  return node;
6651 }
6652 
6656 static pm_rescue_node_t *
6657 pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6658  pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
6659 
6660  *node = (pm_rescue_node_t) {
6661  {
6662  .type = PM_RESCUE_NODE,
6663  .node_id = PM_NODE_IDENTIFY(parser),
6664  .location = PM_LOCATION_TOKEN_VALUE(keyword)
6665  },
6666  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6667  .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6668  .reference = NULL,
6669  .statements = NULL,
6670  .subsequent = NULL,
6671  .exceptions = { 0 }
6672  };
6673 
6674  return node;
6675 }
6676 
6677 static inline void
6678 pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
6679  node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
6680 }
6681 
6685 static void
6686 pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6687  node->reference = reference;
6688  node->base.location.end = reference->location.end;
6689 }
6690 
6694 static void
6695 pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6696  node->statements = statements;
6697  if (pm_statements_node_body_length(statements) > 0) {
6698  node->base.location.end = statements->base.location.end;
6699  }
6700 }
6701 
6705 static void
6706 pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6707  node->subsequent = subsequent;
6708  node->base.location.end = subsequent->base.location.end;
6709 }
6710 
6714 static void
6715 pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6716  pm_node_list_append(&node->exceptions, exception);
6717  node->base.location.end = exception->location.end;
6718 }
6719 
6723 static pm_rest_parameter_node_t *
6724 pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6725  pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6726 
6727  *node = (pm_rest_parameter_node_t) {
6728  {
6729  .type = PM_REST_PARAMETER_NODE,
6730  .node_id = PM_NODE_IDENTIFY(parser),
6731  .location = {
6732  .start = operator->start,
6733  .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
6734  }
6735  },
6736  .name = pm_parser_optional_constant_id_token(parser, name),
6737  .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6738  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6739  };
6740 
6741  return node;
6742 }
6743 
6747 static pm_retry_node_t *
6748 pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6749  assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6750  pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6751 
6752  *node = (pm_retry_node_t) {{
6753  .type = PM_RETRY_NODE,
6754  .node_id = PM_NODE_IDENTIFY(parser),
6755  .location = PM_LOCATION_TOKEN_VALUE(token)
6756  }};
6757 
6758  return node;
6759 }
6760 
6764 static pm_return_node_t *
6765 pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6766  pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6767 
6768  *node = (pm_return_node_t) {
6769  {
6770  .type = PM_RETURN_NODE,
6771  .node_id = PM_NODE_IDENTIFY(parser),
6772  .location = {
6773  .start = keyword->start,
6774  .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6775  }
6776  },
6777  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6778  .arguments = arguments
6779  };
6780 
6781  return node;
6782 }
6783 
6787 static pm_self_node_t *
6788 pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6789  assert(token->type == PM_TOKEN_KEYWORD_SELF);
6790  pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6791 
6792  *node = (pm_self_node_t) {{
6793  .type = PM_SELF_NODE,
6794  .node_id = PM_NODE_IDENTIFY(parser),
6795  .location = PM_LOCATION_TOKEN_VALUE(token)
6796  }};
6797 
6798  return node;
6799 }
6800 
6804 static pm_shareable_constant_node_t *
6805 pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6806  pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6807 
6808  *node = (pm_shareable_constant_node_t) {
6809  {
6810  .type = PM_SHAREABLE_CONSTANT_NODE,
6811  .flags = (pm_node_flags_t) value,
6812  .node_id = PM_NODE_IDENTIFY(parser),
6813  .location = PM_LOCATION_NODE_VALUE(write)
6814  },
6815  .write = write
6816  };
6817 
6818  return node;
6819 }
6820 
6824 static pm_singleton_class_node_t *
6825 pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6826  pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6827 
6828  *node = (pm_singleton_class_node_t) {
6829  {
6830  .type = PM_SINGLETON_CLASS_NODE,
6831  .node_id = PM_NODE_IDENTIFY(parser),
6832  .location = {
6833  .start = class_keyword->start,
6834  .end = end_keyword->end
6835  }
6836  },
6837  .locals = *locals,
6838  .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6839  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6840  .expression = expression,
6841  .body = body,
6842  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6843  };
6844 
6845  return node;
6846 }
6847 
6851 static pm_source_encoding_node_t *
6852 pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6853  assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6854  pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6855 
6856  *node = (pm_source_encoding_node_t) {{
6857  .type = PM_SOURCE_ENCODING_NODE,
6858  .flags = PM_NODE_FLAG_STATIC_LITERAL,
6859  .node_id = PM_NODE_IDENTIFY(parser),
6860  .location = PM_LOCATION_TOKEN_VALUE(token)
6861  }};
6862 
6863  return node;
6864 }
6865 
6869 static pm_source_file_node_t*
6870 pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6871  pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6872  assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6873 
6874  pm_node_flags_t flags = 0;
6875 
6876  switch (parser->frozen_string_literal) {
6877  case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6878  flags |= PM_STRING_FLAGS_MUTABLE;
6879  break;
6880  case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6881  flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6882  break;
6883  }
6884 
6885  *node = (pm_source_file_node_t) {
6886  {
6887  .type = PM_SOURCE_FILE_NODE,
6888  .flags = flags,
6889  .node_id = PM_NODE_IDENTIFY(parser),
6890  .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
6891  },
6892  .filepath = parser->filepath
6893  };
6894 
6895  return node;
6896 }
6897 
6901 static pm_source_line_node_t *
6902 pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6903  assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6904  pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6905 
6906  *node = (pm_source_line_node_t) {{
6907  .type = PM_SOURCE_LINE_NODE,
6908  .flags = PM_NODE_FLAG_STATIC_LITERAL,
6909  .node_id = PM_NODE_IDENTIFY(parser),
6910  .location = PM_LOCATION_TOKEN_VALUE(token)
6911  }};
6912 
6913  return node;
6914 }
6915 
6919 static pm_splat_node_t *
6920 pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6921  pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6922 
6923  *node = (pm_splat_node_t) {
6924  {
6925  .type = PM_SPLAT_NODE,
6926  .node_id = PM_NODE_IDENTIFY(parser),
6927  .location = {
6928  .start = operator->start,
6929  .end = (expression == NULL ? operator->end : expression->location.end)
6930  }
6931  },
6932  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6933  .expression = expression
6934  };
6935 
6936  return node;
6937 }
6938 
6942 static pm_statements_node_t *
6943 pm_statements_node_create(pm_parser_t *parser) {
6944  pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6945 
6946  *node = (pm_statements_node_t) {
6947  {
6948  .type = PM_STATEMENTS_NODE,
6949  .node_id = PM_NODE_IDENTIFY(parser),
6950  .location = PM_LOCATION_NULL_VALUE(parser)
6951  },
6952  .body = { 0 }
6953  };
6954 
6955  return node;
6956 }
6957 
6961 static size_t
6962 pm_statements_node_body_length(pm_statements_node_t *node) {
6963  return node && node->body.size;
6964 }
6965 
6969 static void
6970 pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6971  node->base.location = (pm_location_t) { .start = start, .end = end };
6972 }
6973 
6978 static inline void
6979 pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6980  if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
6981  node->base.location.start = statement->location.start;
6982  }
6983 
6984  if (statement->location.end > node->base.location.end) {
6985  node->base.location.end = statement->location.end;
6986  }
6987 }
6988 
6992 static void
6993 pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6994  pm_statements_node_body_update(node, statement);
6995 
6996  if (node->body.size > 0) {
6997  const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6998 
6999  switch (PM_NODE_TYPE(previous)) {
7000  case PM_BREAK_NODE:
7001  case PM_NEXT_NODE:
7002  case PM_REDO_NODE:
7003  case PM_RETRY_NODE:
7004  case PM_RETURN_NODE:
7005  pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
7006  break;
7007  default:
7008  break;
7009  }
7010  }
7011 
7012  pm_node_list_append(&node->body, statement);
7013  if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7014 }
7015 
7019 static void
7020 pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
7021  pm_statements_node_body_update(node, statement);
7022  pm_node_list_prepend(&node->body, statement);
7023  pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7024 }
7025 
7029 static inline pm_string_node_t *
7030 pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
7031  pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
7032  pm_node_flags_t flags = 0;
7033 
7034  switch (parser->frozen_string_literal) {
7035  case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7036  flags = PM_STRING_FLAGS_MUTABLE;
7037  break;
7038  case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7039  flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7040  break;
7041  }
7042 
7043  *node = (pm_string_node_t) {
7044  {
7045  .type = PM_STRING_NODE,
7046  .flags = flags,
7047  .node_id = PM_NODE_IDENTIFY(parser),
7048  .location = {
7049  .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
7050  .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
7051  }
7052  },
7053  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7054  .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7055  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7056  .unescaped = *string
7057  };
7058 
7059  return node;
7060 }
7061 
7065 static pm_string_node_t *
7066 pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7067  return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7068 }
7069 
7074 static pm_string_node_t *
7075 pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7076  pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
7077  parser->current_string = PM_STRING_EMPTY;
7078  return node;
7079 }
7080 
7084 static pm_super_node_t *
7085 pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
7086  assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
7087  pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
7088 
7089  const uint8_t *end = pm_arguments_end(arguments);
7090  if (end == NULL) {
7091  assert(false && "unreachable");
7092  }
7093 
7094  *node = (pm_super_node_t) {
7095  {
7096  .type = PM_SUPER_NODE,
7097  .node_id = PM_NODE_IDENTIFY(parser),
7098  .location = {
7099  .start = keyword->start,
7100  .end = end,
7101  }
7102  },
7103  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7104  .lparen_loc = arguments->opening_loc,
7105  .arguments = arguments->arguments,
7106  .rparen_loc = arguments->closing_loc,
7107  .block = arguments->block
7108  };
7109 
7110  return node;
7111 }
7112 
7117 static bool
7118 pm_ascii_only_p(const pm_string_t *contents) {
7119  const size_t length = pm_string_length(contents);
7120  const uint8_t *source = pm_string_source(contents);
7121 
7122  for (size_t index = 0; index < length; index++) {
7123  if (source[index] & 0x80) return false;
7124  }
7125 
7126  return true;
7127 }
7128 
7132 static void
7133 parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7134  for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7135  size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7136 
7137  if (width == 0) {
7138  pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7139  break;
7140  }
7141 
7142  cursor += width;
7143  }
7144 }
7145 
7150 static void
7151 parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7152  const pm_encoding_t *encoding = parser->encoding;
7153 
7154  for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7155  size_t width = encoding->char_width(cursor, end - cursor);
7156 
7157  if (width == 0) {
7158  pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7159  break;
7160  }
7161 
7162  cursor += width;
7163  }
7164 }
7165 
7175 static inline pm_node_flags_t
7176 parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
7177  if (parser->explicit_encoding != NULL) {
7178  // A Symbol may optionally have its encoding explicitly set. This will
7179  // happen if an escape sequence results in a non-ASCII code point.
7180  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7181  if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
7182  return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
7183  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7184  return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7185  } else if (validate) {
7186  parse_symbol_encoding_validate_other(parser, location, contents);
7187  }
7188  } else if (pm_ascii_only_p(contents)) {
7189  // Ruby stipulates that all source files must use an ASCII-compatible
7190  // encoding. Thus, all symbols appearing in source are eligible for
7191  // "downgrading" to US-ASCII.
7192  return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7193  } else if (validate) {
7194  parse_symbol_encoding_validate_other(parser, location, contents);
7195  }
7196 
7197  return 0;
7198 }
7199 
7200 static pm_node_flags_t
7201 parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
7202  assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
7203  (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
7204  (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
7205  (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
7206 
7207  // There's special validation logic used if a string does not contain any character escape sequences.
7208  if (parser->explicit_encoding == NULL) {
7209  // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
7210  // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
7211  // the US-ASCII encoding.
7212  if (ascii_only) {
7213  return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
7214  }
7215 
7216  if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7217  if (!ascii_only) {
7218  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7219  }
7220  } else if (parser->encoding != modifier_encoding) {
7221  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
7222 
7223  if (modifier == 'n' && !ascii_only) {
7224  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
7225  }
7226  }
7227 
7228  return flags;
7229  }
7230 
7231  // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
7232  bool mixed_encoding = false;
7233 
7234  if (mixed_encoding) {
7235  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7236  } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
7237  // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
7238  bool valid_string_in_modifier_encoding = true;
7239 
7240  if (!valid_string_in_modifier_encoding) {
7241  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7242  }
7243  } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7244  // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
7245  if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7246  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
7247  }
7248  }
7249 
7250  // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
7251  return flags;
7252 }
7253 
7260 static pm_node_flags_t
7261 parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
7262  // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
7263  bool valid_unicode_range = true;
7264  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
7265  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7266  return flags;
7267  }
7268 
7269  // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
7270  // to multi-byte characters are allowed.
7271  if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
7272  // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
7273  // following error message appearing twice. We do the same for compatibility.
7274  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7275  }
7276 
7285  if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
7286  return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
7287  }
7288 
7289  if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
7290  return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
7291  }
7292 
7293  if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
7294  return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
7295  }
7296 
7297  if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
7298  return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
7299  }
7300 
7301  // At this point no encoding modifiers will be present on the regular expression as they would have already
7302  // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
7303  // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
7304  if (ascii_only) {
7305  return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
7306  }
7307 
7308  // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
7309  // or by specifying a modifier.
7310  //
7311  // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
7312  if (parser->explicit_encoding != NULL) {
7313  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7314  return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
7315  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7316  return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
7317  }
7318  }
7319 
7320  return 0;
7321 }
7322 
7327 static pm_symbol_node_t *
7328 pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
7329  pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7330 
7331  *node = (pm_symbol_node_t) {
7332  {
7333  .type = PM_SYMBOL_NODE,
7334  .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
7335  .node_id = PM_NODE_IDENTIFY(parser),
7336  .location = {
7337  .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
7338  .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
7339  }
7340  },
7341  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7342  .value_loc = PM_LOCATION_TOKEN_VALUE(value),
7343  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7344  .unescaped = *unescaped
7345  };
7346 
7347  return node;
7348 }
7349 
7353 static inline pm_symbol_node_t *
7354 pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7355  return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
7356 }
7357 
7361 static pm_symbol_node_t *
7362 pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7363  pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
7364  parser->current_string = PM_STRING_EMPTY;
7365  return node;
7366 }
7367 
7371 static pm_symbol_node_t *
7372 pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
7373  pm_symbol_node_t *node;
7374 
7375  switch (token->type) {
7376  case PM_TOKEN_LABEL: {
7377  pm_token_t opening = not_provided(parser);
7378  pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
7379 
7380  pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
7381  node = pm_symbol_node_create(parser, &opening, &label, &closing);
7382 
7383  assert((label.end - label.start) >= 0);
7384  pm_string_shared_init(&node->unescaped, label.start, label.end);
7385  pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
7386 
7387  break;
7388  }
7389  case PM_TOKEN_MISSING: {
7390  pm_token_t opening = not_provided(parser);
7391  pm_token_t closing = not_provided(parser);
7392 
7393  pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
7394  node = pm_symbol_node_create(parser, &opening, &label, &closing);
7395  break;
7396  }
7397  default:
7398  assert(false && "unreachable");
7399  node = NULL;
7400  break;
7401  }
7402 
7403  return node;
7404 }
7405 
7409 static pm_symbol_node_t *
7410 pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7411  pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7412 
7413  *node = (pm_symbol_node_t) {
7414  {
7415  .type = PM_SYMBOL_NODE,
7416  .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7417  .node_id = PM_NODE_IDENTIFY(parser),
7418  .location = PM_LOCATION_NULL_VALUE(parser)
7419  },
7420  .value_loc = PM_LOCATION_NULL_VALUE(parser),
7421  .unescaped = { 0 }
7422  };
7423 
7424  pm_string_constant_init(&node->unescaped, content, strlen(content));
7425  return node;
7426 }
7427 
7431 static bool
7432 pm_symbol_node_label_p(pm_node_t *node) {
7433  const uint8_t *end = NULL;
7434 
7435  switch (PM_NODE_TYPE(node)) {
7436  case PM_SYMBOL_NODE:
7437  end = ((pm_symbol_node_t *) node)->closing_loc.end;
7438  break;
7439  case PM_INTERPOLATED_SYMBOL_NODE:
7440  end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
7441  break;
7442  default:
7443  return false;
7444  }
7445 
7446  return (end != NULL) && (end[-1] == ':');
7447 }
7448 
7452 static pm_symbol_node_t *
7453 pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
7454  pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7455 
7456  *new_node = (pm_symbol_node_t) {
7457  {
7458  .type = PM_SYMBOL_NODE,
7459  .flags = PM_NODE_FLAG_STATIC_LITERAL,
7460  .node_id = PM_NODE_IDENTIFY(parser),
7461  .location = {
7462  .start = opening->start,
7463  .end = closing->end
7464  }
7465  },
7466  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7467  .value_loc = node->content_loc,
7468  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7469  .unescaped = node->unescaped
7470  };
7471 
7472  pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7473  pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7474 
7475  // We are explicitly _not_ using pm_node_destroy here because we don't want
7476  // to trash the unescaped string. We could instead copy the string if we
7477  // know that it is owned, but we're taking the fast path for now.
7478  xfree(node);
7479 
7480  return new_node;
7481 }
7482 
7486 static pm_string_node_t *
7487 pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
7488  pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
7489  pm_node_flags_t flags = 0;
7490 
7491  switch (parser->frozen_string_literal) {
7492  case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7493  flags = PM_STRING_FLAGS_MUTABLE;
7494  break;
7495  case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7496  flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7497  break;
7498  }
7499 
7500  *new_node = (pm_string_node_t) {
7501  {
7502  .type = PM_STRING_NODE,
7503  .flags = flags,
7504  .node_id = PM_NODE_IDENTIFY(parser),
7505  .location = node->base.location
7506  },
7507  .opening_loc = node->opening_loc,
7508  .content_loc = node->value_loc,
7509  .closing_loc = node->closing_loc,
7510  .unescaped = node->unescaped
7511  };
7512 
7513  // We are explicitly _not_ using pm_node_destroy here because we don't want
7514  // to trash the unescaped string. We could instead copy the string if we
7515  // know that it is owned, but we're taking the fast path for now.
7516  xfree(node);
7517 
7518  return new_node;
7519 }
7520 
7524 static pm_true_node_t *
7525 pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
7526  assert(token->type == PM_TOKEN_KEYWORD_TRUE);
7527  pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7528 
7529  *node = (pm_true_node_t) {{
7530  .type = PM_TRUE_NODE,
7531  .flags = PM_NODE_FLAG_STATIC_LITERAL,
7532  .node_id = PM_NODE_IDENTIFY(parser),
7533  .location = PM_LOCATION_TOKEN_VALUE(token)
7534  }};
7535 
7536  return node;
7537 }
7538 
7542 static pm_true_node_t *
7543 pm_true_node_synthesized_create(pm_parser_t *parser) {
7544  pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7545 
7546  *node = (pm_true_node_t) {{
7547  .type = PM_TRUE_NODE,
7548  .flags = PM_NODE_FLAG_STATIC_LITERAL,
7549  .node_id = PM_NODE_IDENTIFY(parser),
7550  .location = { .start = parser->start, .end = parser->end }
7551  }};
7552 
7553  return node;
7554 }
7555 
7559 static pm_undef_node_t *
7560 pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
7561  assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
7562  pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
7563 
7564  *node = (pm_undef_node_t) {
7565  {
7566  .type = PM_UNDEF_NODE,
7567  .node_id = PM_NODE_IDENTIFY(parser),
7568  .location = PM_LOCATION_TOKEN_VALUE(token),
7569  },
7570  .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
7571  .names = { 0 }
7572  };
7573 
7574  return node;
7575 }
7576 
7580 static void
7581 pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
7582  node->base.location.end = name->location.end;
7583  pm_node_list_append(&node->names, name);
7584 }
7585 
7589 static pm_unless_node_t *
7590 pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
7591  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7592  pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7593 
7594  const uint8_t *end;
7595  if (statements != NULL) {
7596  end = statements->base.location.end;
7597  } else {
7598  end = predicate->location.end;
7599  }
7600 
7601  *node = (pm_unless_node_t) {
7602  {
7603  .type = PM_UNLESS_NODE,
7604  .flags = PM_NODE_FLAG_NEWLINE,
7605  .node_id = PM_NODE_IDENTIFY(parser),
7606  .location = {
7607  .start = keyword->start,
7608  .end = end
7609  },
7610  },
7611  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7612  .predicate = predicate,
7613  .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
7614  .statements = statements,
7615  .else_clause = NULL,
7616  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7617  };
7618 
7619  return node;
7620 }
7621 
7625 static pm_unless_node_t *
7626 pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
7627  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7628  pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7629 
7630  pm_statements_node_t *statements = pm_statements_node_create(parser);
7631  pm_statements_node_body_append(parser, statements, statement, true);
7632 
7633  *node = (pm_unless_node_t) {
7634  {
7635  .type = PM_UNLESS_NODE,
7636  .flags = PM_NODE_FLAG_NEWLINE,
7637  .node_id = PM_NODE_IDENTIFY(parser),
7638  .location = {
7639  .start = statement->location.start,
7640  .end = predicate->location.end
7641  },
7642  },
7643  .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
7644  .predicate = predicate,
7645  .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7646  .statements = statements,
7647  .else_clause = NULL,
7648  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7649  };
7650 
7651  return node;
7652 }
7653 
7654 static inline void
7655 pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
7656  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
7657  node->base.location.end = end_keyword->end;
7658 }
7659 
7665 static void
7666 pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7667  assert(parser->current_block_exits != NULL);
7668 
7669  // All of the block exits that we want to remove should be within the
7670  // statements, and since we are modifying the statements, we shouldn't have
7671  // to check the end location.
7672  const uint8_t *start = statements->base.location.start;
7673 
7674  for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7675  pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7676  if (block_exit->location.start < start) break;
7677 
7678  // Implicitly remove from the list by lowering the size.
7679  parser->current_block_exits->size--;
7680  }
7681 }
7682 
7686 static pm_until_node_t *
7687 pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7688  pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7689  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7690 
7691  *node = (pm_until_node_t) {
7692  {
7693  .type = PM_UNTIL_NODE,
7694  .flags = flags,
7695  .node_id = PM_NODE_IDENTIFY(parser),
7696  .location = {
7697  .start = keyword->start,
7698  .end = closing->end,
7699  },
7700  },
7701  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7702  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7703  .predicate = predicate,
7704  .statements = statements
7705  };
7706 
7707  return node;
7708 }
7709 
7713 static pm_until_node_t *
7714 pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7715  pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7716  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7717  pm_loop_modifier_block_exits(parser, statements);
7718 
7719  *node = (pm_until_node_t) {
7720  {
7721  .type = PM_UNTIL_NODE,
7722  .flags = flags,
7723  .node_id = PM_NODE_IDENTIFY(parser),
7724  .location = {
7725  .start = statements->base.location.start,
7726  .end = predicate->location.end,
7727  },
7728  },
7729  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7730  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7731  .predicate = predicate,
7732  .statements = statements
7733  };
7734 
7735  return node;
7736 }
7737 
7741 static pm_when_node_t *
7742 pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7743  pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
7744 
7745  *node = (pm_when_node_t) {
7746  {
7747  .type = PM_WHEN_NODE,
7748  .node_id = PM_NODE_IDENTIFY(parser),
7749  .location = {
7750  .start = keyword->start,
7751  .end = NULL
7752  }
7753  },
7754  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7755  .statements = NULL,
7756  .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7757  .conditions = { 0 }
7758  };
7759 
7760  return node;
7761 }
7762 
7766 static void
7767 pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
7768  node->base.location.end = condition->location.end;
7769  pm_node_list_append(&node->conditions, condition);
7770 }
7771 
7775 static inline void
7776 pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
7777  node->base.location.end = then_keyword->end;
7778  node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
7779 }
7780 
7784 static void
7785 pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7786  if (statements->base.location.end > node->base.location.end) {
7787  node->base.location.end = statements->base.location.end;
7788  }
7789 
7790  node->statements = statements;
7791 }
7792 
7796 static pm_while_node_t *
7797 pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7798  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7799  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7800 
7801  *node = (pm_while_node_t) {
7802  {
7803  .type = PM_WHILE_NODE,
7804  .flags = flags,
7805  .node_id = PM_NODE_IDENTIFY(parser),
7806  .location = {
7807  .start = keyword->start,
7808  .end = closing->end
7809  },
7810  },
7811  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7812  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7813  .predicate = predicate,
7814  .statements = statements
7815  };
7816 
7817  return node;
7818 }
7819 
7823 static pm_while_node_t *
7824 pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7825  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7826  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7827  pm_loop_modifier_block_exits(parser, statements);
7828 
7829  *node = (pm_while_node_t) {
7830  {
7831  .type = PM_WHILE_NODE,
7832  .flags = flags,
7833  .node_id = PM_NODE_IDENTIFY(parser),
7834  .location = {
7835  .start = statements->base.location.start,
7836  .end = predicate->location.end
7837  },
7838  },
7839  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7840  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7841  .predicate = predicate,
7842  .statements = statements
7843  };
7844 
7845  return node;
7846 }
7847 
7851 static pm_while_node_t *
7852 pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7853  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7854 
7855  *node = (pm_while_node_t) {
7856  {
7857  .type = PM_WHILE_NODE,
7858  .node_id = PM_NODE_IDENTIFY(parser),
7859  .location = PM_LOCATION_NULL_VALUE(parser)
7860  },
7861  .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7862  .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7863  .predicate = predicate,
7864  .statements = statements
7865  };
7866 
7867  return node;
7868 }
7869 
7874 static pm_x_string_node_t *
7875 pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7876  pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7877 
7878  *node = (pm_x_string_node_t) {
7879  {
7880  .type = PM_X_STRING_NODE,
7881  .flags = PM_STRING_FLAGS_FROZEN,
7882  .node_id = PM_NODE_IDENTIFY(parser),
7883  .location = {
7884  .start = opening->start,
7885  .end = closing->end
7886  },
7887  },
7888  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7889  .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7890  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7891  .unescaped = *unescaped
7892  };
7893 
7894  return node;
7895 }
7896 
7900 static inline pm_x_string_node_t *
7901 pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7902  return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7903 }
7904 
7908 static pm_yield_node_t *
7909 pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7910  pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7911 
7912  const uint8_t *end;
7913  if (rparen_loc->start != NULL) {
7914  end = rparen_loc->end;
7915  } else if (arguments != NULL) {
7916  end = arguments->base.location.end;
7917  } else if (lparen_loc->start != NULL) {
7918  end = lparen_loc->end;
7919  } else {
7920  end = keyword->end;
7921  }
7922 
7923  *node = (pm_yield_node_t) {
7924  {
7925  .type = PM_YIELD_NODE,
7926  .node_id = PM_NODE_IDENTIFY(parser),
7927  .location = {
7928  .start = keyword->start,
7929  .end = end
7930  },
7931  },
7932  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7933  .lparen_loc = *lparen_loc,
7934  .arguments = arguments,
7935  .rparen_loc = *rparen_loc
7936  };
7937 
7938  return node;
7939 }
7940 
7941 #undef PM_NODE_ALLOC
7942 #undef PM_NODE_IDENTIFY
7943 
7948 static int
7949 pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7950  pm_scope_t *scope = parser->current_scope;
7951  int depth = 0;
7952 
7953  while (scope != NULL) {
7954  if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7955  if (scope->closed) break;
7956 
7957  scope = scope->previous;
7958  depth++;
7959  }
7960 
7961  return -1;
7962 }
7963 
7969 static inline int
7970 pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7971  return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7972 }
7973 
7977 static inline void
7978 pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7979  pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
7980 }
7981 
7985 static pm_constant_id_t
7986 pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7987  pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
7988  if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7989  return constant_id;
7990 }
7991 
7995 static inline pm_constant_id_t
7996 pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
7997  return pm_parser_local_add_location(parser, token->start, token->end, reads);
7998 }
7999 
8003 static pm_constant_id_t
8004 pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
8005  pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
8006  if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8007  return constant_id;
8008 }
8009 
8013 static pm_constant_id_t
8014 pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
8015  pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
8016  if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8017  return constant_id;
8018 }
8019 
8027 static bool
8028 pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
8029  // We want to check whether the parameter name is a numbered parameter or
8030  // not.
8031  pm_refute_numbered_parameter(parser, name->start, name->end);
8032 
8033  // Otherwise we'll fetch the constant id for the parameter name and check
8034  // whether it's already in the current scope.
8035  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
8036 
8037  if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
8038  // Add an error if the parameter doesn't start with _ and has been seen before
8039  if ((name->start < name->end) && (*name->start != '_')) {
8040  pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
8041  }
8042  return true;
8043  }
8044  return false;
8045 }
8046 
8050 static void
8051 pm_parser_scope_pop(pm_parser_t *parser) {
8052  pm_scope_t *scope = parser->current_scope;
8053  parser->current_scope = scope->previous;
8054  pm_locals_free(&scope->locals);
8055  pm_node_list_free(&scope->implicit_parameters);
8056  xfree(scope);
8057 }
8058 
8059 /******************************************************************************/
8060 /* Stack helpers */
8061 /******************************************************************************/
8062 
8066 static inline void
8067 pm_state_stack_push(pm_state_stack_t *stack, bool value) {
8068  *stack = (*stack << 1) | (value & 1);
8069 }
8070 
8074 static inline void
8075 pm_state_stack_pop(pm_state_stack_t *stack) {
8076  *stack >>= 1;
8077 }
8078 
8082 static inline bool
8083 pm_state_stack_p(const pm_state_stack_t *stack) {
8084  return *stack & 1;
8085 }
8086 
8087 static inline void
8088 pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
8089  // Use the negation of the value to prevent stack overflow.
8090  pm_state_stack_push(&parser->accepts_block_stack, !value);
8091 }
8092 
8093 static inline void
8094 pm_accepts_block_stack_pop(pm_parser_t *parser) {
8095  pm_state_stack_pop(&parser->accepts_block_stack);
8096 }
8097 
8098 static inline bool
8099 pm_accepts_block_stack_p(pm_parser_t *parser) {
8100  return !pm_state_stack_p(&parser->accepts_block_stack);
8101 }
8102 
8103 static inline void
8104 pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
8105  pm_state_stack_push(&parser->do_loop_stack, value);
8106 }
8107 
8108 static inline void
8109 pm_do_loop_stack_pop(pm_parser_t *parser) {
8110  pm_state_stack_pop(&parser->do_loop_stack);
8111 }
8112 
8113 static inline bool
8114 pm_do_loop_stack_p(pm_parser_t *parser) {
8115  return pm_state_stack_p(&parser->do_loop_stack);
8116 }
8117 
8118 /******************************************************************************/
8119 /* Lexer check helpers */
8120 /******************************************************************************/
8121 
8126 static inline uint8_t
8127 peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8128  if (cursor < parser->end) {
8129  return *cursor;
8130  } else {
8131  return '\0';
8132  }
8133 }
8134 
8140 static inline uint8_t
8141 peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8142  return peek_at(parser, parser->current.end + offset);
8143 }
8144 
8149 static inline uint8_t
8150 peek(const pm_parser_t *parser) {
8151  return peek_at(parser, parser->current.end);
8152 }
8153 
8158 static inline bool
8159 match(pm_parser_t *parser, uint8_t value) {
8160  if (peek(parser) == value) {
8161  parser->current.end++;
8162  return true;
8163  }
8164  return false;
8165 }
8166 
8171 static inline size_t
8172 match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
8173  if (peek_at(parser, cursor) == '\n') {
8174  return 1;
8175  }
8176  if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
8177  return 2;
8178  }
8179  return 0;
8180 }
8181 
8187 static inline size_t
8188 match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
8189  return match_eol_at(parser, parser->current.end + offset);
8190 }
8191 
8197 static inline size_t
8198 match_eol(pm_parser_t *parser) {
8199  return match_eol_at(parser, parser->current.end);
8200 }
8201 
/**
 * Find the first "\n" byte within the length bytes starting at cursor. Returns
 * a pointer to it, or NULL when there is none. The length must not be
 * negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);

    // A byte-wise memchr is safe here because no supported encoding uses \n as
    // part of a multi-byte character.
    const size_t span = (size_t) length;
    const void *found = memchr(cursor, '\n', span);

    return (const uint8_t *) found;
}
8214 
8218 static inline bool
8219 ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8220  return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8221 }
8222 
8227 static bool
8228 parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
8229  const pm_encoding_t *encoding = pm_encoding_find(start, end);
8230 
8231  if (encoding != NULL) {
8232  if (parser->encoding != encoding) {
8233  parser->encoding = encoding;
8234  if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
8235  }
8236 
8237  parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
8238  return true;
8239  }
8240 
8241  return false;
8242 }
8243 
/**
 * Scan the current token (a comment) for a "coding" key followed by a '=' or
 * ':' separator and a value, and try to switch the parser to the encoding that
 * the value names. If the value does not name a known encoding, a
 * PM_ERR_INVALID_ENCODING_MAGIC_COMMENT error is added. If no well-formed
 * "coding" key is found, the function returns without doing anything.
 */
8248 static void
8249 parser_lex_magic_comment_encoding(pm_parser_t *parser) {
 // Scanning starts one byte past the start of the current token (presumably
 // to skip the leading '#' -- confirm against the caller).
8250  const uint8_t *cursor = parser->current.start + 1;
8251  const uint8_t *end = parser->current.end;
8252 
 // First, find the word "coding" (case-insensitively). Each pass inspects the
 // byte six positions ahead of the cursor, i.e. the byte that would directly
 // follow a six-byte word starting at the cursor.
8253  bool separator = false;
8254  while (true) {
 // Fewer than seven bytes left: there is no room for "coding" plus a
 // following byte, so give up.
8255  if (end - cursor <= 6) return;
8256  switch (cursor[6]) {
 // If the inspected byte is itself a letter of "coding", advance only far
 // enough that this letter lands at its position inside a six-byte window
 // starting at the cursor, then inspect again.
8257  case 'C': case 'c': cursor += 6; continue;
8258  case 'O': case 'o': cursor += 5; continue;
8259  case 'D': case 'd': cursor += 4; continue;
8260  case 'I': case 'i': cursor += 3; continue;
8261  case 'N': case 'n': cursor += 2; continue;
8262  case 'G': case 'g': cursor += 1; continue;
 // A separator directly after the six-byte window: remember it and go on
 // to verify that the window really is "coding".
8263  case '=': case ':':
8264  separator = true;
8265  cursor += 6;
8266  break;
 // Any other byte: whitespace also ends a candidate window (the separator
 // may follow later); anything else means no match here, so keep going.
8267  default:
8268  cursor += 6;
8269  if (pm_char_is_whitespace(*cursor)) break;
8270  continue;
8271  }
 // The cursor now sits just past the candidate window; stop searching if
 // the six bytes before it spell "coding". Otherwise forget any separator
 // seen and continue the search.
8272  if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
8273  separator = false;
8274  }
8275 
 // Next, move to the start of the value. Each pass steps forward one byte and
 // then skips whitespace, returning if the end of the token is reached.
8276  while (true) {
8277  do {
8278  if (++cursor >= end) return;
8279  } while (pm_char_is_whitespace(*cursor));
8280 
 // If the separator was already seen, the cursor is now at the value.
8281  if (separator) break;
 // Otherwise the first non-whitespace byte must be the separator.
8282  if (*cursor != '=' && *cursor != ':') return;
8283 
 // NOTE(review): after this increment, the pre-increment at the top of the
 // loop advances once more before whitespace skipping resumes, so the byte
 // directly after a separator found here is stepped over. Presumably
 // intentional; confirm before changing.
8284  separator = true;
8285  cursor++;
8286  }
8287 
 // The value runs for as long as bytes are '-', '_', or alphanumeric
 // according to the parser's current encoding, and stops at the token's end.
8288  const uint8_t *value_start = cursor;
8289  while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
8290 
8291  if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
8292  // If we were unable to parse the encoding value, then we've got an
8293  // issue because we didn't understand the encoding that the user was
8294  // trying to use. In this case we'll keep using the default encoding but
8295  // add an error to the parser to indicate an unsuccessful parse.
8296  pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
8297  }
8298 }
8299 
/**
 * The possible outcomes of interpreting a magic comment value as a boolean.
 */
typedef enum {
    /** The value spelled "true". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE = 0,

    /** The value spelled "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE = 1,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID = 2
} pm_magic_comment_boolean_value_t;
8305 
8310 static pm_magic_comment_boolean_value_t
8311 parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
8312  if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
8313  return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
8314  } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
8315  return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
8316  } else {
8317  return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
8318  }
8319 }
8320 
/**
 * Check whether the given byte delimits a magic comment key: a single quote, a
 * double quote, a colon, or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
8325 
8331 static inline const uint8_t *
8332 parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
8333  while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
8334  if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
8335  return cursor;
8336  }
8337  cursor++;
8338  }
8339  return NULL;
8340 }
8341 
8352 static inline bool
8353 parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8354  bool result = true;
8355 
8356  const uint8_t *start = parser->current.start + 1;
8357  const uint8_t *end = parser->current.end;
8358  if (end - start <= 7) return false;
8359 
8360  const uint8_t *cursor;
8361  bool indicator = false;
8362 
8363  if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8364  start = cursor + 3;
8365 
8366  if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8367  end = cursor;
8368  indicator = true;
8369  } else {
8370  // If we have a start marker but not an end marker, then we cannot
8371  // have a magic comment.
8372  return false;
8373  }
8374  }
8375 
8376  cursor = start;
8377  while (cursor < end) {
8378  while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
8379 
8380  const uint8_t *key_start = cursor;
8381  while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
8382 
8383  const uint8_t *key_end = cursor;
8384  while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8385  if (cursor == end) break;
8386 
8387  if (*cursor == ':') {
8388  cursor++;
8389  } else {
8390  if (!indicator) return false;
8391  continue;
8392  }
8393 
8394  while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8395  if (cursor == end) break;
8396 
8397  const uint8_t *value_start;
8398  const uint8_t *value_end;
8399 
8400  if (*cursor == '"') {
8401  value_start = ++cursor;
8402  for (; cursor < end && *cursor != '"'; cursor++) {
8403  if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
8404  }
8405  value_end = cursor;
8406  if (*cursor == '"') cursor++;
8407  } else {
8408  value_start = cursor;
8409  while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
8410  value_end = cursor;
8411  }
8412 
8413  if (indicator) {
8414  while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
8415  } else {
8416  while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8417  if (cursor != end) return false;
8418  }
8419 
8420  // Here, we need to do some processing on the key to swap out dashes for
8421  // underscores. We only need to do this if there _is_ a dash in the key.
8422  pm_string_t key;
8423  const size_t key_length = (size_t) (key_end - key_start);
8424  const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
8425 
8426  if (dash == NULL) {
8427  pm_string_shared_init(&key, key_start, key_end);
8428  } else {
8429  uint8_t *buffer = xmalloc(key_length);
8430  if (buffer == NULL) break;
8431 
8432  memcpy(buffer, key_start, key_length);
8433  buffer[dash - key_start] = '_';
8434 
8435  while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
8436  buffer[dash - key_start] = '_';
8437  }
8438 
8439  pm_string_owned_init(&key, buffer, key_length);
8440  }
8441 
8442  // Finally, we can start checking the key against the list of known
8443  // magic comment keys, and potentially change state based on that.
8444  const uint8_t *key_source = pm_string_source(&key);
8445  uint32_t value_length = (uint32_t) (value_end - value_start);
8446 
8447  // We only want to attempt to compare against encoding comments if it's
8448  // the first line in the file (or the second in the case of a shebang).
8449  if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
8450  if (
8451  (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
8452  (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
8453  ) {
8454  result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
8455  }
8456  }
8457 
8458  if (key_length == 11) {
8459  if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
8460  switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8461  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8462  PM_PARSER_WARN_TOKEN_FORMAT(
8463  parser,
8464  parser->current,
8465  PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8466  (int) key_length,
8467  (const char *) key_source,
8468  (int) value_length,
8469  (const char *) value_start
8470  );
8471  break;
8472  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8473  parser->warn_mismatched_indentation = false;
8474  break;
8475  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8476  parser->warn_mismatched_indentation = true;
8477  break;
8478  }
8479  }
8480  } else if (key_length == 21) {
8481  if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
8482  // We only want to handle frozen string literal comments if it's
8483  // before any semantic tokens have been seen.
8484  if (semantic_token_seen) {
8485  pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
8486  } else {
8487  switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8488  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8489  PM_PARSER_WARN_TOKEN_FORMAT(
8490  parser,
8491  parser->current,
8492  PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8493  (int) key_length,
8494  (const char *) key_source,
8495  (int) value_length,
8496  (const char *) value_start
8497  );
8498  break;
8499  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8501  break;
8502  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8504  break;
8505  }
8506  }
8507  }
8508  } else if (key_length == 24) {
8509  if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8510  const uint8_t *cursor = parser->current.start;
8511  while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8512 
8513  if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8514  pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8515  } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8516  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8517  } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8518  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
8519  } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
8520  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
8521  } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
8522  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
8523  } else {
8524  PM_PARSER_WARN_TOKEN_FORMAT(
8525  parser,
8526  parser->current,
8527  PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8528  (int) key_length,
8529  (const char *) key_source,
8530  (int) value_length,
8531  (const char *) value_start
8532  );
8533  }
8534  }
8535  }
8536 
8537  // When we're done, we want to free the string in case we had to
8538  // allocate memory for it.
8539  pm_string_free(&key);
8540 
8541  // Allocate a new magic comment node to append to the parser's list.
8543  if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
8544  magic_comment->key_start = key_start;
8545  magic_comment->value_start = value_start;
8546  magic_comment->key_length = (uint32_t) key_length;
8547  magic_comment->value_length = value_length;
8549  }
8550  }
8551 
8552  return result;
8553 }
8554 
8555 /******************************************************************************/
8556 /* Context manipulations */
8557 /******************************************************************************/
8558 
// Report whether `token` can terminate a construct that is being parsed in
// `context`. Returns true when the token's type is one of the types accepted
// for that context; PM_CONTEXT_NONE, and any context not listed, yields false.
//
// NOTE(review): the listing numbers embedded in these lines skip values inside
// the switch (for example 8564 -> 8566 and 8568 -> 8570), and several `return`
// statements directly follow another `return`. As written those later returns
// are unreachable; most likely the `case` labels that selected them were lost
// when the listing was produced. Recover them from the upstream file before
// relying on this function.
8559 static bool
8560 context_terminator(pm_context_t context, pm_token_t *token) {
8561  switch (context) {
8562  case PM_CONTEXT_MAIN:
8563  case PM_CONTEXT_DEF_PARAMS:
8564  case PM_CONTEXT_DEFINED:
8566  case PM_CONTEXT_TERNARY:
8568  return token->type == PM_TOKEN_EOF;
// NOTE(review): unreachable as written -- the case label(s) for this return are missing.
8570  return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8571  case PM_CONTEXT_PREEXE:
8572  case PM_CONTEXT_POSTEXE:
8573  return token->type == PM_TOKEN_BRACE_RIGHT;
8574  case PM_CONTEXT_MODULE:
8575  case PM_CONTEXT_CLASS:
8576  case PM_CONTEXT_SCLASS:
8578  case PM_CONTEXT_DEF:
8580  return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
8581  case PM_CONTEXT_WHILE:
8582  case PM_CONTEXT_UNTIL:
8583  case PM_CONTEXT_ELSE:
8584  case PM_CONTEXT_FOR:
8588  case PM_CONTEXT_DEF_ENSURE:
8592  return token->type == PM_TOKEN_KEYWORD_END;
// NOTE(review): unreachable as written -- the case label(s) for this return are missing.
8594  return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
8595  case PM_CONTEXT_FOR_INDEX:
8596  return token->type == PM_TOKEN_KEYWORD_IN;
8597  case PM_CONTEXT_CASE_WHEN:
8598  return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8599  case PM_CONTEXT_CASE_IN:
8600  return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8601  case PM_CONTEXT_IF:
8602  case PM_CONTEXT_ELSIF:
8603  return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
8604  case PM_CONTEXT_UNLESS:
8605  return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8606  case PM_CONTEXT_EMBEXPR:
8607  return token->type == PM_TOKEN_EMBEXPR_END;
// NOTE(review): unreachable as written -- the case label(s) for this return are missing.
8609  return token->type == PM_TOKEN_BRACE_RIGHT;
8610  case PM_CONTEXT_PARENS:
8611  return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8612  case PM_CONTEXT_BEGIN:
8616  case PM_CONTEXT_DEF_RESCUE:
8620  return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8621  case PM_CONTEXT_BEGIN_ELSE:
8622  case PM_CONTEXT_BLOCK_ELSE:
8623  case PM_CONTEXT_CLASS_ELSE:
8624  case PM_CONTEXT_DEF_ELSE:
8628  return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
// NOTE(review): unreachable as written -- the case label(s) for this return are missing.
8630  return token->type == PM_TOKEN_BRACE_RIGHT;
8631  case PM_CONTEXT_PREDICATE:
8632  return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
8633  case PM_CONTEXT_NONE:
8634  return false;
8635  }
8636 
// Reached only for a context value that no case above handles.
8637  return false;
8638 }
8639 
8644 static pm_context_t
8645 context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
8646  pm_context_node_t *context_node = parser->current_context;
8647 
8648  while (context_node != NULL) {
8649  if (context_terminator(context_node->context, token)) return context_node->context;
8650  context_node = context_node->prev;
8651  }
8652 
8653  return PM_CONTEXT_NONE;
8654 }
8655 
8656 static bool
8657 context_push(pm_parser_t *parser, pm_context_t context) {
8658  pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
8659  if (context_node == NULL) return false;
8660 
8661  *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
8662 
8663  if (parser->current_context == NULL) {
8664  parser->current_context = context_node;
8665  } else {
8666  context_node->prev = parser->current_context;
8667  parser->current_context = context_node;
8668  }
8669 
8670  return true;
8671 }
8672 
8673 static void
8674 context_pop(pm_parser_t *parser) {
8675  pm_context_node_t *prev = parser->current_context->prev;
8676  xfree(parser->current_context);
8677  parser->current_context = prev;
8678 }
8679 
8680 static bool
8681 context_p(const pm_parser_t *parser, pm_context_t context) {
8682  pm_context_node_t *context_node = parser->current_context;
8683 
8684  while (context_node != NULL) {
8685  if (context_node->context == context) return true;
8686  context_node = context_node->prev;
8687  }
8688 
8689  return false;
8690 }
8691 
8692 static bool
8693 context_def_p(const pm_parser_t *parser) {
8694  pm_context_node_t *context_node = parser->current_context;
8695 
8696  while (context_node != NULL) {
8697  switch (context_node->context) {
8698  case PM_CONTEXT_DEF:
8699  case PM_CONTEXT_DEF_PARAMS:
8700  case PM_CONTEXT_DEF_ENSURE:
8701  case PM_CONTEXT_DEF_RESCUE:
8702  case PM_CONTEXT_DEF_ELSE:
8703  return true;
8704  case PM_CONTEXT_CLASS:
8707  case PM_CONTEXT_CLASS_ELSE:
8708  case PM_CONTEXT_MODULE:
8712  case PM_CONTEXT_SCLASS:
8716  return false;
8717  default:
8718  context_node = context_node->prev;
8719  }
8720  }
8721 
8722  return false;
8723 }
8724 
8729 static const char *
8730 context_human(pm_context_t context) {
8731  switch (context) {
8732  case PM_CONTEXT_NONE:
8733  assert(false && "unreachable");
8734  return "";
8735  case PM_CONTEXT_BEGIN: return "begin statement";
8736  case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8737  case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8738  case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8739  case PM_CONTEXT_CASE_IN: return "'in' clause";
8740  case PM_CONTEXT_CLASS: return "class definition";
8741  case PM_CONTEXT_DEF: return "method definition";
8742  case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8743  case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8744  case PM_CONTEXT_DEFINED: return "'defined?' expression";
8745  case PM_CONTEXT_ELSE:
8746  case PM_CONTEXT_BEGIN_ELSE:
8747  case PM_CONTEXT_BLOCK_ELSE:
8748  case PM_CONTEXT_CLASS_ELSE:
8749  case PM_CONTEXT_DEF_ELSE:
8752  case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8753  case PM_CONTEXT_ELSIF: return "'elsif' clause";
8754  case PM_CONTEXT_EMBEXPR: return "embedded expression";
8758  case PM_CONTEXT_DEF_ENSURE:
8761  case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8762  case PM_CONTEXT_FOR: return "for loop";
8763  case PM_CONTEXT_FOR_INDEX: return "for loop index";
8764  case PM_CONTEXT_IF: return "if statement";
8765  case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8766  case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8767  case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8768  case PM_CONTEXT_MAIN: return "top level context";
8769  case PM_CONTEXT_MODULE: return "module definition";
8770  case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8771  case PM_CONTEXT_PARENS: return "parentheses";
8772  case PM_CONTEXT_POSTEXE: return "'END' block";
8773  case PM_CONTEXT_PREDICATE: return "predicate";
8774  case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8778  case PM_CONTEXT_DEF_RESCUE:
8782  case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8783  case PM_CONTEXT_SCLASS: return "singleton class definition";
8784  case PM_CONTEXT_TERNARY: return "ternary expression";
8785  case PM_CONTEXT_UNLESS: return "unless statement";
8786  case PM_CONTEXT_UNTIL: return "until statement";
8787  case PM_CONTEXT_WHILE: return "while statement";
8788  }
8789 
8790  assert(false && "unreachable");
8791  return "";
8792 }
8793 
8794 /******************************************************************************/
8795 /* Specific token lexers */
8796 /******************************************************************************/
8797 
8798 static inline void
8799 pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8800  if (invalid != NULL) {
8801  pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8802  pm_parser_err(parser, invalid, invalid + 1, diag_id);
8803  }
8804 }
8805 
8806 static size_t
8807 pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8808  const uint8_t *invalid = NULL;
8809  size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8810  pm_strspn_number_validate(parser, string, length, invalid);
8811  return length;
8812 }
8813 
8814 static size_t
8815 pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8816  const uint8_t *invalid = NULL;
8817  size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8818  pm_strspn_number_validate(parser, string, length, invalid);
8819  return length;
8820 }
8821 
8822 static size_t
8823 pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8824  const uint8_t *invalid = NULL;
8825  size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8826  pm_strspn_number_validate(parser, string, length, invalid);
8827  return length;
8828 }
8829 
8830 static size_t
8831 pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8832  const uint8_t *invalid = NULL;
8833  size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8834  pm_strspn_number_validate(parser, string, length, invalid);
8835  return length;
8836 }
8837 
8838 static pm_token_type_t
8839 lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8841 
8842  // Here we're going to attempt to parse the optional decimal portion of a
8843  // float. If it's not there, then it's okay and we'll just continue on.
8844  if (peek(parser) == '.') {
8845  if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8846  parser->current.end += 2;
8847  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8848  type = PM_TOKEN_FLOAT;
8849  } else {
8850  // If we had a . and then something else, then it's not a float
8851  // suffix on a number it's a method call or something else.
8852  return type;
8853  }
8854  }
8855 
8856  // Here we're going to attempt to parse the optional exponent portion of a
8857  // float. If it's not there, it's okay and we'll just continue on.
8858  if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8859  if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
8860  parser->current.end += 2;
8861 
8862  if (pm_char_is_decimal_digit(peek(parser))) {
8863  parser->current.end++;
8864  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8865  } else {
8866  pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8867  }
8868  } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8869  parser->current.end++;
8870  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8871  } else {
8872  return type;
8873  }
8874 
8875  *seen_e = true;
8876  type = PM_TOKEN_FLOAT;
8877  }
8878 
8879  return type;
8880 }
8881 
8882 static pm_token_type_t
8883 lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8885  *seen_e = false;
8886 
8887  if (peek_offset(parser, -1) == '0') {
8888  switch (*parser->current.end) {
8889  // 0d1111 is a decimal number
8890  case 'd':
8891  case 'D':
8892  parser->current.end++;
8893  if (pm_char_is_decimal_digit(peek(parser))) {
8894  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8895  } else {
8896  match(parser, '_');
8897  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8898  }
8899 
8900  break;
8901 
8902  // 0b1111 is a binary number
8903  case 'b':
8904  case 'B':
8905  parser->current.end++;
8906  if (pm_char_is_binary_digit(peek(parser))) {
8907  parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8908  } else {
8909  match(parser, '_');
8910  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8911  }
8912 
8914  break;
8915 
8916  // 0o1111 is an octal number
8917  case 'o':
8918  case 'O':
8919  parser->current.end++;
8920  if (pm_char_is_octal_digit(peek(parser))) {
8921  parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8922  } else {
8923  match(parser, '_');
8924  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8925  }
8926 
8928  break;
8929 
8930  // 01111 is an octal number
8931  case '_':
8932  case '0':
8933  case '1':
8934  case '2':
8935  case '3':
8936  case '4':
8937  case '5':
8938  case '6':
8939  case '7':
8940  parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8942  break;
8943 
8944  // 0x1111 is a hexadecimal number
8945  case 'x':
8946  case 'X':
8947  parser->current.end++;
8948  if (pm_char_is_hexadecimal_digit(peek(parser))) {
8949  parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8950  } else {
8951  match(parser, '_');
8952  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8953  }
8954 
8956  break;
8957 
8958  // 0.xxx is a float
8959  case '.': {
8960  type = lex_optional_float_suffix(parser, seen_e);
8961  break;
8962  }
8963 
8964  // 0exxx is a float
8965  case 'e':
8966  case 'E': {
8967  type = lex_optional_float_suffix(parser, seen_e);
8968  break;
8969  }
8970  }
8971  } else {
8972  // If it didn't start with a 0, then we'll lex as far as we can into a
8973  // decimal number.
8974  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8975 
8976  // Afterward, we'll lex as far as we can into an optional float suffix.
8977  type = lex_optional_float_suffix(parser, seen_e);
8978  }
8979 
8980  // At this point we have a completed number, but we want to provide the user
8981  // with a good experience if they put an additional .xxx fractional
8982  // component on the end, so we'll check for that here.
8983  if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8984  const uint8_t *fraction_start = parser->current.end;
8985  const uint8_t *fraction_end = parser->current.end + 2;
8986  fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8987  pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8988  }
8989 
8990  return type;
8991 }
8992 
8993 static pm_token_type_t
8994 lex_numeric(pm_parser_t *parser) {
8997 
8998  if (parser->current.end < parser->end) {
8999  bool seen_e = false;
9000  type = lex_numeric_prefix(parser, &seen_e);
9001 
9002  const uint8_t *end = parser->current.end;
9003  pm_token_type_t suffix_type = type;
9004 
9005  if (type == PM_TOKEN_INTEGER) {
9006  if (match(parser, 'r')) {
9007  suffix_type = PM_TOKEN_INTEGER_RATIONAL;
9008 
9009  if (match(parser, 'i')) {
9011  }
9012  } else if (match(parser, 'i')) {
9013  suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
9014  }
9015  } else {
9016  if (!seen_e && match(parser, 'r')) {
9017  suffix_type = PM_TOKEN_FLOAT_RATIONAL;
9018 
9019  if (match(parser, 'i')) {
9020  suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
9021  }
9022  } else if (match(parser, 'i')) {
9023  suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
9024  }
9025  }
9026 
9027  const uint8_t b = peek(parser);
9028  if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
9029  parser->current.end = end;
9030  } else {
9031  type = suffix_type;
9032  }
9033  }
9034 
9035  return type;
9036 }
9037 
9038 static pm_token_type_t
9039 lex_global_variable(pm_parser_t *parser) {
9040  if (parser->current.end >= parser->end) {
9041  pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9042  return PM_TOKEN_GLOBAL_VARIABLE;
9043  }
9044 
9045  // True if multiple characters are allowed after the declaration of the
9046  // global variable. Not true when it starts with "$-".
9047  bool allow_multiple = true;
9048 
9049  switch (*parser->current.end) {
9050  case '~': // $~: match-data
9051  case '*': // $*: argv
9052  case '$': // $$: pid
9053  case '?': // $?: last status
9054  case '!': // $!: error string
9055  case '@': // $@: error position
9056  case '/': // $/: input record separator
9057  case '\\': // $\: output record separator
9058  case ';': // $;: field separator
9059  case ',': // $,: output field separator
9060  case '.': // $.: last read line number
9061  case '=': // $=: ignorecase
9062  case ':': // $:: load path
9063  case '<': // $<: reading filename
9064  case '>': // $>: default output handle
9065  case '\"': // $": already loaded files
9066  parser->current.end++;
9067  return PM_TOKEN_GLOBAL_VARIABLE;
9068 
9069  case '&': // $&: last match
9070  case '`': // $`: string before last match
9071  case '\'': // $': string after last match
9072  case '+': // $+: string matches last paren.
9073  parser->current.end++;
9074  return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
9075 
9076  case '0': {
9077  parser->current.end++;
9078  size_t width;
9079 
9080  if (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
9081  do {
9082  parser->current.end += width;
9083  } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9084 
9085  // $0 isn't allowed to be followed by anything.
9086  pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9087  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
9088  }
9089 
9090  return PM_TOKEN_GLOBAL_VARIABLE;
9091  }
9092 
9093  case '1':
9094  case '2':
9095  case '3':
9096  case '4':
9097  case '5':
9098  case '6':
9099  case '7':
9100  case '8':
9101  case '9':
9102  parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
9103  return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
9104 
9105  case '-':
9106  parser->current.end++;
9107  allow_multiple = false;
9108  /* fallthrough */
9109  default: {
9110  size_t width;
9111 
9112  if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
9113  do {
9114  parser->current.end += width;
9115  } while (allow_multiple && parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9116  } else if (pm_char_is_whitespace(peek(parser))) {
9117  // If we get here, then we have a $ followed by whitespace,
9118  // which is not allowed.
9119  pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9120  } else {
9121  // If we get here, then we have a $ followed by something that
9122  // isn't recognized as a global variable.
9123  pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9124  const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9125  PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9126  }
9127 
9128  return PM_TOKEN_GLOBAL_VARIABLE;
9129  }
9130  }
9131 }
9132 
9145 static inline pm_token_type_t
9146 lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
9147  if (memcmp(current_start, value, vlen) == 0) {
9148  pm_lex_state_t last_state = parser->lex_state;
9149 
9150  if (parser->lex_state & PM_LEX_STATE_FNAME) {
9151  lex_state_set(parser, PM_LEX_STATE_ENDFN);
9152  } else {
9153  lex_state_set(parser, state);
9154  if (state == PM_LEX_STATE_BEG) {
9155  parser->command_start = true;
9156  }
9157 
9158  if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
9159  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
9160  return modifier_type;
9161  }
9162  }
9163 
9164  return type;
9165  }
9166 
9167  return PM_TOKEN_EOF;
9168 }
9169 
9170 static pm_token_type_t
9171 lex_identifier(pm_parser_t *parser, bool previous_command_start) {
9172  // Lex as far as we can into the current identifier.
9173  size_t width;
9174  const uint8_t *end = parser->end;
9175  const uint8_t *current_start = parser->current.start;
9176  const uint8_t *current_end = parser->current.end;
9177  bool encoding_changed = parser->encoding_changed;
9178 
9179  if (encoding_changed) {
9180  while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
9181  current_end += width;
9182  }
9183  } else {
9184  while (current_end < end && (width = char_is_identifier_utf8(current_end, end)) > 0) {
9185  current_end += width;
9186  }
9187  }
9188  parser->current.end = current_end;
9189 
9190  // Now cache the length of the identifier so that we can quickly compare it
9191  // against known keywords.
9192  width = (size_t) (current_end - current_start);
9193 
9194  if (current_end < end) {
9195  if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
9196  // First we'll attempt to extend the identifier by a ! or ?. Then we'll
9197  // check if we're returning the defined? keyword or just an identifier.
9198  width++;
9199 
9200  if (
9201  ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9202  (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
9203  ) {
9204  // If we're in a position where we can accept a : at the end of an
9205  // identifier, then we'll optionally accept it.
9206  lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9207  (void) match(parser, ':');
9208  return PM_TOKEN_LABEL;
9209  }
9210 
9211  if (parser->lex_state != PM_LEX_STATE_DOT) {
9212  if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
9213  return PM_TOKEN_KEYWORD_DEFINED;
9214  }
9215  }
9216 
9217  return PM_TOKEN_METHOD_NAME;
9218  }
9219 
9220  if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
9221  // If we're in a position where we can accept a = at the end of an
9222  // identifier, then we'll optionally accept it.
9223  return PM_TOKEN_IDENTIFIER;
9224  }
9225 
9226  if (
9227  ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9228  peek(parser) == ':' && peek_offset(parser, 1) != ':'
9229  ) {
9230  // If we're in a position where we can accept a : at the end of an
9231  // identifier, then we'll optionally accept it.
9232  lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9233  (void) match(parser, ':');
9234  return PM_TOKEN_LABEL;
9235  }
9236  }
9237 
9238  if (parser->lex_state != PM_LEX_STATE_DOT) {
9240  switch (width) {
9241  case 2:
9242  if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
9243  if (pm_do_loop_stack_p(parser)) {
9244  return PM_TOKEN_KEYWORD_DO_LOOP;
9245  }
9246  return PM_TOKEN_KEYWORD_DO;
9247  }
9248 
9249  if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
9250  if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9251  if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9252  break;
9253  case 3:
9254  if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9255  if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9256  if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9257  if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9258  if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9259  if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9260  if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9261  break;
9262  case 4:
9263  if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9264  if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9265  if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9266  if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9267  if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9268  if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9269  if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9270  if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9271  break;
9272  case 5:
9273  if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9274  if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9275  if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9276  if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9277  if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9278  if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9279  if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9280  if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9281  if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9282  if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9283  if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
9284  if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
9285  if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9286  break;
9287  case 6:
9288  if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9289  if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9290  if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
9291  if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9292  if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
9293  break;
9294  case 8:
9295  if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9296  if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9297  break;
9298  case 12:
9299  if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9300  break;
9301  }
9302  }
9303 
9304  if (encoding_changed) {
9305  return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9306  }
9307  return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9308 }
9309 
9314 static bool
9315 current_token_starts_line(pm_parser_t *parser) {
9316  return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
9317 }
9318 
9333 static pm_token_type_t
9334 lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
9335  // If there is no content following this #, then we're at the end of
9336  // the string and we can safely return string content.
9337  if (pound + 1 >= parser->end) {
9338  parser->current.end = pound + 1;
9339  return PM_TOKEN_STRING_CONTENT;
9340  }
9341 
9342  // Now we'll check against the character that follows the #. If it constitutes
9343  // valid interplation, we'll handle that, otherwise we'll return
9344  // PM_TOKEN_NOT_PROVIDED.
9345  switch (pound[1]) {
9346  case '@': {
9347  // In this case we may have hit an embedded instance or class variable.
9348  if (pound + 2 >= parser->end) {
9349  parser->current.end = pound + 1;
9350  return PM_TOKEN_STRING_CONTENT;
9351  }
9352 
9353  // If we're looking at a @ and there's another @, then we'll skip past the
9354  // second @.
9355  const uint8_t *variable = pound + 2;
9356  if (*variable == '@' && pound + 3 < parser->end) variable++;
9357 
9358  if (char_is_identifier_start(parser, variable)) {
9359  // At this point we're sure that we've either hit an embedded instance
9360  // or class variable. In this case we'll first need to check if we've
9361  // already consumed content.
9362  if (pound > parser->current.start) {
9363  parser->current.end = pound;
9364  return PM_TOKEN_STRING_CONTENT;
9365  }
9366 
9367  // Otherwise we need to return the embedded variable token
9368  // and then switch to the embedded variable lex mode.
9369  lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9370  parser->current.end = pound + 1;
9371  return PM_TOKEN_EMBVAR;
9372  }
9373 
9374  // If we didn't get a valid interpolation, then this is just regular
9375  // string content. This is like if we get "#@-". In this case the caller
9376  // should keep lexing.
9377  parser->current.end = pound + 1;
9378  return PM_TOKEN_NOT_PROVIDED;
9379  }
9380  case '$':
9381  // In this case we may have hit an embedded global variable. If there's
9382  // not enough room, then we'll just return string content.
9383  if (pound + 2 >= parser->end) {
9384  parser->current.end = pound + 1;
9385  return PM_TOKEN_STRING_CONTENT;
9386  }
9387 
9388  // This is the character that we're going to check to see if it is the
9389  // start of an identifier that would indicate that this is a global
9390  // variable.
9391  const uint8_t *check = pound + 2;
9392 
9393  if (pound[2] == '-') {
9394  if (pound + 3 >= parser->end) {
9395  parser->current.end = pound + 2;
9396  return PM_TOKEN_STRING_CONTENT;
9397  }
9398 
9399  check++;
9400  }
9401 
9402  // If the character that we're going to check is the start of an
9403  // identifier, or we don't have a - and the character is a decimal number
9404  // or a global name punctuation character, then we've hit an embedded
9405  // global variable.
9406  if (
9407  char_is_identifier_start(parser, check) ||
9408  (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
9409  ) {
9410  // In this case we've hit an embedded global variable. First check to
9411  // see if we've already consumed content. If we have, then we need to
9412  // return that content as string content first.
9413  if (pound > parser->current.start) {
9414  parser->current.end = pound;
9415  return PM_TOKEN_STRING_CONTENT;
9416  }
9417 
9418  // Otherwise, we need to return the embedded variable token and switch
9419  // to the embedded variable lex mode.
9420  lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9421  parser->current.end = pound + 1;
9422  return PM_TOKEN_EMBVAR;
9423  }
9424 
9425  // In this case we've hit a #$ that does not indicate a global variable.
9426  // In this case we'll continue lexing past it.
9427  parser->current.end = pound + 1;
9428  return PM_TOKEN_NOT_PROVIDED;
9429  case '{':
9430  // In this case it's the start of an embedded expression. If we have
9431  // already consumed content, then we need to return that content as string
9432  // content first.
9433  if (pound > parser->current.start) {
9434  parser->current.end = pound;
9435  return PM_TOKEN_STRING_CONTENT;
9436  }
9437 
9438  parser->enclosure_nesting++;
9439 
9440  // Otherwise we'll skip past the #{ and begin lexing the embedded
9441  // expression.
9442  lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
9443  parser->current.end = pound + 2;
9444  parser->command_start = true;
9445  pm_do_loop_stack_push(parser, false);
9446  return PM_TOKEN_EMBEXPR_BEGIN;
9447  default:
9448  // In this case we've hit a # that doesn't constitute interpolation. We'll
9449  // mark that by returning the not provided token type. This tells the
9450  // consumer to keep lexing forward.
9451  parser->current.end = pound + 1;
9452  return PM_TOKEN_NOT_PROVIDED;
9453  }
9454 }
9455 
/**
 * Bit flags describing the context in which an escape sequence is being read.
 * They are combined with bitwise OR and passed through the escape_* helpers.
 */

/** No modifier is active. */
static const uint8_t PM_ESCAPE_FLAG_NONE = 0;

/** Inside a \c or \C- escape: escape_byte() masks the value with 0x9f. */
static const uint8_t PM_ESCAPE_FLAG_CONTROL = 1u << 0;

/** Inside a \M- escape: escape_byte() sets the 0x80 bit on the value. */
static const uint8_t PM_ESCAPE_FLAG_META = 1u << 1;

/** The escape is part of a ?x character literal. */
static const uint8_t PM_ESCAPE_FLAG_SINGLE = 1u << 2;

/** The escape is inside a regular expression; its source is also recorded. */
static const uint8_t PM_ESCAPE_FLAG_REGEXP = 1u << 3;
9461 
/**
 * Lookup table, indexed by byte value 0x00-0x7F, used by
 * char_is_ascii_printable(). Entries are 1 for the whitespace controls
 * 0x09-0x0D and for 0x20-0x7E with the exception of the backslash (0x5C);
 * everything else, including DEL (0x7F), is 0.
 */
static const bool ascii_printable_chars[] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true if the byte is flagged in ascii_printable_chars. Bytes at or
 * above 0x80 are outside the table and are never considered printable.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    if (b >= 0x80) {
        return false;
    }

    return ascii_printable_chars[b];
}
9480 
/**
 * Convert a single hexadecimal digit character into its numeric value. The
 * result is only meaningful for '0'-'9', 'a'-'f' and 'A'-'F'; the input is
 * not validated here.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    // Decimal digits map directly onto 0-9.
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    // For both 'a'-'f' and 'A'-'F' the low three bits are 1-6, so adding 9
    // yields 10-15.
    return (uint8_t) ((value & 0x7) + 9);
}
9489 
9495 static inline uint32_t
9496 escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9497  uint32_t value = 0;
9498  for (size_t index = 0; index < length; index++) {
9499  if (index != 0) value <<= 4;
9500  value |= escape_hexadecimal_digit(string[index]);
9501  }
9502 
9503  // Here we're going to verify that the value is actually a valid Unicode
9504  // codepoint and not a surrogate pair.
9505  if (value >= 0xD800 && value <= 0xDFFF) {
9506  pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9507  return 0xFFFD;
9508  }
9509 
9510  return value;
9511 }
9512 
9516 static inline uint8_t
9517 escape_byte(uint8_t value, const uint8_t flags) {
9518  if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9519  if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9520  return value;
9521 }
9522 
9526 static inline void
9527 escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
9528  // \u escape sequences in string-like structures implicitly change the
9529  // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
9530  // literal.
9531  if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
9532  if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
9533  PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
9534  }
9535 
9537  }
9538 
9539  if (value <= 0x7F) { // 0xxxxxxx
9540  pm_buffer_append_byte(buffer, (uint8_t) value);
9541  } else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
9542  pm_buffer_append_byte(buffer, (uint8_t) (0xC0 | (value >> 6)));
9543  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9544  } else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
9545  pm_buffer_append_byte(buffer, (uint8_t) (0xE0 | (value >> 12)));
9546  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
9547  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9548  } else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
9549  pm_buffer_append_byte(buffer, (uint8_t) (0xF0 | (value >> 18)));
9550  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
9551  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
9552  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9553  } else {
9554  pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
9555  pm_buffer_append_byte(buffer, 0xEF);
9556  pm_buffer_append_byte(buffer, 0xBF);
9557  pm_buffer_append_byte(buffer, 0xBD);
9558  }
9559 }
9560 
9565 static inline void
9566 escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
9567  if (byte >= 0x80) {
9568  if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
9569  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
9570  }
9571 
9572  parser->explicit_encoding = parser->encoding;
9573  }
9574 
9575  pm_buffer_append_byte(buffer, byte);
9576 }
9577 
9581 static inline void
9582 escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
9583  size_t width;
9584  if (parser->encoding_changed) {
9585  width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9586  } else {
9587  width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9588  }
9589 
9590  // TODO: If the character is invalid in the given encoding, then we'll just
9591  // push one byte into the buffer. This should actually be an error.
9592  width = (width == 0) ? 1 : width;
9593 
9594  for (size_t index = 0; index < width; index++) {
9595  escape_write_byte_encoded(parser, buffer, *parser->current.end);
9596  parser->current.end++;
9597  }
9598 }
9599 
9615 static inline void
9616 escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9617  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9618  pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9619  }
9620 
9621  escape_write_byte_encoded(parser, buffer, byte);
9622 }
9623 
9629 static void
9630 escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
9631 #define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
9632 
9633  PM_PARSER_WARN_TOKEN_FORMAT(
9634  parser,
9635  parser->current,
9636  PM_WARN_INVALID_CHARACTER,
9637  FLAG(flags),
9638  FLAG(flag),
9639  type
9640  );
9641 
9642 #undef FLAG
9643 }
9644 
9648 static void
9649 escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9650  switch (peek(parser)) {
9651  case '\\': {
9652  parser->current.end++;
9653  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9654  return;
9655  }
9656  case '\'': {
9657  parser->current.end++;
9658  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9659  return;
9660  }
9661  case 'a': {
9662  parser->current.end++;
9663  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9664  return;
9665  }
9666  case 'b': {
9667  parser->current.end++;
9668  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9669  return;
9670  }
9671  case 'e': {
9672  parser->current.end++;
9673  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9674  return;
9675  }
9676  case 'f': {
9677  parser->current.end++;
9678  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9679  return;
9680  }
9681  case 'n': {
9682  parser->current.end++;
9683  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9684  return;
9685  }
9686  case 'r': {
9687  parser->current.end++;
9688  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9689  return;
9690  }
9691  case 's': {
9692  parser->current.end++;
9693  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9694  return;
9695  }
9696  case 't': {
9697  parser->current.end++;
9698  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9699  return;
9700  }
9701  case 'v': {
9702  parser->current.end++;
9703  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9704  return;
9705  }
9706  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9707  uint8_t value = (uint8_t) (*parser->current.end - '0');
9708  parser->current.end++;
9709 
9710  if (pm_char_is_octal_digit(peek(parser))) {
9711  value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9712  parser->current.end++;
9713 
9714  if (pm_char_is_octal_digit(peek(parser))) {
9715  value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9716  parser->current.end++;
9717  }
9718  }
9719 
9720  escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9721  return;
9722  }
9723  case 'x': {
9724  const uint8_t *start = parser->current.end - 1;
9725 
9726  parser->current.end++;
9727  uint8_t byte = peek(parser);
9728 
9729  if (pm_char_is_hexadecimal_digit(byte)) {
9730  uint8_t value = escape_hexadecimal_digit(byte);
9731  parser->current.end++;
9732 
9733  byte = peek(parser);
9734  if (pm_char_is_hexadecimal_digit(byte)) {
9735  value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9736  parser->current.end++;
9737  }
9738 
9739  value = escape_byte(value, flags);
9740  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9741  if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9742  pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9743  } else {
9744  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9745  }
9746  }
9747 
9748  escape_write_byte_encoded(parser, buffer, value);
9749  } else {
9750  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9751  }
9752 
9753  return;
9754  }
9755  case 'u': {
9756  const uint8_t *start = parser->current.end - 1;
9757  parser->current.end++;
9758 
9759  if (parser->current.end == parser->end) {
9760  const uint8_t *start = parser->current.end - 2;
9761  PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9762  } else if (peek(parser) == '{') {
9763  const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9764  parser->current.end++;
9765 
9766  size_t whitespace;
9767  while (true) {
9768  if ((whitespace = pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9769  parser->current.end += whitespace;
9770  } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9771  // This is super hacky, but it gets us nicer error
9772  // messages because we can still pass it off to the
9773  // regular expression engine even if we hit an
9774  // unterminated regular expression.
9775  parser->current.end += 2;
9776  } else {
9777  break;
9778  }
9779  }
9780 
9781  const uint8_t *extra_codepoints_start = NULL;
9782  int codepoints_count = 0;
9783 
9784  while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9785  const uint8_t *unicode_start = parser->current.end;
9786  size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9787 
9788  if (hexadecimal_length > 6) {
9789  // \u{nnnn} character literal allows only 1-6 hexadecimal digits
9790  pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9791  } else if (hexadecimal_length == 0) {
9792  // there are not hexadecimal characters
9793 
9794  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9795  // If this is a regular expression, we are going to
9796  // let the regular expression engine handle this
9797  // error instead of us.
9798  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9799  } else {
9800  pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9801  pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9802  }
9803 
9804  return;
9805  }
9806 
9807  parser->current.end += hexadecimal_length;
9808  codepoints_count++;
9809  if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9810  extra_codepoints_start = unicode_start;
9811  }
9812 
9813  uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9814  escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9815 
9816  parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
9817  }
9818 
9819  // ?\u{nnnn} character literal should contain only one codepoint
9820  // and cannot be like ?\u{nnnn mmmm}.
9821  if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9822  pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9823  }
9824 
9825  if (parser->current.end == parser->end) {
9826  PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9827  } else if (peek(parser) == '}') {
9828  parser->current.end++;
9829  } else {
9830  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9831  // If this is a regular expression, we are going to let
9832  // the regular expression engine handle this error
9833  // instead of us.
9834  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9835  } else {
9836  pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9837  }
9838  }
9839 
9840  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9841  pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9842  }
9843  } else {
9844  size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9845 
9846  if (length == 0) {
9847  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9848  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9849  } else {
9850  const uint8_t *start = parser->current.end - 2;
9851  PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9852  }
9853  } else if (length == 4) {
9854  uint32_t value = escape_unicode(parser, parser->current.end, 4);
9855 
9856  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9857  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9858  }
9859 
9860  escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9861  parser->current.end += 4;
9862  } else {
9863  parser->current.end += length;
9864 
9865  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9866  // If this is a regular expression, we are going to let
9867  // the regular expression engine handle this error
9868  // instead of us.
9869  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9870  } else {
9871  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9872  }
9873  }
9874  }
9875 
9876  return;
9877  }
9878  case 'c': {
9879  parser->current.end++;
9880  if (flags & PM_ESCAPE_FLAG_CONTROL) {
9881  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9882  }
9883 
9884  if (parser->current.end == parser->end) {
9885  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9886  return;
9887  }
9888 
9889  uint8_t peeked = peek(parser);
9890  switch (peeked) {
9891  case '?': {
9892  parser->current.end++;
9893  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9894  return;
9895  }
9896  case '\\':
9897  parser->current.end++;
9898 
9899  if (match(parser, 'u') || match(parser, 'U')) {
9900  pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9901  return;
9902  }
9903 
9904  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9905  return;
9906  case ' ':
9907  parser->current.end++;
9908  escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9909  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9910  return;
9911  case '\t':
9912  parser->current.end++;
9913  escape_read_warn(parser, flags, 0, "\\t");
9914  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9915  return;
9916  default: {
9917  if (!char_is_ascii_printable(peeked)) {
9918  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9919  return;
9920  }
9921 
9922  parser->current.end++;
9923  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9924  return;
9925  }
9926  }
9927  }
9928  case 'C': {
9929  parser->current.end++;
9930  if (flags & PM_ESCAPE_FLAG_CONTROL) {
9931  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9932  }
9933 
9934  if (peek(parser) != '-') {
9935  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9936  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9937  return;
9938  }
9939 
9940  parser->current.end++;
9941  if (parser->current.end == parser->end) {
9942  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9943  return;
9944  }
9945 
9946  uint8_t peeked = peek(parser);
9947  switch (peeked) {
9948  case '?': {
9949  parser->current.end++;
9950  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9951  return;
9952  }
9953  case '\\':
9954  parser->current.end++;
9955 
9956  if (match(parser, 'u') || match(parser, 'U')) {
9957  pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9958  return;
9959  }
9960 
9961  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9962  return;
9963  case ' ':
9964  parser->current.end++;
9965  escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9966  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9967  return;
9968  case '\t':
9969  parser->current.end++;
9970  escape_read_warn(parser, flags, 0, "\\t");
9971  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9972  return;
9973  default: {
9974  if (!char_is_ascii_printable(peeked)) {
9975  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9976  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9977  return;
9978  }
9979 
9980  parser->current.end++;
9981  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9982  return;
9983  }
9984  }
9985  }
9986  case 'M': {
9987  parser->current.end++;
9988  if (flags & PM_ESCAPE_FLAG_META) {
9989  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9990  }
9991 
9992  if (peek(parser) != '-') {
9993  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9994  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9995  return;
9996  }
9997 
9998  parser->current.end++;
9999  if (parser->current.end == parser->end) {
10000  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
10001  return;
10002  }
10003 
10004  uint8_t peeked = peek(parser);
10005  switch (peeked) {
10006  case '\\':
10007  parser->current.end++;
10008 
10009  if (match(parser, 'u') || match(parser, 'U')) {
10010  pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
10011  return;
10012  }
10013 
10014  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
10015  return;
10016  case ' ':
10017  parser->current.end++;
10018  escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
10019  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10020  return;
10021  case '\t':
10022  parser->current.end++;
10023  escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
10024  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10025  return;
10026  default:
10027  if (!char_is_ascii_printable(peeked)) {
10028  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10029  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10030  return;
10031  }
10032 
10033  parser->current.end++;
10034  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10035  return;
10036  }
10037  }
10038  case '\r': {
10039  if (peek_offset(parser, 1) == '\n') {
10040  parser->current.end += 2;
10041  escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10042  return;
10043  }
10044  }
10045  /* fallthrough */
10046  default: {
10047  if (parser->current.end < parser->end) {
10048  escape_write_escape_encoded(parser, buffer);
10049  } else {
10050  pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10051  }
10052  return;
10053  }
10054  }
10055 }
10056 
10082 static pm_token_type_t
10083 lex_question_mark(pm_parser_t *parser) {
10084  if (lex_state_end_p(parser)) {
10085  lex_state_set(parser, PM_LEX_STATE_BEG);
10086  return PM_TOKEN_QUESTION_MARK;
10087  }
10088 
10089  if (parser->current.end >= parser->end) {
10090  pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
10091  pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10093  }
10094 
10095  if (pm_char_is_whitespace(*parser->current.end)) {
10096  lex_state_set(parser, PM_LEX_STATE_BEG);
10097  return PM_TOKEN_QUESTION_MARK;
10098  }
10099 
10100  lex_state_set(parser, PM_LEX_STATE_BEG);
10101 
10102  if (match(parser, '\\')) {
10103  lex_state_set(parser, PM_LEX_STATE_END);
10104 
10105  pm_buffer_t buffer;
10106  pm_buffer_init_capacity(&buffer, 3);
10107 
10108  escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
10109  pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
10110 
10112  } else {
10113  size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10114 
10115  // Ternary operators can have a ? immediately followed by an identifier
10116  // which starts with an underscore. We check for this case here.
10117  if (
10118  !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
10119  (
10120  (parser->current.end + encoding_width >= parser->end) ||
10121  !char_is_identifier(parser, parser->current.end + encoding_width)
10122  )
10123  ) {
10124  lex_state_set(parser, PM_LEX_STATE_END);
10125  parser->current.end += encoding_width;
10126  pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10128  }
10129  }
10130 
10131  return PM_TOKEN_QUESTION_MARK;
10132 }
10133 
10138 static pm_token_type_t
10139 lex_at_variable(pm_parser_t *parser) {
10141  size_t width;
10142 
10143  if (parser->current.end < parser->end && (width = char_is_identifier_start(parser, parser->current.end)) > 0) {
10144  parser->current.end += width;
10145 
10146  while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
10147  parser->current.end += width;
10148  }
10149  } else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
10150  pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10151  if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
10152  diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10153  }
10154 
10155  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10156  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
10157  } else {
10158  pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
10159  pm_parser_err_token(parser, &parser->current, diag_id);
10160  }
10161 
10162  // If we're lexing an embedded variable, then we need to pop back into the
10163  // parent lex context.
10164  if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10165  lex_mode_pop(parser);
10166  }
10167 
10168  return type;
10169 }
10170 
10174 static inline void
10175 parser_lex_callback(pm_parser_t *parser) {
10176  if (parser->lex_callback) {
10177  parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
10178  }
10179 }
10180 
10184 static inline pm_comment_t *
10185 parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
10186  pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
10187  if (comment == NULL) return NULL;
10188 
10189  *comment = (pm_comment_t) {
10190  .type = type,
10191  .location = { parser->current.start, parser->current.end }
10192  };
10193 
10194  return comment;
10195 }
10196 
10202 static pm_token_type_t
10203 lex_embdoc(pm_parser_t *parser) {
10204  // First, lex out the EMBDOC_BEGIN token.
10205  const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10206 
10207  if (newline == NULL) {
10208  parser->current.end = parser->end;
10209  } else {
10210  pm_newline_list_append(&parser->newline_list, newline);
10211  parser->current.end = newline + 1;
10212  }
10213 
10214  parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
10215  parser_lex_callback(parser);
10216 
10217  // Now, create a comment that is going to be attached to the parser.
10218  pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
10219  if (comment == NULL) return PM_TOKEN_EOF;
10220 
10221  // Now, loop until we find the end of the embedded documentation or the end
10222  // of the file.
10223  while (parser->current.end + 4 <= parser->end) {
10224  parser->current.start = parser->current.end;
10225 
10226  // If we've hit the end of the embedded documentation then we'll return
10227  // that token here.
10228  if (
10229  (memcmp(parser->current.end, "=end", 4) == 0) &&
10230  (
10231  (parser->current.end + 4 == parser->end) || // end of file
10232  pm_char_is_whitespace(parser->current.end[4]) || // whitespace
10233  (parser->current.end[4] == '\0') || // NUL or end of script
10234  (parser->current.end[4] == '\004') || // ^D
10235  (parser->current.end[4] == '\032') // ^Z
10236  )
10237  ) {
10238  const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10239 
10240  if (newline == NULL) {
10241  parser->current.end = parser->end;
10242  } else {
10243  pm_newline_list_append(&parser->newline_list, newline);
10244  parser->current.end = newline + 1;
10245  }
10246 
10247  parser->current.type = PM_TOKEN_EMBDOC_END;
10248  parser_lex_callback(parser);
10249 
10250  comment->location.end = parser->current.end;
10251  pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10252 
10253  return PM_TOKEN_EMBDOC_END;
10254  }
10255 
10256  // Otherwise, we'll parse until the end of the line and return a line of
10257  // embedded documentation.
10258  const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10259 
10260  if (newline == NULL) {
10261  parser->current.end = parser->end;
10262  } else {
10263  pm_newline_list_append(&parser->newline_list, newline);
10264  parser->current.end = newline + 1;
10265  }
10266 
10267  parser->current.type = PM_TOKEN_EMBDOC_LINE;
10268  parser_lex_callback(parser);
10269  }
10270 
10271  pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
10272 
10273  comment->location.end = parser->current.end;
10274  pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10275 
10276  return PM_TOKEN_EOF;
10277 }
10278 
10284 static inline void
10285 parser_lex_ignored_newline(pm_parser_t *parser) {
10286  parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
10287  parser_lex_callback(parser);
10288 }
10289 
10299 static inline void
10300 parser_flush_heredoc_end(pm_parser_t *parser) {
10301  assert(parser->heredoc_end <= parser->end);
10302  parser->next_start = parser->heredoc_end;
10303  parser->heredoc_end = NULL;
10304 }
10305 
10309 static bool
10310 parser_end_of_line_p(const pm_parser_t *parser) {
10311  const uint8_t *cursor = parser->current.end;
10312 
10313  while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
10314  if (!pm_char_is_inline_whitespace(*cursor++)) return false;
10315  }
10316 
10317  return true;
10318 }
10319 
10338 typedef struct {
10344 
10349  const uint8_t *cursor;
10351 
10364 typedef struct {
10367 
10371 
10375 static inline void
10376 pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
10377  pm_buffer_append_byte(&token_buffer->buffer, byte);
10378 }
10379 
10380 static inline void
10381 pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
10382  pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
10383 }
10384 
10388 static inline size_t
10389 parser_char_width(const pm_parser_t *parser) {
10390  size_t width;
10391  if (parser->encoding_changed) {
10392  width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10393  } else {
10394  width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
10395  }
10396 
10397  // TODO: If the character is invalid in the given encoding, then we'll just
10398  // push one byte into the buffer. This should actually be an error.
10399  return (width == 0 ? 1 : width);
10400 }
10401 
10405 static void
10406 pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
10407  size_t width = parser_char_width(parser);
10408  pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
10409  parser->current.end += width;
10410 }
10411 
10412 static void
10413 pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
10414  size_t width = parser_char_width(parser);
10415  pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
10416  pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
10417  parser->current.end += width;
10418 }
10419 
/**
 * Returns true if none of the given bytes has its high bit set, i.e. the
 * slice consists of 7-bit ASCII only. An empty slice is ASCII-only.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    while (length-- > 0) {
        if ((*value++ & 0x80) != 0) return false;
    }

    return true;
}
10428 
10435 static inline void
10436 pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10437  pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
10438 }
10439 
10440 static inline void
10441 pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10442  pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
10443  parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
10444  pm_buffer_free(&token_buffer->regexp_buffer);
10445 }
10446 
10456 static void
10457 pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10458  if (token_buffer->cursor == NULL) {
10459  pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10460  } else {
10461  pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
10462  pm_token_buffer_copy(parser, token_buffer);
10463  }
10464 }
10465 
10466 static void
10467 pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10468  if (token_buffer->base.cursor == NULL) {
10469  pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10470  parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
10471  } else {
10472  pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10473  pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10474  pm_regexp_token_buffer_copy(parser, token_buffer);
10475  }
10476 }
10477 
10478 #define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
10479 
10488 static void
10489 pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10490  const uint8_t *start;
10491  if (token_buffer->cursor == NULL) {
10492  pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10493  start = parser->current.start;
10494  } else {
10495  start = token_buffer->cursor;
10496  }
10497 
10498  const uint8_t *end = parser->current.end - 1;
10499  pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10500 
10501  token_buffer->cursor = end;
10502 }
10503 
10504 static void
10505 pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10506  const uint8_t *start;
10507  if (token_buffer->base.cursor == NULL) {
10508  pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10509  pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10510  start = parser->current.start;
10511  } else {
10512  start = token_buffer->base.cursor;
10513  }
10514 
10515  const uint8_t *end = parser->current.end - 1;
10516  pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
10517  pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
10518 
10519  token_buffer->base.cursor = end;
10520 }
10521 
10522 #undef PM_TOKEN_BUFFER_DEFAULT_SIZE
10523 
10528 static inline size_t
10529 pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
10530  size_t whitespace = 0;
10531 
10532  switch (indent) {
10533  case PM_HEREDOC_INDENT_NONE:
10534  // Do nothing, we can't match a terminator with
10535  // indentation and there's no need to calculate common
10536  // whitespace.
10537  break;
10538  case PM_HEREDOC_INDENT_DASH:
10539  // Skip past inline whitespace.
10540  *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
10541  break;
10542  case PM_HEREDOC_INDENT_TILDE:
10543  // Skip past inline whitespace and calculate common
10544  // whitespace.
10545  while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
10546  if (**cursor == '\t') {
10547  whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
10548  } else {
10549  whitespace++;
10550  }
10551  (*cursor)++;
10552  }
10553 
10554  break;
10555  }
10556 
10557  return whitespace;
10558 }
10559 
10564 static uint8_t
10565 pm_lex_percent_delimiter(pm_parser_t *parser) {
10566  size_t eol_length = match_eol(parser);
10567 
10568  if (eol_length) {
10569  if (parser->heredoc_end) {
10570  // If we have already lexed a heredoc, then the newline has already
10571  // been added to the list. In this case we want to just flush the
10572  // heredoc end.
10573  parser_flush_heredoc_end(parser);
10574  } else {
10575  // Otherwise, we'll add the newline to the list of newlines.
10576  pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10577  }
10578 
10579  const uint8_t delimiter = *parser->current.end;
10580  parser->current.end += eol_length;
10581 
10582  return delimiter;
10583  }
10584 
10585  return *parser->current.end++;
10586 }
10587 
/**
 * Set the type of the current token, report the token to the lex callback, and
 * return from the enclosing function. This can only be used as a statement
 * (`LEX(type);`) inside a function returning void that has `parser` in scope.
 *
 * The body is wrapped in do/while (0) so that it behaves as a single statement
 * even under an unbraced if/else, and the argument is parenthesized so that
 * any expression can be passed safely.
 */
#define LEX(token_type) do { parser->current.type = (token_type); parser_lex_callback(parser); return; } while (0)
10593 
10600 static void
10601 parser_lex(pm_parser_t *parser) {
10602  assert(parser->current.end <= parser->end);
10603  parser->previous = parser->current;
10604 
10605  // This value mirrors cmd_state from CRuby.
10606  bool previous_command_start = parser->command_start;
10607  parser->command_start = false;
10608 
10609  // This is used to communicate to the newline lexing function that we've
10610  // already seen a comment.
10611  bool lexed_comment = false;
10612 
10613  // Here we cache the current value of the semantic token seen flag. This is
10614  // used to reset it in case we find a token that shouldn't flip this flag.
10615  unsigned int semantic_token_seen = parser->semantic_token_seen;
10616  parser->semantic_token_seen = true;
10617 
10618  switch (parser->lex_modes.current->mode) {
10619  case PM_LEX_DEFAULT:
10620  case PM_LEX_EMBEXPR:
10621  case PM_LEX_EMBVAR:
10622 
10623  // We have a specific named label here because we are going to jump back to
10624  // this location in the event that we have lexed a token that should not be
10625  // returned to the parser. This includes comments, ignored newlines, and
10626  // invalid tokens of some form.
10627  lex_next_token: {
10628  // If we have the special next_start pointer set, then we're going to jump
10629  // to that location and start lexing from there.
10630  if (parser->next_start != NULL) {
10631  parser->current.end = parser->next_start;
10632  parser->next_start = NULL;
10633  }
10634 
10635  // This value mirrors space_seen from CRuby. It tracks whether or not
10636  // space has been eaten before the start of the next token.
10637  bool space_seen = false;
10638 
10639  // First, we're going to skip past any whitespace at the front of the next
10640  // token.
10641  bool chomping = true;
10642  while (parser->current.end < parser->end && chomping) {
10643  switch (*parser->current.end) {
10644  case ' ':
10645  case '\t':
10646  case '\f':
10647  case '\v':
10648  parser->current.end++;
10649  space_seen = true;
10650  break;
10651  case '\r':
10652  if (match_eol_offset(parser, 1)) {
10653  chomping = false;
10654  } else {
10655  pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10656  parser->current.end++;
10657  space_seen = true;
10658  }
10659  break;
10660  case '\\': {
10661  size_t eol_length = match_eol_offset(parser, 1);
10662  if (eol_length) {
10663  if (parser->heredoc_end) {
10664  parser->current.end = parser->heredoc_end;
10665  parser->heredoc_end = NULL;
10666  } else {
10667  parser->current.end += eol_length + 1;
10668  pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10669  space_seen = true;
10670  }
10671  } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10672  parser->current.end += 2;
10673  } else {
10674  chomping = false;
10675  }
10676 
10677  break;
10678  }
10679  default:
10680  chomping = false;
10681  break;
10682  }
10683  }
10684 
10685  // Next, we'll set to start of this token to be the current end.
10686  parser->current.start = parser->current.end;
10687 
10688  // We'll check if we're at the end of the file. If we are, then we
10689  // need to return the EOF token.
10690  if (parser->current.end >= parser->end) {
10691  LEX(PM_TOKEN_EOF);
10692  }
10693 
10694  // Finally, we'll check the current character to determine the next
10695  // token.
10696  switch (*parser->current.end++) {
10697  case '\0': // NUL or end of script
10698  case '\004': // ^D
10699  case '\032': // ^Z
10700  parser->current.end--;
10701  LEX(PM_TOKEN_EOF);
10702 
10703  case '#': { // comments
10704  const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10705  parser->current.end = ending == NULL ? parser->end : ending;
10706 
10707  // If we found a comment while lexing, then we're going to
10708  // add it to the list of comments in the file and keep
10709  // lexing.
10710  pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10711  pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10712 
10713  if (ending) parser->current.end++;
10714  parser->current.type = PM_TOKEN_COMMENT;
10715  parser_lex_callback(parser);
10716 
10717  // Here, parse the comment to see if it's a magic comment
10718  // and potentially change state on the parser.
10719  if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10720  ptrdiff_t length = parser->current.end - parser->current.start;
10721 
10722  // If we didn't find a magic comment within the first
10723  // pass and we're at the start of the file, then we need
10724  // to do another pass to potentially find other patterns
10725  // for encoding comments.
10726  if (length >= 10 && !parser->encoding_locked) {
10727  parser_lex_magic_comment_encoding(parser);
10728  }
10729  }
10730 
10731  lexed_comment = true;
10732  }
10733  /* fallthrough */
10734  case '\r':
10735  case '\n': {
10736  parser->semantic_token_seen = semantic_token_seen & 0x1;
10737  size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10738 
10739  if (eol_length) {
10740  // The only way you can have carriage returns in this
10741  // particular loop is if you have a carriage return
10742  // followed by a newline. In that case we'll just skip
10743  // over the carriage return and continue lexing, in
10744  // order to make it so that the newline token
10745  // encapsulates both the carriage return and the
10746  // newline. Note that we need to check that we haven't
10747  // already lexed a comment here because that falls
10748  // through into here as well.
10749  if (!lexed_comment) {
10750  parser->current.end += eol_length - 1; // skip CR
10751  }
10752 
10753  if (parser->heredoc_end == NULL) {
10754  pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10755  }
10756  }
10757 
10758  if (parser->heredoc_end) {
10759  parser_flush_heredoc_end(parser);
10760  }
10761 
10762  // If this is an ignored newline, then we can continue lexing after
10763  // calling the callback with the ignored newline token.
10764  switch (lex_state_ignored_p(parser)) {
10765  case PM_IGNORED_NEWLINE_NONE:
10766  break;
10767  case PM_IGNORED_NEWLINE_PATTERN:
10768  if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10769  if (!lexed_comment) parser_lex_ignored_newline(parser);
10770  lex_state_set(parser, PM_LEX_STATE_BEG);
10771  parser->command_start = true;
10772  parser->current.type = PM_TOKEN_NEWLINE;
10773  return;
10774  }
10775  /* fallthrough */
10776  case PM_IGNORED_NEWLINE_ALL:
10777  if (!lexed_comment) parser_lex_ignored_newline(parser);
10778  lexed_comment = false;
10779  goto lex_next_token;
10780  }
10781 
10782  // Here we need to look ahead and see if there is a call operator
10783  // (either . or &.) that starts the next line. If there is, then this
10784  // is going to become an ignored newline and we're going to instead
10785  // return the call operator.
10786  const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10787  next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10788 
10789  if (next_content < parser->end) {
10790  // If we hit a comment after a newline, then we're going to check
10791  // if it's ignored or if it's followed by a method call ('.').
10792  // If it is, then we're going to call the
10793  // callback with an ignored newline and then continue lexing.
10794  // Otherwise we'll return a regular newline.
10795  if (next_content[0] == '#') {
10796  // Here we look for a "." or "&." following a "\n".
10797  const uint8_t *following = next_newline(next_content, parser->end - next_content);
10798 
10799  while (following && (following + 1 < parser->end)) {
10800  following++;
10801  following += pm_strspn_inline_whitespace(following, parser->end - following);
10802 
10803  // If this is not followed by a comment, then we can break out
10804  // of this loop.
10805  if (peek_at(parser, following) != '#') break;
10806 
10807  // If there is a comment, then we need to find the end of the
10808  // comment and continue searching from there.
10809  following = next_newline(following, parser->end - following);
10810  }
10811 
10812  // If the lex state was ignored, or we hit a '.' or a '&.',
10813  // we will lex the ignored newline
10814  if (
10815  lex_state_ignored_p(parser) ||
10816  (following && (
10817  (peek_at(parser, following) == '.') ||
10818  (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10819  ))
10820  ) {
10821  if (!lexed_comment) parser_lex_ignored_newline(parser);
10822  lexed_comment = false;
10823  goto lex_next_token;
10824  }
10825  }
10826 
10827  // If we hit a . after a newline, then we're in a call chain and
10828  // we need to return the call operator.
10829  if (next_content[0] == '.') {
10830  // To match ripper, we need to emit an ignored newline even though
10831  // it's a real newline in the case that we have a beginless range
10832  // on a subsequent line.
10833  if (peek_at(parser, next_content + 1) == '.') {
10834  if (!lexed_comment) parser_lex_ignored_newline(parser);
10835  lex_state_set(parser, PM_LEX_STATE_BEG);
10836  parser->command_start = true;
10837  parser->current.type = PM_TOKEN_NEWLINE;
10838  return;
10839  }
10840 
10841  if (!lexed_comment) parser_lex_ignored_newline(parser);
10842  lex_state_set(parser, PM_LEX_STATE_DOT);
10843  parser->current.start = next_content;
10844  parser->current.end = next_content + 1;
10845  parser->next_start = NULL;
10846  LEX(PM_TOKEN_DOT);
10847  }
10848 
10849  // If we hit a &. after a newline, then we're in a call chain and
10850  // we need to return the call operator.
10851  if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10852  if (!lexed_comment) parser_lex_ignored_newline(parser);
10853  lex_state_set(parser, PM_LEX_STATE_DOT);
10854  parser->current.start = next_content;
10855  parser->current.end = next_content + 2;
10856  parser->next_start = NULL;
10858  }
10859  }
10860 
10861  // At this point we know this is a regular newline, and we can set the
10862  // necessary state and return the token.
10863  lex_state_set(parser, PM_LEX_STATE_BEG);
10864  parser->command_start = true;
10865  parser->current.type = PM_TOKEN_NEWLINE;
10866  if (!lexed_comment) parser_lex_callback(parser);
10867  return;
10868  }
10869 
10870  // ,
10871  case ',':
10872  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10873  LEX(PM_TOKEN_COMMA);
10874 
10875  // (
10876  case '(': {
10878 
10879  if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10881  }
10882 
10883  parser->enclosure_nesting++;
10884  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10885  pm_do_loop_stack_push(parser, false);
10886  LEX(type);
10887  }
10888 
10889  // )
10890  case ')':
10891  parser->enclosure_nesting--;
10892  lex_state_set(parser, PM_LEX_STATE_ENDFN);
10893  pm_do_loop_stack_pop(parser);
10895 
10896  // ;
10897  case ';':
10898  lex_state_set(parser, PM_LEX_STATE_BEG);
10899  parser->command_start = true;
10900  LEX(PM_TOKEN_SEMICOLON);
10901 
10902  // [ [] []=
10903  case '[':
10904  parser->enclosure_nesting++;
10906 
10907  if (lex_state_operator_p(parser)) {
10908  if (match(parser, ']')) {
10909  parser->enclosure_nesting--;
10910  lex_state_set(parser, PM_LEX_STATE_ARG);
10912  }
10913 
10914  lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10915  LEX(type);
10916  }
10917 
10918  if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10920  }
10921 
10922  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10923  pm_do_loop_stack_push(parser, false);
10924  LEX(type);
10925 
10926  // ]
10927  case ']':
10928  parser->enclosure_nesting--;
10929  lex_state_set(parser, PM_LEX_STATE_END);
10930  pm_do_loop_stack_pop(parser);
10932 
10933  // {
10934  case '{': {
10936 
10937  if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10938  // This { begins a lambda
10939  parser->command_start = true;
10940  lex_state_set(parser, PM_LEX_STATE_BEG);
10942  } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10943  // This { begins a hash literal
10944  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10945  } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10946  // This { begins a block
10947  parser->command_start = true;
10948  lex_state_set(parser, PM_LEX_STATE_BEG);
10949  } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10950  // This { begins a block on a command
10951  parser->command_start = true;
10952  lex_state_set(parser, PM_LEX_STATE_BEG);
10953  } else {
10954  // This { begins a hash literal
10955  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10956  }
10957 
10958  parser->enclosure_nesting++;
10959  parser->brace_nesting++;
10960  pm_do_loop_stack_push(parser, false);
10961 
10962  LEX(type);
10963  }
10964 
10965  // }
10966  case '}':
10967  parser->enclosure_nesting--;
10968  pm_do_loop_stack_pop(parser);
10969 
10970  if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10971  lex_mode_pop(parser);
10972  LEX(PM_TOKEN_EMBEXPR_END);
10973  }
10974 
10975  parser->brace_nesting--;
10976  lex_state_set(parser, PM_LEX_STATE_END);
10977  LEX(PM_TOKEN_BRACE_RIGHT);
10978 
10979  // * ** **= *=
10980  case '*': {
10981  if (match(parser, '*')) {
10982  if (match(parser, '=')) {
10983  lex_state_set(parser, PM_LEX_STATE_BEG);
10985  }
10986 
10988 
10989  if (lex_state_spcarg_p(parser, space_seen)) {
10990  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
10992  } else if (lex_state_beg_p(parser)) {
10994  } else if (ambiguous_operator_p(parser, space_seen)) {
10995  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10996  }
10997 
10998  if (lex_state_operator_p(parser)) {
10999  lex_state_set(parser, PM_LEX_STATE_ARG);
11000  } else {
11001  lex_state_set(parser, PM_LEX_STATE_BEG);
11002  }
11003 
11004  LEX(type);
11005  }
11006 
11007  if (match(parser, '=')) {
11008  lex_state_set(parser, PM_LEX_STATE_BEG);
11009  LEX(PM_TOKEN_STAR_EQUAL);
11010  }
11011 
11013 
11014  if (lex_state_spcarg_p(parser, space_seen)) {
11015  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
11016  type = PM_TOKEN_USTAR;
11017  } else if (lex_state_beg_p(parser)) {
11018  type = PM_TOKEN_USTAR;
11019  } else if (ambiguous_operator_p(parser, space_seen)) {
11020  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
11021  }
11022 
11023  if (lex_state_operator_p(parser)) {
11024  lex_state_set(parser, PM_LEX_STATE_ARG);
11025  } else {
11026  lex_state_set(parser, PM_LEX_STATE_BEG);
11027  }
11028 
11029  LEX(type);
11030  }
11031 
11032  // ! != !~ !@
11033  case '!':
11034  if (lex_state_operator_p(parser)) {
11035  lex_state_set(parser, PM_LEX_STATE_ARG);
11036  if (match(parser, '@')) {
11037  LEX(PM_TOKEN_BANG);
11038  }
11039  } else {
11040  lex_state_set(parser, PM_LEX_STATE_BEG);
11041  }
11042 
11043  if (match(parser, '=')) {
11044  LEX(PM_TOKEN_BANG_EQUAL);
11045  }
11046 
11047  if (match(parser, '~')) {
11048  LEX(PM_TOKEN_BANG_TILDE);
11049  }
11050 
11051  LEX(PM_TOKEN_BANG);
11052 
11053  // = => =~ == === =begin
11054  case '=':
11055  if (
11056  current_token_starts_line(parser) &&
11057  (parser->current.end + 5 <= parser->end) &&
11058  memcmp(parser->current.end, "begin", 5) == 0 &&
11059  (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
11060  ) {
11061  pm_token_type_t type = lex_embdoc(parser);
11062  if (type == PM_TOKEN_EOF) {
11063  LEX(type);
11064  }
11065 
11066  goto lex_next_token;
11067  }
11068 
11069  if (lex_state_operator_p(parser)) {
11070  lex_state_set(parser, PM_LEX_STATE_ARG);
11071  } else {
11072  lex_state_set(parser, PM_LEX_STATE_BEG);
11073  }
11074 
11075  if (match(parser, '>')) {
11077  }
11078 
11079  if (match(parser, '~')) {
11080  LEX(PM_TOKEN_EQUAL_TILDE);
11081  }
11082 
11083  if (match(parser, '=')) {
11084  LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
11085  }
11086 
11087  LEX(PM_TOKEN_EQUAL);
11088 
11089  // < << <<= <= <=>
11090  case '<':
11091  if (match(parser, '<')) {
11092  if (
11093  !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
11094  !lex_state_end_p(parser) &&
11095  (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
11096  ) {
11097  const uint8_t *end = parser->current.end;
11098 
11099  pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
11100  pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
11101 
11102  if (match(parser, '-')) {
11103  indent = PM_HEREDOC_INDENT_DASH;
11104  }
11105  else if (match(parser, '~')) {
11106  indent = PM_HEREDOC_INDENT_TILDE;
11107  }
11108 
11109  if (match(parser, '`')) {
11110  quote = PM_HEREDOC_QUOTE_BACKTICK;
11111  }
11112  else if (match(parser, '"')) {
11113  quote = PM_HEREDOC_QUOTE_DOUBLE;
11114  }
11115  else if (match(parser, '\'')) {
11116  quote = PM_HEREDOC_QUOTE_SINGLE;
11117  }
11118 
11119  const uint8_t *ident_start = parser->current.end;
11120  size_t width = 0;
11121 
11122  if (parser->current.end >= parser->end) {
11123  parser->current.end = end;
11124  } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
11125  parser->current.end = end;
11126  } else {
11127  if (quote == PM_HEREDOC_QUOTE_NONE) {
11128  parser->current.end += width;
11129 
11130  while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
11131  parser->current.end += width;
11132  }
11133  } else {
11134  // If we have quotes, then we're going to go until we find the
11135  // end quote.
11136  while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
11137  if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
11138  parser->current.end++;
11139  }
11140  }
11141 
11142  size_t ident_length = (size_t) (parser->current.end - ident_start);
11143  bool ident_error = false;
11144 
11145  if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
11146  pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
11147  ident_error = true;
11148  }
11149 
11150  parser->explicit_encoding = NULL;
11151  lex_mode_push(parser, (pm_lex_mode_t) {
11152  .mode = PM_LEX_HEREDOC,
11153  .as.heredoc = {
11154  .base = {
11155  .ident_start = ident_start,
11156  .ident_length = ident_length,
11157  .quote = quote,
11158  .indent = indent
11159  },
11160  .next_start = parser->current.end,
11161  .common_whitespace = NULL,
11162  .line_continuation = false
11163  }
11164  });
11165 
11166  if (parser->heredoc_end == NULL) {
11167  const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
11168 
11169  if (body_start == NULL) {
11170  // If there is no newline after the heredoc identifier, then
11171  // this is not a valid heredoc declaration. In this case we
11172  // will add an error, but we will still return a heredoc
11173  // start.
11174  if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11175  body_start = parser->end;
11176  } else {
11177  // Otherwise, we want to indicate that the body of the
11178  // heredoc starts on the character after the next newline.
11179  pm_newline_list_append(&parser->newline_list, body_start);
11180  body_start++;
11181  }
11182 
11183  parser->next_start = body_start;
11184  } else {
11185  parser->next_start = parser->heredoc_end;
11186  }
11187 
11189  }
11190  }
11191 
11192  if (match(parser, '=')) {
11193  lex_state_set(parser, PM_LEX_STATE_BEG);
11195  }
11196 
11197  if (ambiguous_operator_p(parser, space_seen)) {
11198  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11199  }
11200 
11201  if (lex_state_operator_p(parser)) {
11202  lex_state_set(parser, PM_LEX_STATE_ARG);
11203  } else {
11204  if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11205  lex_state_set(parser, PM_LEX_STATE_BEG);
11206  }
11207 
11208  LEX(PM_TOKEN_LESS_LESS);
11209  }
11210 
11211  if (lex_state_operator_p(parser)) {
11212  lex_state_set(parser, PM_LEX_STATE_ARG);
11213  } else {
11214  if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11215  lex_state_set(parser, PM_LEX_STATE_BEG);
11216  }
11217 
11218  if (match(parser, '=')) {
11219  if (match(parser, '>')) {
11221  }
11222 
11223  LEX(PM_TOKEN_LESS_EQUAL);
11224  }
11225 
11226  LEX(PM_TOKEN_LESS);
11227 
11228  // > >> >>= >=
11229  case '>':
11230  if (match(parser, '>')) {
11231  if (lex_state_operator_p(parser)) {
11232  lex_state_set(parser, PM_LEX_STATE_ARG);
11233  } else {
11234  lex_state_set(parser, PM_LEX_STATE_BEG);
11235  }
11236  LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
11237  }
11238 
11239  if (lex_state_operator_p(parser)) {
11240  lex_state_set(parser, PM_LEX_STATE_ARG);
11241  } else {
11242  lex_state_set(parser, PM_LEX_STATE_BEG);
11243  }
11244 
11245  LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
11246 
11247  // double-quoted string literal
11248  case '"': {
11249  bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11250  lex_mode_push_string(parser, true, label_allowed, '\0', '"');
11251  LEX(PM_TOKEN_STRING_BEGIN);
11252  }
11253 
11254  // xstring literal
11255  case '`': {
11256  if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
11257  lex_state_set(parser, PM_LEX_STATE_ENDFN);
11258  LEX(PM_TOKEN_BACKTICK);
11259  }
11260 
11261  if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
11262  if (previous_command_start) {
11263  lex_state_set(parser, PM_LEX_STATE_CMDARG);
11264  } else {
11265  lex_state_set(parser, PM_LEX_STATE_ARG);
11266  }
11267 
11268  LEX(PM_TOKEN_BACKTICK);
11269  }
11270 
11271  lex_mode_push_string(parser, true, false, '\0', '`');
11272  LEX(PM_TOKEN_BACKTICK);
11273  }
11274 
11275  // single-quoted string literal
11276  case '\'': {
11277  bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11278  lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
11279  LEX(PM_TOKEN_STRING_BEGIN);
11280  }
11281 
11282  // ? character literal
11283  case '?':
11284  LEX(lex_question_mark(parser));
11285 
11286  // & && &&= &=
11287  case '&': {
11288  if (match(parser, '&')) {
11289  lex_state_set(parser, PM_LEX_STATE_BEG);
11290 
11291  if (match(parser, '=')) {
11293  }
11294 
11296  }
11297 
11298  if (match(parser, '=')) {
11299  lex_state_set(parser, PM_LEX_STATE_BEG);
11301  }
11302 
11303  if (match(parser, '.')) {
11304  lex_state_set(parser, PM_LEX_STATE_DOT);
11306  }
11307 
11309  if (lex_state_spcarg_p(parser, space_seen)) {
11310  if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
11311  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11312  } else {
11313  const uint8_t delim = peek_offset(parser, 1);
11314 
11315  if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1)) {
11316  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11317  }
11318  }
11319 
11321  } else if (lex_state_beg_p(parser)) {
11323  } else if (ambiguous_operator_p(parser, space_seen)) {
11324  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11325  }
11326 
11327  if (lex_state_operator_p(parser)) {
11328  lex_state_set(parser, PM_LEX_STATE_ARG);
11329  } else {
11330  lex_state_set(parser, PM_LEX_STATE_BEG);
11331  }
11332 
11333  LEX(type);
11334  }
11335 
11336  // | || ||= |=
11337  case '|':
11338  if (match(parser, '|')) {
11339  if (match(parser, '=')) {
11340  lex_state_set(parser, PM_LEX_STATE_BEG);
11342  }
11343 
11344  if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
11345  parser->current.end--;
11346  LEX(PM_TOKEN_PIPE);
11347  }
11348 
11349  lex_state_set(parser, PM_LEX_STATE_BEG);
11350  LEX(PM_TOKEN_PIPE_PIPE);
11351  }
11352 
11353  if (match(parser, '=')) {
11354  lex_state_set(parser, PM_LEX_STATE_BEG);
11355  LEX(PM_TOKEN_PIPE_EQUAL);
11356  }
11357 
11358  if (lex_state_operator_p(parser)) {
11359  lex_state_set(parser, PM_LEX_STATE_ARG);
11360  } else {
11361  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11362  }
11363 
11364  LEX(PM_TOKEN_PIPE);
11365 
11366  // + += +@
11367  case '+': {
11368  if (lex_state_operator_p(parser)) {
11369  lex_state_set(parser, PM_LEX_STATE_ARG);
11370 
11371  if (match(parser, '@')) {
11372  LEX(PM_TOKEN_UPLUS);
11373  }
11374 
11375  LEX(PM_TOKEN_PLUS);
11376  }
11377 
11378  if (match(parser, '=')) {
11379  lex_state_set(parser, PM_LEX_STATE_BEG);
11380  LEX(PM_TOKEN_PLUS_EQUAL);
11381  }
11382 
11383  if (
11384  lex_state_beg_p(parser) ||
11385  (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
11386  ) {
11387  lex_state_set(parser, PM_LEX_STATE_BEG);
11388 
11389  if (pm_char_is_decimal_digit(peek(parser))) {
11390  parser->current.end++;
11391  pm_token_type_t type = lex_numeric(parser);
11392  lex_state_set(parser, PM_LEX_STATE_END);
11393  LEX(type);
11394  }
11395 
11396  LEX(PM_TOKEN_UPLUS);
11397  }
11398 
11399  if (ambiguous_operator_p(parser, space_seen)) {
11400  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11401  }
11402 
11403  lex_state_set(parser, PM_LEX_STATE_BEG);
11404  LEX(PM_TOKEN_PLUS);
11405  }
11406 
11407  // - -= -> -@
11408  case '-': {
11409  if (lex_state_operator_p(parser)) {
11410  lex_state_set(parser, PM_LEX_STATE_ARG);
11411 
11412  if (match(parser, '@')) {
11413  LEX(PM_TOKEN_UMINUS);
11414  }
11415 
11416  LEX(PM_TOKEN_MINUS);
11417  }
11418 
11419  if (match(parser, '=')) {
11420  lex_state_set(parser, PM_LEX_STATE_BEG);
11421  LEX(PM_TOKEN_MINUS_EQUAL);
11422  }
11423 
11424  if (match(parser, '>')) {
11425  lex_state_set(parser, PM_LEX_STATE_ENDFN);
11427  }
11428 
11429  bool spcarg = lex_state_spcarg_p(parser, space_seen);
11430  bool is_beg = lex_state_beg_p(parser);
11431  if (!is_beg && spcarg) {
11432  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
11433  }
11434 
11435  if (is_beg || spcarg) {
11436  lex_state_set(parser, PM_LEX_STATE_BEG);
11438  }
11439 
11440  if (ambiguous_operator_p(parser, space_seen)) {
11441  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11442  }
11443 
11444  lex_state_set(parser, PM_LEX_STATE_BEG);
11445  LEX(PM_TOKEN_MINUS);
11446  }
11447 
11448  // . .. ...
11449  case '.': {
11450  bool beg_p = lex_state_beg_p(parser);
11451 
11452  if (match(parser, '.')) {
11453  if (match(parser, '.')) {
11454  // If we're _not_ inside a range within default parameters
11455  if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
11456  if (lex_state_p(parser, PM_LEX_STATE_END)) {
11457  lex_state_set(parser, PM_LEX_STATE_BEG);
11458  } else {
11459  lex_state_set(parser, PM_LEX_STATE_ENDARG);
11460  }
11461  LEX(PM_TOKEN_UDOT_DOT_DOT);
11462  }
11463 
11464  if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
11465  pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
11466  }
11467 
11468  lex_state_set(parser, PM_LEX_STATE_BEG);
11470  }
11471 
11472  lex_state_set(parser, PM_LEX_STATE_BEG);
11473  LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
11474  }
11475 
11476  lex_state_set(parser, PM_LEX_STATE_DOT);
11477  LEX(PM_TOKEN_DOT);
11478  }
11479 
11480  // integer
11481  case '0':
11482  case '1':
11483  case '2':
11484  case '3':
11485  case '4':
11486  case '5':
11487  case '6':
11488  case '7':
11489  case '8':
11490  case '9': {
11491  pm_token_type_t type = lex_numeric(parser);
11492  lex_state_set(parser, PM_LEX_STATE_END);
11493  LEX(type);
11494  }
11495 
11496  // :: symbol
11497  case ':':
11498  if (match(parser, ':')) {
11499  if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11500  lex_state_set(parser, PM_LEX_STATE_BEG);
11501  LEX(PM_TOKEN_UCOLON_COLON);
11502  }
11503 
11504  lex_state_set(parser, PM_LEX_STATE_DOT);
11505  LEX(PM_TOKEN_COLON_COLON);
11506  }
11507 
11508  if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11509  lex_state_set(parser, PM_LEX_STATE_BEG);
11510  LEX(PM_TOKEN_COLON);
11511  }
11512 
11513  if (peek(parser) == '"' || peek(parser) == '\'') {
11514  lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11515  parser->current.end++;
11516  }
11517 
11518  lex_state_set(parser, PM_LEX_STATE_FNAME);
11519  LEX(PM_TOKEN_SYMBOL_BEGIN);
11520 
11521  // / /=
11522  case '/':
11523  if (lex_state_beg_p(parser)) {
11524  lex_mode_push_regexp(parser, '\0', '/');
11525  LEX(PM_TOKEN_REGEXP_BEGIN);
11526  }
11527 
11528  if (match(parser, '=')) {
11529  lex_state_set(parser, PM_LEX_STATE_BEG);
11530  LEX(PM_TOKEN_SLASH_EQUAL);
11531  }
11532 
11533  if (lex_state_spcarg_p(parser, space_seen)) {
11534  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11535  lex_mode_push_regexp(parser, '\0', '/');
11536  LEX(PM_TOKEN_REGEXP_BEGIN);
11537  }
11538 
11539  if (ambiguous_operator_p(parser, space_seen)) {
11540  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11541  }
11542 
11543  if (lex_state_operator_p(parser)) {
11544  lex_state_set(parser, PM_LEX_STATE_ARG);
11545  } else {
11546  lex_state_set(parser, PM_LEX_STATE_BEG);
11547  }
11548 
11549  LEX(PM_TOKEN_SLASH);
11550 
11551  // ^ ^=
11552  case '^':
11553  if (lex_state_operator_p(parser)) {
11554  lex_state_set(parser, PM_LEX_STATE_ARG);
11555  } else {
11556  lex_state_set(parser, PM_LEX_STATE_BEG);
11557  }
11558  LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11559 
11560  // ~ ~@
11561  case '~':
11562  if (lex_state_operator_p(parser)) {
11563  (void) match(parser, '@');
11564  lex_state_set(parser, PM_LEX_STATE_ARG);
11565  } else {
11566  lex_state_set(parser, PM_LEX_STATE_BEG);
11567  }
11568 
11569  LEX(PM_TOKEN_TILDE);
11570 
11571  // % %= %i %I %q %Q %r %s %w %W %x
11572  case '%': {
11573  // If there is no subsequent character then we have an
11574  // invalid token. We're going to say it's the percent
11575  // operator because we don't want to move into the string
11576  // lex mode unnecessarily.
11577  if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11578  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11579  LEX(PM_TOKEN_PERCENT);
11580  }
11581 
11582  if (!lex_state_beg_p(parser) && match(parser, '=')) {
11583  lex_state_set(parser, PM_LEX_STATE_BEG);
11585  } else if (
11586  lex_state_beg_p(parser) ||
11587  (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11588  lex_state_spcarg_p(parser, space_seen)
11589  ) {
11590  if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11591  if (*parser->current.end >= 0x80) {
11592  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11593  }
11594 
11595  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11596  lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11597  LEX(PM_TOKEN_STRING_BEGIN);
11598  }
11599 
11600  // Delimiters for %-literals cannot be alphanumeric. We
11601  // validate that here.
11602  uint8_t delimiter = peek_offset(parser, 1);
11603  if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11604  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11605  goto lex_next_token;
11606  }
11607 
11608  switch (peek(parser)) {
11609  case 'i': {
11610  parser->current.end++;
11611 
11612  if (parser->current.end < parser->end) {
11613  lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11614  } else {
11615  lex_mode_push_list_eof(parser);
11616  }
11617 
11619  }
11620  case 'I': {
11621  parser->current.end++;
11622 
11623  if (parser->current.end < parser->end) {
11624  lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11625  } else {
11626  lex_mode_push_list_eof(parser);
11627  }
11628 
11630  }
11631  case 'r': {
11632  parser->current.end++;
11633 
11634  if (parser->current.end < parser->end) {
11635  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11636  lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11637  } else {
11638  lex_mode_push_regexp(parser, '\0', '\0');
11639  }
11640 
11641  LEX(PM_TOKEN_REGEXP_BEGIN);
11642  }
11643  case 'q': {
11644  parser->current.end++;
11645 
11646  if (parser->current.end < parser->end) {
11647  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11648  lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11649  } else {
11650  lex_mode_push_string_eof(parser);
11651  }
11652 
11653  LEX(PM_TOKEN_STRING_BEGIN);
11654  }
11655  case 'Q': {
11656  parser->current.end++;
11657 
11658  if (parser->current.end < parser->end) {
11659  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11660  lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11661  } else {
11662  lex_mode_push_string_eof(parser);
11663  }
11664 
11665  LEX(PM_TOKEN_STRING_BEGIN);
11666  }
11667  case 's': {
11668  parser->current.end++;
11669 
11670  if (parser->current.end < parser->end) {
11671  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11672  lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11673  lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11674  } else {
11675  lex_mode_push_string_eof(parser);
11676  }
11677 
11678  LEX(PM_TOKEN_SYMBOL_BEGIN);
11679  }
11680  case 'w': {
11681  parser->current.end++;
11682 
11683  if (parser->current.end < parser->end) {
11684  lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11685  } else {
11686  lex_mode_push_list_eof(parser);
11687  }
11688 
11690  }
11691  case 'W': {
11692  parser->current.end++;
11693 
11694  if (parser->current.end < parser->end) {
11695  lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11696  } else {
11697  lex_mode_push_list_eof(parser);
11698  }
11699 
11701  }
11702  case 'x': {
11703  parser->current.end++;
11704 
11705  if (parser->current.end < parser->end) {
11706  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11707  lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11708  } else {
11709  lex_mode_push_string_eof(parser);
11710  }
11711 
11713  }
11714  default:
11715  // If we get to this point, then we have a % that is completely
11716  // unparsable. In this case we'll just drop it from the parser
11717  // and skip past it and hope that the next token is something
11718  // that we can parse.
11719  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11720  goto lex_next_token;
11721  }
11722  }
11723 
11724  if (ambiguous_operator_p(parser, space_seen)) {
11725  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11726  }
11727 
11728  lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11729  LEX(PM_TOKEN_PERCENT);
11730  }
11731 
11732  // global variable
11733  case '$': {
11734  pm_token_type_t type = lex_global_variable(parser);
11735 
11736  // If we're lexing an embedded variable, then we need to pop back into
11737  // the parent lex context.
11738  if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11739  lex_mode_pop(parser);
11740  }
11741 
11742  lex_state_set(parser, PM_LEX_STATE_END);
11743  LEX(type);
11744  }
11745 
11746  // instance variable, class variable
11747  case '@':
11748  lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11749  LEX(lex_at_variable(parser));
11750 
11751  default: {
11752  if (*parser->current.start != '_') {
11753  size_t width = char_is_identifier_start(parser, parser->current.start);
11754 
11755  // If this isn't the beginning of an identifier, then
11756  // it's an invalid token as we've exhausted all of the
11757  // other options. We'll skip past it and return the next
11758  // token after adding an appropriate error message.
11759  if (!width) {
11760  if (*parser->current.start >= 0x80) {
11761  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11762  } else if (*parser->current.start == '\\') {
11763  switch (peek_at(parser, parser->current.start + 1)) {
11764  case ' ':
11765  parser->current.end++;
11766  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11767  break;
11768  case '\f':
11769  parser->current.end++;
11770  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11771  break;
11772  case '\t':
11773  parser->current.end++;
11774  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11775  break;
11776  case '\v':
11777  parser->current.end++;
11778  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11779  break;
11780  case '\r':
11781  if (peek_at(parser, parser->current.start + 2) != '\n') {
11782  parser->current.end++;
11783  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11784  break;
11785  }
11786  /* fallthrough */
11787  default:
11788  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11789  break;
11790  }
11791  } else if (char_is_ascii_printable(*parser->current.start)) {
11792  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11793  } else {
11794  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11795  }
11796 
11797  goto lex_next_token;
11798  }
11799 
11800  parser->current.end = parser->current.start + width;
11801  }
11802 
11803  pm_token_type_t type = lex_identifier(parser, previous_command_start);
11804 
11805  // If we've hit a __END__ and it was at the start of the
11806  // line or the start of the file and it is followed by
11807  // either a \n or a \r\n (or the end of the file), then
11808  // this is the last token of the file.
11809  if (
11810  ((parser->current.end - parser->current.start) == 7) &&
11811  current_token_starts_line(parser) &&
11812  (memcmp(parser->current.start, "__END__", 7) == 0) &&
11813  (parser->current.end == parser->end || match_eol(parser))
11814  ) {
11815  // Since we know we're about to add an __END__ comment,
11816  // we know we need to add all of the newlines to get the
11817  // correct column information for it.
11818  const uint8_t *cursor = parser->current.end;
11819  while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11820  pm_newline_list_append(&parser->newline_list, cursor++);
11821  }
11822 
11823  parser->current.end = parser->end;
11824  parser->current.type = PM_TOKEN___END__;
11825  parser_lex_callback(parser);
11826 
11827  parser->data_loc.start = parser->current.start;
11828  parser->data_loc.end = parser->current.end;
11829 
11830  LEX(PM_TOKEN_EOF);
11831  }
11832 
11833  pm_lex_state_t last_state = parser->lex_state;
11834 
11836  if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11837  if (previous_command_start) {
11838  lex_state_set(parser, PM_LEX_STATE_CMDARG);
11839  } else {
11840  lex_state_set(parser, PM_LEX_STATE_ARG);
11841  }
11842  } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11843  lex_state_set(parser, PM_LEX_STATE_ENDFN);
11844  } else {
11845  lex_state_set(parser, PM_LEX_STATE_END);
11846  }
11847  }
11848 
11849  if (
11850  !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11851  (type == PM_TOKEN_IDENTIFIER) &&
11852  ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11853  pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11854  ) {
11855  lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11856  }
11857 
11858  LEX(type);
11859  }
11860  }
11861  }
11862  case PM_LEX_LIST: {
11863  if (parser->next_start != NULL) {
11864  parser->current.end = parser->next_start;
11865  parser->next_start = NULL;
11866  }
11867 
11868  // First we'll set the beginning of the token.
11869  parser->current.start = parser->current.end;
11870 
11871  // If there's any whitespace at the start of the list, then we're
11872  // going to trim it off the beginning and create a new token.
11873  size_t whitespace;
11874 
11875  if (parser->heredoc_end) {
11876  whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11877  if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11878  whitespace += 1;
11879  }
11880  } else {
11881  whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11882  }
11883 
11884  if (whitespace > 0) {
11885  parser->current.end += whitespace;
11886  if (peek_offset(parser, -1) == '\n') {
11887  // mutates next_start
11888  parser_flush_heredoc_end(parser);
11889  }
11890  LEX(PM_TOKEN_WORDS_SEP);
11891  }
11892 
11893  // We'll check if we're at the end of the file. If we are, then we
11894  // need to return the EOF token.
11895  if (parser->current.end >= parser->end) {
11896  LEX(PM_TOKEN_EOF);
11897  }
11898 
11899  // Here we'll get a list of the places where strpbrk should break,
11900  // and then find the first one.
11901  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11902  const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11903  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11904 
11905  // If we haven't found an escape yet, then this buffer will be
11906  // unallocated since we can refer directly to the source string.
11907  pm_token_buffer_t token_buffer = { 0 };
11908 
11909  while (breakpoint != NULL) {
11910  // If we hit whitespace, then we must have received content by
11911  // now, so we can return an element of the list.
11912  if (pm_char_is_whitespace(*breakpoint)) {
11913  parser->current.end = breakpoint;
11914  pm_token_buffer_flush(parser, &token_buffer);
11916  }
11917 
11918  // If we hit the terminator, we need to check which token to
11919  // return.
11920  if (*breakpoint == lex_mode->as.list.terminator) {
11921  // If this terminator doesn't actually close the list, then
11922  // we need to continue on past it.
11923  if (lex_mode->as.list.nesting > 0) {
11924  parser->current.end = breakpoint + 1;
11925  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11926  lex_mode->as.list.nesting--;
11927  continue;
11928  }
11929 
11930  // If we've hit the terminator and we've already skipped
11931  // past content, then we can return a list node.
11932  if (breakpoint > parser->current.start) {
11933  parser->current.end = breakpoint;
11934  pm_token_buffer_flush(parser, &token_buffer);
11936  }
11937 
11938  // Otherwise, switch back to the default state and return
11939  // the end of the list.
11940  parser->current.end = breakpoint + 1;
11941  lex_mode_pop(parser);
11942  lex_state_set(parser, PM_LEX_STATE_END);
11943  LEX(PM_TOKEN_STRING_END);
11944  }
11945 
11946  // If we hit a null byte, skip directly past it.
11947  if (*breakpoint == '\0') {
11948  breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11949  continue;
11950  }
11951 
11952  // If we hit escapes, then we need to treat the next token
11953  // literally. In this case we'll skip past the next character
11954  // and find the next breakpoint.
11955  if (*breakpoint == '\\') {
11956  parser->current.end = breakpoint + 1;
11957 
11958  // If we've hit the end of the file, then break out of the
11959  // loop by setting the breakpoint to NULL.
11960  if (parser->current.end == parser->end) {
11961  breakpoint = NULL;
11962  continue;
11963  }
11964 
11965  pm_token_buffer_escape(parser, &token_buffer);
11966  uint8_t peeked = peek(parser);
11967 
11968  switch (peeked) {
11969  case ' ':
11970  case '\f':
11971  case '\t':
11972  case '\v':
11973  case '\\':
11974  pm_token_buffer_push_byte(&token_buffer, peeked);
11975  parser->current.end++;
11976  break;
11977  case '\r':
11978  parser->current.end++;
11979  if (peek(parser) != '\n') {
11980  pm_token_buffer_push_byte(&token_buffer, '\r');
11981  break;
11982  }
11983  /* fallthrough */
11984  case '\n':
11985  pm_token_buffer_push_byte(&token_buffer, '\n');
11986 
11987  if (parser->heredoc_end) {
11988  // ... if we are on the same line as a heredoc,
11989  // flush the heredoc and continue parsing after
11990  // heredoc_end.
11991  parser_flush_heredoc_end(parser);
11992  pm_token_buffer_copy(parser, &token_buffer);
11994  } else {
11995  // ... else track the newline.
11996  pm_newline_list_append(&parser->newline_list, parser->current.end);
11997  }
11998 
11999  parser->current.end++;
12000  break;
12001  default:
12002  if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
12003  pm_token_buffer_push_byte(&token_buffer, peeked);
12004  parser->current.end++;
12005  } else if (lex_mode->as.list.interpolation) {
12006  escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12007  } else {
12008  pm_token_buffer_push_byte(&token_buffer, '\\');
12009  pm_token_buffer_push_escaped(&token_buffer, parser);
12010  }
12011 
12012  break;
12013  }
12014 
12015  token_buffer.cursor = parser->current.end;
12016  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12017  continue;
12018  }
12019 
12020  // If we hit a #, then we will attempt to lex interpolation.
12021  if (*breakpoint == '#') {
12022  pm_token_type_t type = lex_interpolation(parser, breakpoint);
12023 
12024  if (type == PM_TOKEN_NOT_PROVIDED) {
12025  // If we haven't returned at this point then we had something
12026  // that looked like an interpolated class or instance variable
12027  // like "#@" but wasn't actually. In this case we'll just skip
12028  // to the next breakpoint.
12029  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12030  continue;
12031  }
12032 
12033  if (type == PM_TOKEN_STRING_CONTENT) {
12034  pm_token_buffer_flush(parser, &token_buffer);
12035  }
12036 
12037  LEX(type);
12038  }
12039 
12040  // If we've hit the incrementor, then we need to skip past it
12041  // and find the next breakpoint.
12042  assert(*breakpoint == lex_mode->as.list.incrementor);
12043  parser->current.end = breakpoint + 1;
12044  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12045  lex_mode->as.list.nesting++;
12046  continue;
12047  }
12048 
12049  if (parser->current.end > parser->current.start) {
12050  pm_token_buffer_flush(parser, &token_buffer);
12052  }
12053 
12054  // If we were unable to find a breakpoint, then this token hits the
12055  // end of the file.
12056  parser->current.end = parser->end;
12057  pm_token_buffer_flush(parser, &token_buffer);
12059  }
12060  case PM_LEX_REGEXP: {
12061  // First, we'll set the start of this token to be the current end.
12062  if (parser->next_start == NULL) {
12063  parser->current.start = parser->current.end;
12064  } else {
12065  parser->current.start = parser->next_start;
12066  parser->current.end = parser->next_start;
12067  parser->next_start = NULL;
12068  }
12069 
12070  // We'll check if we're at the end of the file. If we are, then we
12071  // need to return the EOF token.
12072  if (parser->current.end >= parser->end) {
12073  LEX(PM_TOKEN_EOF);
12074  }
12075 
12076  // Get a reference to the current mode.
12077  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12078 
12079  // These are the places where we need to split up the content of the
12080  // regular expression. We'll use strpbrk to find the first of these
12081  // characters.
12082  const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
12083  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12084  pm_regexp_token_buffer_t token_buffer = { 0 };
12085 
12086  while (breakpoint != NULL) {
12087  // If we hit the terminator, we need to determine what kind of
12088  // token to return.
12089  if (*breakpoint == lex_mode->as.regexp.terminator) {
12090  if (lex_mode->as.regexp.nesting > 0) {
12091  parser->current.end = breakpoint + 1;
12092  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12093  lex_mode->as.regexp.nesting--;
12094  continue;
12095  }
12096 
12097  // Here we've hit the terminator. If we have already consumed
12098  // content then we need to return that content as string content
12099  // first.
12100  if (breakpoint > parser->current.start) {
12101  parser->current.end = breakpoint;
12102  pm_regexp_token_buffer_flush(parser, &token_buffer);
12104  }
12105 
12106  // Check here if we need to track the newline.
12107  size_t eol_length = match_eol_at(parser, breakpoint);
12108  if (eol_length) {
12109  parser->current.end = breakpoint + eol_length;
12110  pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12111  } else {
12112  parser->current.end = breakpoint + 1;
12113  }
12114 
12115  // Since we've hit the terminator of the regular expression,
12116  // we now need to parse the options.
12117  parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
12118 
12119  lex_mode_pop(parser);
12120  lex_state_set(parser, PM_LEX_STATE_END);
12121  LEX(PM_TOKEN_REGEXP_END);
12122  }
12123 
12124  // If we've hit the incrementor, then we need to skip past it
12125  // and find the next breakpoint.
12126  if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
12127  parser->current.end = breakpoint + 1;
12128  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12129  lex_mode->as.regexp.nesting++;
12130  continue;
12131  }
12132 
12133  switch (*breakpoint) {
12134  case '\0':
12135  // If we hit a null byte, skip directly past it.
12136  parser->current.end = breakpoint + 1;
12137  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12138  break;
12139  case '\r':
12140  if (peek_at(parser, breakpoint + 1) != '\n') {
12141  parser->current.end = breakpoint + 1;
12142  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12143  break;
12144  }
12145 
12146  breakpoint++;
12147  parser->current.end = breakpoint;
12148  pm_regexp_token_buffer_escape(parser, &token_buffer);
12149  token_buffer.base.cursor = breakpoint;
12150 
12151  /* fallthrough */
12152  case '\n':
12153  // If we've hit a newline, then we need to track that in
12154  // the list of newlines.
12155  if (parser->heredoc_end == NULL) {
12156  pm_newline_list_append(&parser->newline_list, breakpoint);
12157  parser->current.end = breakpoint + 1;
12158  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12159  break;
12160  }
12161 
12162  parser->current.end = breakpoint + 1;
12163  parser_flush_heredoc_end(parser);
12164  pm_regexp_token_buffer_flush(parser, &token_buffer);
12166  case '\\': {
12167  // If we hit escapes, then we need to treat the next
12168  // token literally. In this case we'll skip past the
12169  // next character and find the next breakpoint.
12170  parser->current.end = breakpoint + 1;
12171 
12172  // If we've hit the end of the file, then break out of
12173  // the loop by setting the breakpoint to NULL.
12174  if (parser->current.end == parser->end) {
12175  breakpoint = NULL;
12176  break;
12177  }
12178 
12179  pm_regexp_token_buffer_escape(parser, &token_buffer);
12180  uint8_t peeked = peek(parser);
12181 
12182  switch (peeked) {
12183  case '\r':
12184  parser->current.end++;
12185  if (peek(parser) != '\n') {
12186  if (lex_mode->as.regexp.terminator != '\r') {
12187  pm_token_buffer_push_byte(&token_buffer.base, '\\');
12188  }
12189  pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
12190  pm_token_buffer_push_byte(&token_buffer.base, '\r');
12191  break;
12192  }
12193  /* fallthrough */
12194  case '\n':
12195  if (parser->heredoc_end) {
12196  // ... if we are on the same line as a heredoc,
12197  // flush the heredoc and continue parsing after
12198  // heredoc_end.
12199  parser_flush_heredoc_end(parser);
12200  pm_regexp_token_buffer_copy(parser, &token_buffer);
12202  } else {
12203  // ... else track the newline.
12204  pm_newline_list_append(&parser->newline_list, parser->current.end);
12205  }
12206 
12207  parser->current.end++;
12208  break;
12209  case 'c':
12210  case 'C':
12211  case 'M':
12212  case 'u':
12213  case 'x':
12214  escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
12215  break;
12216  default:
12217  if (lex_mode->as.regexp.terminator == peeked) {
12218  // Some characters when they are used as the
12219  // terminator also receive an escape. They are
12220  // enumerated here.
12221  switch (peeked) {
12222  case '$': case ')': case '*': case '+':
12223  case '.': case '>': case '?': case ']':
12224  case '^': case '|': case '}':
12225  pm_token_buffer_push_byte(&token_buffer.base, '\\');
12226  break;
12227  default:
12228  break;
12229  }
12230 
12231  pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
12232  pm_token_buffer_push_byte(&token_buffer.base, peeked);
12233  parser->current.end++;
12234  break;
12235  }
12236 
12237  if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
12238  pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
12239  break;
12240  }
12241 
12242  token_buffer.base.cursor = parser->current.end;
12243  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12244  break;
12245  }
12246  case '#': {
12247  // If we hit a #, then we will attempt to lex
12248  // interpolation.
12249  pm_token_type_t type = lex_interpolation(parser, breakpoint);
12250 
12251  if (type == PM_TOKEN_NOT_PROVIDED) {
12252  // If we haven't returned at this point then we had
12253  // something that looked like an interpolated class or
12254  // instance variable like "#@" but wasn't actually. In
12255  // this case we'll just skip to the next breakpoint.
12256  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12257  break;
12258  }
12259 
12260  if (type == PM_TOKEN_STRING_CONTENT) {
12261  pm_regexp_token_buffer_flush(parser, &token_buffer);
12262  }
12263 
12264  LEX(type);
12265  }
12266  default:
12267  assert(false && "unreachable");
12268  break;
12269  }
12270  }
12271 
12272  if (parser->current.end > parser->current.start) {
12273  pm_regexp_token_buffer_flush(parser, &token_buffer);
12275  }
12276 
12277  // If we were unable to find a breakpoint, then this token hits the
12278  // end of the file.
12279  parser->current.end = parser->end;
12280  pm_regexp_token_buffer_flush(parser, &token_buffer);
12282  }
12283  case PM_LEX_STRING: {
12284  // First, we'll set the start of this token to be the current end.
12285  if (parser->next_start == NULL) {
12286  parser->current.start = parser->current.end;
12287  } else {
12288  parser->current.start = parser->next_start;
12289  parser->current.end = parser->next_start;
12290  parser->next_start = NULL;
12291  }
12292 
12293  // We'll check if we're at the end of the file. If we are, then we need to
12294  // return the EOF token.
12295  if (parser->current.end >= parser->end) {
12296  LEX(PM_TOKEN_EOF);
12297  }
12298 
12299  // These are the places where we need to split up the content of the
12300  // string. We'll use strpbrk to find the first of these characters.
12301  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12302  const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
12303  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12304 
12305  // If we haven't found an escape yet, then this buffer will be
12306  // unallocated since we can refer directly to the source string.
12307  pm_token_buffer_t token_buffer = { 0 };
12308 
12309  while (breakpoint != NULL) {
12310  // If we hit the incrementor, then we'll increment the nesting and
12311  // continue lexing.
12312  if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
12313  lex_mode->as.string.nesting++;
12314  parser->current.end = breakpoint + 1;
12315  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12316  continue;
12317  }
12318 
12319  // Note that we have to check the terminator here first because we could
12320  // potentially be parsing a % string that has a # character as the
12321  // terminator.
12322  if (*breakpoint == lex_mode->as.string.terminator) {
12323  // If this terminator doesn't actually close the string, then we need
12324  // to continue on past it.
12325  if (lex_mode->as.string.nesting > 0) {
12326  parser->current.end = breakpoint + 1;
12327  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12328  lex_mode->as.string.nesting--;
12329  continue;
12330  }
12331 
12332  // Here we've hit the terminator. If we have already consumed content
12333  // then we need to return that content as string content first.
12334  if (breakpoint > parser->current.start) {
12335  parser->current.end = breakpoint;
12336  pm_token_buffer_flush(parser, &token_buffer);
12338  }
12339 
12340  // Otherwise we need to switch back to the parent lex mode and
12341  // return the end of the string.
12342  size_t eol_length = match_eol_at(parser, breakpoint);
12343  if (eol_length) {
12344  parser->current.end = breakpoint + eol_length;
12345  pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12346  } else {
12347  parser->current.end = breakpoint + 1;
12348  }
12349 
12350  if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
12351  parser->current.end++;
12352  lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
12353  lex_mode_pop(parser);
12354  LEX(PM_TOKEN_LABEL_END);
12355  }
12356 
12357  lex_state_set(parser, PM_LEX_STATE_END);
12358  lex_mode_pop(parser);
12359  LEX(PM_TOKEN_STRING_END);
12360  }
12361 
12362  switch (*breakpoint) {
12363  case '\0':
12364  // Skip directly past the null character.
12365  parser->current.end = breakpoint + 1;
12366  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12367  break;
12368  case '\r':
12369  if (peek_at(parser, breakpoint + 1) != '\n') {
12370  parser->current.end = breakpoint + 1;
12371  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12372  break;
12373  }
12374 
12375  // If we hit a \r\n sequence, then we need to treat it
12376  // as a newline.
12377  breakpoint++;
12378  parser->current.end = breakpoint;
12379  pm_token_buffer_escape(parser, &token_buffer);
12380  token_buffer.cursor = breakpoint;
12381 
12382  /* fallthrough */
12383  case '\n':
12384  // When we hit a newline, we need to flush any potential
12385  // heredocs. Note that this has to happen after we check
12386  // for the terminator in case the terminator is a
12387  // newline character.
12388  if (parser->heredoc_end == NULL) {
12389  pm_newline_list_append(&parser->newline_list, breakpoint);
12390  parser->current.end = breakpoint + 1;
12391  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12392  break;
12393  }
12394 
12395  parser->current.end = breakpoint + 1;
12396  parser_flush_heredoc_end(parser);
12397  pm_token_buffer_flush(parser, &token_buffer);
12399  case '\\': {
12400  // Here we hit escapes.
12401  parser->current.end = breakpoint + 1;
12402 
12403  // If we've hit the end of the file, then break out of
12404  // the loop by setting the breakpoint to NULL.
12405  if (parser->current.end == parser->end) {
12406  breakpoint = NULL;
12407  continue;
12408  }
12409 
12410  pm_token_buffer_escape(parser, &token_buffer);
12411  uint8_t peeked = peek(parser);
12412 
12413  switch (peeked) {
12414  case '\\':
12415  pm_token_buffer_push_byte(&token_buffer, '\\');
12416  parser->current.end++;
12417  break;
12418  case '\r':
12419  parser->current.end++;
12420  if (peek(parser) != '\n') {
12421  if (!lex_mode->as.string.interpolation) {
12422  pm_token_buffer_push_byte(&token_buffer, '\\');
12423  }
12424  pm_token_buffer_push_byte(&token_buffer, '\r');
12425  break;
12426  }
12427  /* fallthrough */
12428  case '\n':
12429  if (!lex_mode->as.string.interpolation) {
12430  pm_token_buffer_push_byte(&token_buffer, '\\');
12431  pm_token_buffer_push_byte(&token_buffer, '\n');
12432  }
12433 
12434  if (parser->heredoc_end) {
12435  // ... if we are on the same line as a heredoc,
12436  // flush the heredoc and continue parsing after
12437  // heredoc_end.
12438  parser_flush_heredoc_end(parser);
12439  pm_token_buffer_copy(parser, &token_buffer);
12441  } else {
12442  // ... else track the newline.
12443  pm_newline_list_append(&parser->newline_list, parser->current.end);
12444  }
12445 
12446  parser->current.end++;
12447  break;
12448  default:
12449  if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12450  pm_token_buffer_push_byte(&token_buffer, peeked);
12451  parser->current.end++;
12452  } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12453  pm_token_buffer_push_byte(&token_buffer, peeked);
12454  parser->current.end++;
12455  } else if (lex_mode->as.string.interpolation) {
12456  escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12457  } else {
12458  pm_token_buffer_push_byte(&token_buffer, '\\');
12459  pm_token_buffer_push_escaped(&token_buffer, parser);
12460  }
12461 
12462  break;
12463  }
12464 
12465  token_buffer.cursor = parser->current.end;
12466  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12467  break;
12468  }
12469  case '#': {
12470  pm_token_type_t type = lex_interpolation(parser, breakpoint);
12471 
12472  if (type == PM_TOKEN_NOT_PROVIDED) {
12473  // If we haven't returned at this point then we had something that
12474  // looked like an interpolated class or instance variable like "#@"
12475  // but wasn't actually. In this case we'll just skip to the next
12476  // breakpoint.
12477  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12478  break;
12479  }
12480 
12481  if (type == PM_TOKEN_STRING_CONTENT) {
12482  pm_token_buffer_flush(parser, &token_buffer);
12483  }
12484 
12485  LEX(type);
12486  }
12487  default:
12488  assert(false && "unreachable");
12489  }
12490  }
12491 
12492  if (parser->current.end > parser->current.start) {
12493  pm_token_buffer_flush(parser, &token_buffer);
12495  }
12496 
12497  // If we've hit the end of the string, then this is an unterminated
12498  // string. In that case we'll return a string content token.
12499  parser->current.end = parser->end;
12500  pm_token_buffer_flush(parser, &token_buffer);
12502  }
12503  case PM_LEX_HEREDOC: {
12504  // First, we'll set the start of this token.
12505  if (parser->next_start == NULL) {
12506  parser->current.start = parser->current.end;
12507  } else {
12508  parser->current.start = parser->next_start;
12509  parser->current.end = parser->next_start;
12510  parser->heredoc_end = NULL;
12511  parser->next_start = NULL;
12512  }
12513 
12514  // Now let's grab the information about the identifier off of the
12515  // current lex mode.
12516  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12517  pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12518 
12519  bool line_continuation = lex_mode->as.heredoc.line_continuation;
12520  lex_mode->as.heredoc.line_continuation = false;
12521 
12522  // We'll check if we're at the end of the file. If we are, then we
12523  // will add an error (because we weren't able to find the
12524  // terminator) but still continue parsing so that content after the
12525  // declaration of the heredoc can be parsed.
12526  if (parser->current.end >= parser->end) {
12527  pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12528  parser->next_start = lex_mode->as.heredoc.next_start;
12529  parser->heredoc_end = parser->current.end;
12530  lex_state_set(parser, PM_LEX_STATE_END);
12531  lex_mode_pop(parser);
12532  LEX(PM_TOKEN_HEREDOC_END);
12533  }
12534 
12535  const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12536  size_t ident_length = heredoc_lex_mode->ident_length;
12537 
12538  // If we are immediately following a newline and we have hit the
12539  // terminator, then we need to return the ending of the heredoc.
12540  if (current_token_starts_line(parser)) {
12541  const uint8_t *start = parser->current.start;
12542 
12543  if (!line_continuation && (start + ident_length <= parser->end)) {
12544  const uint8_t *newline = next_newline(start, parser->end - start);
12545  const uint8_t *ident_end = newline;
12546  const uint8_t *terminator_end = newline;
12547 
12548  if (newline == NULL) {
12549  terminator_end = parser->end;
12550  ident_end = parser->end;
12551  } else {
12552  terminator_end++;
12553  if (newline[-1] == '\r') {
12554  ident_end--; // Remove \r
12555  }
12556  }
12557 
12558  const uint8_t *terminator_start = ident_end - ident_length;
12559  const uint8_t *cursor = start;
12560 
12561  if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12562  while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12563  cursor++;
12564  }
12565  }
12566 
12567  if (
12568  (cursor == terminator_start) &&
12569  (memcmp(terminator_start, ident_start, ident_length) == 0)
12570  ) {
12571  if (newline != NULL) {
12572  pm_newline_list_append(&parser->newline_list, newline);
12573  }
12574 
12575  parser->current.end = terminator_end;
12576  if (*lex_mode->as.heredoc.next_start == '\\') {
12577  parser->next_start = NULL;
12578  } else {
12579  parser->next_start = lex_mode->as.heredoc.next_start;
12580  parser->heredoc_end = parser->current.end;
12581  }
12582 
12583  lex_state_set(parser, PM_LEX_STATE_END);
12584  lex_mode_pop(parser);
12585  LEX(PM_TOKEN_HEREDOC_END);
12586  }
12587  }
12588 
12589  size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12590  if (
12591  heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12592  lex_mode->as.heredoc.common_whitespace != NULL &&
12593  (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12594  peek_at(parser, start) != '\n'
12595  ) {
12596  *lex_mode->as.heredoc.common_whitespace = whitespace;
12597  }
12598  }
12599 
12600  // Otherwise we'll be parsing string content. These are the places
12601  // where we need to split up the content of the heredoc. We'll use
12602  // strpbrk to find the first of these characters.
12603  uint8_t breakpoints[] = "\r\n\\#";
12604 
12605  pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12606  if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12607  breakpoints[3] = '\0';
12608  }
12609 
12610  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12611  pm_token_buffer_t token_buffer = { 0 };
12612  bool was_line_continuation = false;
12613 
12614  while (breakpoint != NULL) {
12615  switch (*breakpoint) {
12616  case '\0':
12617  // Skip directly past the null character.
12618  parser->current.end = breakpoint + 1;
12619  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12620  break;
12621  case '\r':
12622  parser->current.end = breakpoint + 1;
12623 
12624  if (peek_at(parser, breakpoint + 1) != '\n') {
12625  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12626  break;
12627  }
12628 
12629  // If we hit a \r\n sequence, then we want to replace it
12630  // with a single \n character in the final string.
12631  breakpoint++;
12632  pm_token_buffer_escape(parser, &token_buffer);
12633  token_buffer.cursor = breakpoint;
12634 
12635  /* fallthrough */
12636  case '\n': {
12637  if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12638  parser_flush_heredoc_end(parser);
12639  parser->current.end = breakpoint + 1;
12640  pm_token_buffer_flush(parser, &token_buffer);
12642  }
12643 
12644  pm_newline_list_append(&parser->newline_list, breakpoint);
12645 
12646  // If we have a - or ~ heredoc, then we can match after
12647  // some leading whitespace.
12648  const uint8_t *start = breakpoint + 1;
12649 
12650  if (!was_line_continuation && (start + ident_length <= parser->end)) {
12651  // We want to match the terminator starting from the end of the line in case
12652  // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12653  const uint8_t *newline = next_newline(start, parser->end - start);
12654 
12655  if (newline == NULL) {
12656  newline = parser->end;
12657  } else if (newline[-1] == '\r') {
12658  newline--; // Remove \r
12659  }
12660 
12661  // Start of a possible terminator.
12662  const uint8_t *terminator_start = newline - ident_length;
12663 
12664  // Cursor to check for the leading whitespace. We skip the
12665  // leading whitespace if we have a - or ~ heredoc.
12666  const uint8_t *cursor = start;
12667 
12668  if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12669  while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12670  cursor++;
12671  }
12672  }
12673 
12674  if (
12675  cursor == terminator_start &&
12676  (memcmp(terminator_start, ident_start, ident_length) == 0)
12677  ) {
12678  parser->current.end = breakpoint + 1;
12679  pm_token_buffer_flush(parser, &token_buffer);
12681  }
12682  }
12683 
12684  size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12685 
12686  // If we have hit a newline that is followed by a valid
12687  // terminator, then we need to return the content of the
12688  // heredoc here as string content. Then, the next time a
12689  // token is lexed, it will match again and return the
12690  // end of the heredoc.
12691  if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12692  if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12693  *lex_mode->as.heredoc.common_whitespace = whitespace;
12694  }
12695 
12696  parser->current.end = breakpoint + 1;
12697  pm_token_buffer_flush(parser, &token_buffer);
12699  }
12700 
12701  // Otherwise we hit a newline and it wasn't followed by
12702  // a terminator, so we can continue parsing.
12703  parser->current.end = breakpoint + 1;
12704  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12705  break;
12706  }
12707  case '\\': {
12708  // If we hit an escape, then we need to skip past
12709  // however many characters the escape takes up. However
12710  // it's important that if \n or \r\n are escaped, we
12711  // stop looping before the newline and not after the
12712  // newline so that we can still potentially find the
12713  // terminator of the heredoc.
12714  parser->current.end = breakpoint + 1;
12715 
12716  // If we've hit the end of the file, then break out of
12717  // the loop by setting the breakpoint to NULL.
12718  if (parser->current.end == parser->end) {
12719  breakpoint = NULL;
12720  continue;
12721  }
12722 
12723  pm_token_buffer_escape(parser, &token_buffer);
12724  uint8_t peeked = peek(parser);
12725 
12726  if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12727  switch (peeked) {
12728  case '\r':
12729  parser->current.end++;
12730  if (peek(parser) != '\n') {
12731  pm_token_buffer_push_byte(&token_buffer, '\\');
12732  pm_token_buffer_push_byte(&token_buffer, '\r');
12733  break;
12734  }
12735  /* fallthrough */
12736  case '\n':
12737  pm_token_buffer_push_byte(&token_buffer, '\\');
12738  pm_token_buffer_push_byte(&token_buffer, '\n');
12739  token_buffer.cursor = parser->current.end + 1;
12740  breakpoint = parser->current.end;
12741  continue;
12742  default:
12743  pm_token_buffer_push_byte(&token_buffer, '\\');
12744  pm_token_buffer_push_escaped(&token_buffer, parser);
12745  break;
12746  }
12747  } else {
12748  switch (peeked) {
12749  case '\r':
12750  parser->current.end++;
12751  if (peek(parser) != '\n') {
12752  pm_token_buffer_push_byte(&token_buffer, '\r');
12753  break;
12754  }
12755  /* fallthrough */
12756  case '\n':
12757  // If we are in a tilde here, we should
12758  // break out of the loop and return the
12759  // string content.
12760  if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12761  const uint8_t *end = parser->current.end;
12762  pm_newline_list_append(&parser->newline_list, end);
12763 
12764  // Here we want the buffer to only
12765  // include up to the backslash.
12766  parser->current.end = breakpoint;
12767  pm_token_buffer_flush(parser, &token_buffer);
12768 
12769  // Now we can advance the end of the
12770  // token past the newline.
12771  parser->current.end = end + 1;
12772  lex_mode->as.heredoc.line_continuation = true;
12774  }
12775 
12776  was_line_continuation = true;
12777  token_buffer.cursor = parser->current.end + 1;
12778  breakpoint = parser->current.end;
12779  continue;
12780  default:
12781  escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12782  break;
12783  }
12784  }
12785 
12786  token_buffer.cursor = parser->current.end;
12787  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12788  break;
12789  }
12790  case '#': {
12791  pm_token_type_t type = lex_interpolation(parser, breakpoint);
12792 
12793  if (type == PM_TOKEN_NOT_PROVIDED) {
12794  // If we haven't returned at this point then we had
12795  // something that looked like an interpolated class
12796  // or instance variable like "#@" but wasn't
12797  // actually. In this case we'll just skip to the
12798  // next breakpoint.
12799  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12800  break;
12801  }
12802 
12803  if (type == PM_TOKEN_STRING_CONTENT) {
12804  pm_token_buffer_flush(parser, &token_buffer);
12805  }
12806 
12807  LEX(type);
12808  }
12809  default:
12810  assert(false && "unreachable");
12811  }
12812 
12813  was_line_continuation = false;
12814  }
12815 
12816  if (parser->current.end > parser->current.start) {
12817  parser->current.end = parser->end;
12818  pm_token_buffer_flush(parser, &token_buffer);
12820  }
12821 
12822  // If we've hit the end of the string, then this is an unterminated
12823  // heredoc. In that case we'll return a string content token.
12824  parser->current.end = parser->end;
12825  pm_token_buffer_flush(parser, &token_buffer);
12827  }
12828  }
12829 
12830  assert(false && "unreachable");
12831 }
12832 
12833 #undef LEX
12834 
12835 /******************************************************************************/
12836 /* Parse functions */
12837 /******************************************************************************/
12838 
/**
 * Binding powers assigned to operators, declared in ascending numeric order.
 * A larger value is meant to bind more tightly (for example FACTOR, used for
 * `* / %`, sits above TERM, used for `+ -`). The trailing comment on each
 * member names the operators associated with that level.
 *
 * Consecutive levels are deliberately 2 apart: the LEFT_ASSOCIATIVE and
 * NON_ASSOCIATIVE initializer macros defined after this enum use
 * `precedence + 1` as the right-hand binding power, so the odd values between
 * levels must stay unassigned.
 */
12847 typedef enum {
12848  PM_BINDING_POWER_UNSET = 0, // used to indicate this token cannot be used as an infix operator
// NOTE(review): no entry in the visible part of the operator table uses this
// level; presumably it is the floor used when parsing a whole statement.
12849  PM_BINDING_POWER_STATEMENT = 2,
12850  PM_BINDING_POWER_MODIFIER_RESCUE = 4, // rescue
12851  PM_BINDING_POWER_MODIFIER = 6, // if unless until while
12852  PM_BINDING_POWER_COMPOSITION = 8, // and or
12853  PM_BINDING_POWER_NOT = 10, // not
12854  PM_BINDING_POWER_MATCH = 12, // => in
12855  PM_BINDING_POWER_DEFINED = 14, // defined?
12856  PM_BINDING_POWER_MULTI_ASSIGNMENT = 16, // =
12857  PM_BINDING_POWER_ASSIGNMENT = 18, // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
12858  PM_BINDING_POWER_TERNARY = 20, // ?:
12859  PM_BINDING_POWER_RANGE = 22, // .. ...
12860  PM_BINDING_POWER_LOGICAL_OR = 24, // ||
12861  PM_BINDING_POWER_LOGICAL_AND = 26, // &&
12862  PM_BINDING_POWER_EQUALITY = 28, // <=> == === != =~ !~
12863  PM_BINDING_POWER_COMPARISON = 30, // > >= < <=
12864  PM_BINDING_POWER_BITWISE_OR = 32, // | ^
12865  PM_BINDING_POWER_BITWISE_AND = 34, // &
12866  PM_BINDING_POWER_SHIFT = 36, // << >>
12867  PM_BINDING_POWER_TERM = 38, // + -
12868  PM_BINDING_POWER_FACTOR = 40, // * / %
12869  PM_BINDING_POWER_UMINUS = 42, // -@
12870  PM_BINDING_POWER_EXPONENT = 44, // **
12871  PM_BINDING_POWER_UNARY = 46, // ! ~ +@
12872  PM_BINDING_POWER_INDEX = 48, // [] []=
12873  PM_BINDING_POWER_CALL = 50, // :: .
// Largest value in the enumeration; no operator level is declared above it.
12874  PM_BINDING_POWER_MAX = 52
12875 } pm_binding_power_t;
12876 
/**
 * The binding powers and operator flags recorded for one token type. Values
 * of this type are written positionally (left, right, binary, nonassoc) by
 * the initializer macros that follow this definition, so the field order
 * here must not change.
 */
12881 typedef struct {
/** Binding power on the operator's left-hand side. */
12883  pm_binding_power_t left;
12884 
/** Binding power on the operator's right-hand side. */
12886  pm_binding_power_t right;
12887 
/** Whether this token can be used as a binary operator. */
12889  bool binary;
12890 
/**
 * Whether this token is a non-associative binary operator. Only the
 * NON_ASSOCIATIVE initializer macro sets this flag.
 */
12895  bool nonassoc;
12896 } pm_binding_powers_t;
12897 
// Initializers for pm_binding_powers_t, in field order: left, right, binary,
// nonassoc. The `precedence` parameter is parenthesized so that any constant
// expression can be passed without changing how `+ 1` groups.

// Assignment operators: the left side uses the UNARY level while the right
// side uses the ASSIGNMENT level.
12898 #define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
// Binary operator whose right binding power is one above its left.
12899 #define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }
// Binary operator whose left and right binding powers are equal.
12900 #define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }
// Same powers as LEFT_ASSOCIATIVE, additionally flagged as non-associative.
12901 #define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }
// Equal left and right binding powers, not usable as a binary operator.
12902 #define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12903 
// Binding powers for every token type, indexed by the token type itself through
// designated initializers. token_begins_expression_p reads the `left` field of
// an entry and compares it against PM_BINDING_POWER_UNSET to decide whether the
// token has a binding power. Token types that are not listed get a
// zero-initialized element (presumably equal to PM_BINDING_POWER_UNSET -- TODO
// confirm against the enum definition, which is not visible in this section).
// The helper macros used on the right-hand sides are defined before this table.
12904 pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12905  // rescue
12906  [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
12907 
12908  // if unless until while
12909  [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12910  [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12911  [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12912  [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12913 
12914  // and or
12915  [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12916  [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12917 
12918  // => in
12919  [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12920  [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12921 
12922  // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12923  [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12924  [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12925  [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12926  [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12927  [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12928  [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12929  [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12930  [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12931  [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12932  [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12933  [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12934  [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12935  [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12936  [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12937 
12938  // ?:
12939  [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
12940 
12941  // .. ...
12942  [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12943  [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
       // The U-prefixed range tokens go through the unary helper macro and are
       // registered at PM_BINDING_POWER_LOGICAL_OR rather than
       // PM_BINDING_POWER_RANGE.
12944  [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12945  [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12946 
12947  // ||
12948  [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
12949 
12950  // &&
12951  [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
12952 
12953  // != !~ == === =~ <=>
12954  [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12955  [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12956  [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12957  [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12958  [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12959  [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12960 
12961  // > >= < <=
12962  [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12963  [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12964  [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12965  [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12966 
12967  // ^ |
12968  [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12969  [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12970 
12971  // &
12972  [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
12973 
12974  // >> <<
12975  [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12976  [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12977 
12978  // - +
12979  [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12980  [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12981 
12982  // % / *
12983  [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12984  [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12985  [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12986  [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
12987 
12988  // -@
12989  [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
       // This entry is written out field by field instead of through a helper
       // macro. The field order and meaning come from pm_binding_powers_t, which
       // is declared outside this section.
12990  [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
12991 
12992  // **
12993  [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
12994  [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12995 
12996  // ! ~ +@
12997  [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12998  [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12999  [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13000 
13001  // [
13002  [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
13003 
13004  // :: . &.
13005  [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13006  [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13007  [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
13008 };
13009 
// The table is built, so the helper macros are removed from scope.
// NOTE(review): NON_ASSOCIATIVE is used in the table above but is not undefined
// here -- confirm whether that is intentional.
13010 #undef BINDING_POWER_ASSIGNMENT
13011 #undef LEFT_ASSOCIATIVE
13012 #undef RIGHT_ASSOCIATIVE
13013 #undef RIGHT_ASSOCIATIVE_UNARY
13014 
13018 static inline bool
13019 match1(const pm_parser_t *parser, pm_token_type_t type) {
13020  return parser->current.type == type;
13021 }
13022 
13026 static inline bool
13027 match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13028  return match1(parser, type1) || match1(parser, type2);
13029 }
13030 
13034 static inline bool
13035 match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13036  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
13037 }
13038 
13042 static inline bool
13043 match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
13044  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13045 }
13046 
13050 static inline bool
13051 match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
13052  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
13053 }
13054 
13058 static inline bool
13059 match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
13060  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
13061 }
13062 
13066 static inline bool
13067 match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
13068  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13069 }
13070 
13077 static bool
13078 accept1(pm_parser_t *parser, pm_token_type_t type) {
13079  if (match1(parser, type)) {
13080  parser_lex(parser);
13081  return true;
13082  }
13083  return false;
13084 }
13085 
13090 static inline bool
13091 accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13092  if (match2(parser, type1, type2)) {
13093  parser_lex(parser);
13094  return true;
13095  }
13096  return false;
13097 }
13098 
13103 static inline bool
13104 accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13105  if (match3(parser, type1, type2, type3)) {
13106  parser_lex(parser);
13107  return true;
13108  }
13109  return false;
13110 }
13111 
13123 static void
13124 expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
13125  if (accept1(parser, type)) return;
13126 
13127  const uint8_t *location = parser->previous.end;
13128  pm_parser_err(parser, location, location, diag_id);
13129 
13130  parser->previous.start = location;
13131  parser->previous.type = PM_TOKEN_MISSING;
13132 }
13133 
13138 static void
13139 expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
13140  if (accept2(parser, type1, type2)) return;
13141 
13142  const uint8_t *location = parser->previous.end;
13143  pm_parser_err(parser, location, location, diag_id);
13144 
13145  parser->previous.start = location;
13146  parser->previous.type = PM_TOKEN_MISSING;
13147 }
13148 
13152 static void
13153 expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
13154  if (accept3(parser, type1, type2, type3)) return;
13155 
13156  const uint8_t *location = parser->previous.end;
13157  pm_parser_err(parser, location, location, diag_id);
13158 
13159  parser->previous.start = location;
13160  parser->previous.type = PM_TOKEN_MISSING;
13161 }
13162 
13167 static void
13168 expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13169  if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13170  parser_lex(parser);
13171  } else {
13172  pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13173  parser->previous.start = parser->previous.end;
13174  parser->previous.type = PM_TOKEN_MISSING;
13175  }
13176 }
13177 
// Forward declaration. Helpers below (parse_value_expression, parse_targets,
// parse_statements) call parse_expression, and its definition is not part of
// this section.
13178 static pm_node_t *
13179 parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
13180 
13185 static pm_node_t *
13186 parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
13187  pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
13188  pm_assert_value_expression(parser, node);
13189  return node;
13190 }
13191 
// Returns whether a token of the given type is treated as able to begin an
// expression. A fixed set of token types is answered directly by the switch;
// every other type begins an expression exactly when it has no left binding
// power in pm_binding_powers.
//
// NOTE(review): the embedded listing numbers skip inside this switch (13213,
// 13219, 13226, 13229-13230, 13232, 13235), so this copy is probably missing
// case labels -- compare against the upstream file before changing anything.
13210 static inline bool
13211 token_begins_expression_p(pm_token_type_t type) {
13212  switch (type) {
13214  case PM_TOKEN_KEYWORD_IN:
13215  // We need to special case this because it is a binary operator that
13216  // should not be marked as beginning an expression.
13217  return false;
13218  case PM_TOKEN_BRACE_RIGHT:
13220  case PM_TOKEN_COLON:
13221  case PM_TOKEN_COMMA:
13222  case PM_TOKEN_EMBEXPR_END:
13223  case PM_TOKEN_EOF:
13224  case PM_TOKEN_LAMBDA_BEGIN:
13225  case PM_TOKEN_KEYWORD_DO:
13227  case PM_TOKEN_KEYWORD_END:
13228  case PM_TOKEN_KEYWORD_ELSE:
13231  case PM_TOKEN_KEYWORD_THEN:
13233  case PM_TOKEN_KEYWORD_WHEN:
13234  case PM_TOKEN_NEWLINE:
13236  case PM_TOKEN_SEMICOLON:
13237  // The reason we need this short-circuit is because we're using the
13238  // binding powers table to tell us if the subsequent token could
13239  // potentially be the start of an expression. If there _is_ a binding
13240  // power for one of these tokens, then we should remove it from this list
13241  // and let it be handled by the default case below.
13242  assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
13243  return false;
13244  case PM_TOKEN_UAMPERSAND:
13245  // This is a special case because this unary operator cannot appear
13246  // as a general operator, it only appears in certain circumstances.
13247  return false;
13248  case PM_TOKEN_UCOLON_COLON:
13249  case PM_TOKEN_UMINUS:
13250  case PM_TOKEN_UMINUS_NUM:
13251  case PM_TOKEN_UPLUS:
13252  case PM_TOKEN_BANG:
13253  case PM_TOKEN_TILDE:
13254  case PM_TOKEN_UDOT_DOT:
13255  case PM_TOKEN_UDOT_DOT_DOT:
13256  // These unary tokens actually do have binding power associated with them
13257  // so that we can correctly place them into the precedence order. But we
13258  // want them to be marked as beginning an expression, so we need to
13259  // special case them here.
13260  return true;
13261  default:
       // Any other token begins an expression only if the table holds no left
       // binding power for it.
13262  return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
13263  }
13264 }
13265 
13270 static pm_node_t *
13271 parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
13272  if (accept1(parser, PM_TOKEN_USTAR)) {
13273  pm_token_t operator = parser->previous;
13274  pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13275  return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
13276  }
13277 
13278  return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
13279 }
13280 
13285 static void
13286 parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
13287  // The method name needs to change. If we previously had
13288  // foo, we now need foo=. In this case we'll allocate a new
13289  // owned string, copy the previous method name in, and
13290  // append an =.
13291  pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
13292  size_t length = constant->length;
13293  uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
13294  if (name == NULL) return;
13295 
13296  memcpy(name, constant->start, length);
13297  name[length] = '=';
13298 
13299  // Now switch the name to the new string.
13300  // This silences clang analyzer warning about leak of memory pointed by `name`.
13301  // NOLINTNEXTLINE(clang-analyzer-*)
13302  *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
13303 }
13304 
13311 static pm_node_t *
13312 parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13313  switch (PM_NODE_TYPE(target)) {
13314  case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13315  case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13316  case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13317  case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13318  case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13319  case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13320  case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13321  default: break;
13322  }
13323 
13324  pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13325  pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13326 
13327  pm_node_destroy(parser, target);
13328  return (pm_node_t *) result;
13329 }
13330 
13336 static void
13337 parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13338  pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13339 
13340  for (size_t index = 0; index < implicit_parameters->size; index++) {
13341  if (implicit_parameters->nodes[index] == node) {
13342  // If the node is not the last one in the list, we need to shift the
13343  // remaining nodes down to fill the gap. This is extremely unlikely
13344  // to happen.
13345  if (index != implicit_parameters->size - 1) {
13346  memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13347  }
13348 
13349  implicit_parameters->size--;
13350  break;
13351  }
13352  }
13353 }
13354 
// Convert an already-parsed expression node into a node usable as a write
// target, dispatching on the node's type.
//
// parser       - the parser; used for diagnostics, scopes and node creation.
// target       - the node to convert.
// multiple     - forwarded to the recursive call for splat expressions; when
//                true, a safe-navigation call target is reported with
//                PM_ERR_UNEXPECTED_SAFE_NAVIGATION.
// splat_parent - when true, a PM_MULTI_TARGET_NODE target is reported with
//                PM_ERR_WRITE_TARGET_UNEXPECTED.
//
// Returns the resulting node. Depending on the branch this is the same node
// (in the visible code, sometimes retagged in place by assigning target->type)
// or a newly created node, in which case the original node is destroyed. Nodes
// that cannot be converted are reported with PM_ERR_WRITE_TARGET_UNEXPECTED and
// returned unchanged.
//
// NOTE(review): the embedded listing numbers skip in many places inside this
// function (for example 13368, 13379-13381, 13389, 13401-13402, 13405-13407,
// 13409, 13420-13421, 13425, 13434-13436). Case labels and statements are
// missing from this copy, so several `return target;` lines below appear
// without the label they belong to. Compare against the upstream file before
// changing any code here.
13363 static pm_node_t *
13364 parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13365  switch (PM_NODE_TYPE(target)) {
13366  case PM_MISSING_NODE:
13367  return target;
13369  case PM_FALSE_NODE:
13370  case PM_SOURCE_FILE_NODE:
13371  case PM_SOURCE_LINE_NODE:
13372  case PM_NIL_NODE:
13373  case PM_SELF_NODE:
13374  case PM_TRUE_NODE: {
13375  // In these special cases, we have specific error messages and we
13376  // will replace them with local variable writes.
13377  return parse_unwriteable_target(parser, target);
13378  }
13382  return target;
13383  case PM_CONSTANT_PATH_NODE:
13384  if (context_def_p(parser)) {
13385  pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13386  }
13387 
13388  assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
13390 
13391  return target;
13392  case PM_CONSTANT_READ_NODE:
13393  if (context_def_p(parser)) {
13394  pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13395  }
13396 
       // The node is retagged in place; the assert checks that the read and
       // target node structs have the same size.
13397  assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13398  target->type = PM_CONSTANT_TARGET_NODE;
13399 
13400  return target;
13403  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13404  return target;
13408  return target;
13410  if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13411  PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13412  parse_target_implicit_parameter(parser, target);
13413  }
13414 
13415  const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13416  uint32_t name = cast->name;
13417  uint32_t depth = cast->depth;
13418  pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13419 
13422 
13423  return target;
13424  }
       // Replace the node with a local variable target named "it" at depth 0,
       // drop it from the scope's implicit parameters and destroy the original.
13426  pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13427  pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13428 
13429  parse_target_implicit_parameter(parser, target);
13430  pm_node_destroy(parser, target);
13431 
13432  return node;
13433  }
13437  return target;
13438  case PM_MULTI_TARGET_NODE:
13439  if (splat_parent) {
13440  // Multi target is not accepted in all positions. If this is one
13441  // of them, then we need to add an error.
13442  pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13443  }
13444 
13445  return target;
13446  case PM_SPLAT_NODE: {
13447  pm_splat_node_t *splat = (pm_splat_node_t *) target;
13448 
       // Convert the splatted expression itself, marking that its parent is a
       // splat.
13449  if (splat->expression != NULL) {
13450  splat->expression = parse_target(parser, splat->expression, multiple, true);
13451  }
13452 
13453  return (pm_node_t *) splat;
13454  }
13455  case PM_CALL_NODE: {
13456  pm_call_node_t *call = (pm_call_node_t *) target;
13457 
13458  // If we have no arguments to the call node and we need this to be a
13459  // target then this is either a method call or a local variable
13460  // write.
13461  if (
13462  (call->message_loc.start != NULL) &&
13463  (call->message_loc.end[-1] != '!') &&
13464  (call->message_loc.end[-1] != '?') &&
13465  (call->opening_loc.start == NULL) &&
13466  (call->arguments == NULL) &&
13467  (call->block == NULL)
13468  ) {
13469  if (call->receiver == NULL) {
13470  // When we get here, we have a local variable write, because it
13471  // was previously marked as a method call but now we have an =.
13472  // This looks like:
13473  //
13474  // foo = 1
13475  //
13476  // When it was parsed in the prefix position, foo was seen as a
13477  // method call with no receiver and no arguments. Now we have an
13478  // =, so we know it's a local variable write.
       // message_loc is copied first because the call node is destroyed before
       // the new target node is created from that location.
13479  const pm_location_t message_loc = call->message_loc;
13480 
13481  pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
13482  pm_node_destroy(parser, target);
13483 
13484  return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
13485  }
13486 
       // A call with a receiver whose message starts with '_' or passes the
       // encoding's alnum_char check becomes a call target with the name
       // rewritten by parse_write_name.
13487  if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13488  if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13489  pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13490  }
13491 
13492  parse_write_name(parser, &call->name);
13493  return (pm_node_t *) pm_call_target_node_create(parser, call);
13494  }
13495  }
13496 
13497  // If there is no call operator and the message is "[]" then this is
13498  // an aref expression, and we can transform it into an aset
13499  // expression.
13500  if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13501  return (pm_node_t *) pm_index_target_node_create(parser, call);
13502  }
13503  }
13504  /* fallthrough */
13505  default:
13506  // In this case we have a node that we don't know how to convert
13507  // into a target. We need to treat it as an error. For now, we'll
13508  // mark it as an error and just skip right past it.
13509  pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13510  return target;
13511  }
13512 }
13513 
13518 static pm_node_t *
13519 parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13520  pm_node_t *result = parse_target(parser, target, multiple, false);
13521 
13522  // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13523  // parens after the targets.
13524  if (
13525  !match1(parser, PM_TOKEN_EQUAL) &&
13526  !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13527  !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13528  ) {
13529  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13530  }
13531 
13532  return result;
13533 }
13534 
13539 static pm_node_t *
13540 parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13541  pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13542 
13543  if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13544  return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
13545  }
13546 
13547  return write;
13548 }
13549 
// Build the node for writing `value` to `target` through `operator`,
// dispatching on the target's node type.
//
// parser   - the parser; used for diagnostics, scopes and node creation.
// target   - the node being written to.
// operator - the assignment operator token; it is passed to the write node
//            constructors and is where the fallback error is reported.
// value    - the node for the right-hand side.
//
// Returns the write node. In several branches the target node is destroyed
// after the write node has been built from it; in the call-node branches the
// existing call node is modified and returned. When the target cannot be
// written to, PM_ERR_WRITE_TARGET_UNEXPECTED is reported on the operator and
// the target is returned.
//
// NOTE(review): the embedded listing numbers skip in many places inside this
// function (for example 13559, 13583-13584, 13587, 13592-13593, 13612, 13621).
// Case labels and declarations (such as `local_read`) are missing from this
// copy. Compare against the upstream file before changing any code here.
13553 static pm_node_t *
13554 parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13555  switch (PM_NODE_TYPE(target)) {
13556  case PM_MISSING_NODE:
       // The value is destroyed and the missing node is returned as is.
13557  pm_node_destroy(parser, value);
13558  return target;
13560  pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13561  pm_node_destroy(parser, target);
13562  return (pm_node_t *) node;
13563  }
13564  case PM_CONSTANT_PATH_NODE: {
13565  pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13566 
13567  if (context_def_p(parser)) {
13568  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13569  }
13570 
13571  return parse_shareable_constant_write(parser, node);
13572  }
13573  case PM_CONSTANT_READ_NODE: {
13574  pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13575 
13576  if (context_def_p(parser)) {
13577  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13578  }
13579 
13580  pm_node_destroy(parser, target);
13581  return parse_shareable_constant_write(parser, node);
13582  }
13585  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13586  /* fallthrough */
13588  pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13589  pm_node_destroy(parser, target);
13590  return (pm_node_t *) node;
13591  }
13594 
       // The name, location and depth are copied out of the read node because
       // the node is destroyed before the write node is created.
13595  pm_constant_id_t name = local_read->name;
13596  pm_location_t name_loc = target->location;
13597 
13598  uint32_t depth = local_read->depth;
13599  pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13600 
13601  if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
       // The diagnostic depends on whether the scope has the
       // PM_SCOPE_PARAMETERS_NUMBERED_FOUND bit set.
13602  pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13603  PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13604  parse_target_implicit_parameter(parser, target);
13605  }
13606 
13607  pm_locals_unread(&scope->locals, name);
13608  pm_node_destroy(parser, target);
13609 
13610  return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13611  }
       // Write to a local named "it" at depth 0, drop the node from the scope's
       // implicit parameters and destroy it.
13613  pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13614  pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13615 
13616  parse_target_implicit_parameter(parser, target);
13617  pm_node_destroy(parser, target);
13618 
13619  return node;
13620  }
13622  pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13623  pm_node_destroy(parser, target);
13624  return write_node;
13625  }
13626  case PM_MULTI_TARGET_NODE:
13627  return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
13628  case PM_SPLAT_NODE: {
13629  pm_splat_node_t *splat = (pm_splat_node_t *) target;
13630 
       // NOTE(review): `value` is passed both to this recursive call and to
       // pm_multi_write_node_create below -- confirm that sharing the node
       // between the two is intended.
13631  if (splat->expression != NULL) {
13632  splat->expression = parse_write(parser, splat->expression, operator, value);
13633  }
13634 
       // The splat is wrapped in a new multi target node holding just the splat.
13635  pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13636  pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
13637 
13638  return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
13639  }
13640  case PM_CALL_NODE: {
13641  pm_call_node_t *call = (pm_call_node_t *) target;
13642 
13643  // If we have no arguments to the call node and we need this to be a
13644  // target then this is either a method call or a local variable
13645  // write.
13646  if (
13647  (call->message_loc.start != NULL) &&
13648  (call->message_loc.end[-1] != '!') &&
13649  (call->message_loc.end[-1] != '?') &&
13650  (call->opening_loc.start == NULL) &&
13651  (call->arguments == NULL) &&
13652  (call->block == NULL)
13653  ) {
13654  if (call->receiver == NULL) {
13655  // When we get here, we have a local variable write, because it
13656  // was previously marked as a method call but now we have an =.
13657  // This looks like:
13658  //
13659  // foo = 1
13660  //
13661  // When it was parsed in the prefix position, foo was seen as a
13662  // method call with no receiver and no arguments. Now we have an
13663  // =, so we know it's a local variable write.
13664  const pm_location_t message = call->message_loc;
13665 
       // The return value of pm_parser_local_add_location is discarded here;
       // the constant id is looked up separately below after the call node has
       // been destroyed.
13666  pm_parser_local_add_location(parser, message.start, message.end, 0);
13667  pm_node_destroy(parser, target);
13668 
13669  pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
13670  target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
13671 
13672  pm_refute_numbered_parameter(parser, message.start, message.end);
13673  return target;
13674  }
13675 
13676  if (char_is_identifier_start(parser, call->message_loc.start)) {
13677  // When we get here, we have a method call, because it was
13678  // previously marked as a method call but now we have an =. This
13679  // looks like:
13680  //
13681  // foo.bar = 1
13682  //
13683  // When it was parsed in the prefix position, foo.bar was seen as a
13684  // method call with no arguments. Now we have an =, so we know it's
13685  // a method call with an argument. In this case we will create the
13686  // arguments node, parse the argument, and add it to the list.
13687  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13688  call->arguments = arguments;
13689 
13690  pm_arguments_node_arguments_append(arguments, value);
       // The call's end is moved to the end of the arguments node.
13691  call->base.location.end = arguments->base.location.end;
13692 
13693  parse_write_name(parser, &call->name);
13694  pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13695 
13696  return (pm_node_t *) call;
13697  }
13698  }
13699 
13700  // If there is no call operator and the message is "[]" then this is
13701  // an aref expression, and we can transform it into an aset
13702  // expression.
13703  if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13704  if (call->arguments == NULL) {
13705  call->arguments = pm_arguments_node_create(parser);
13706  }
13707 
13708  pm_arguments_node_arguments_append(call->arguments, value);
13709  target->location.end = value->location.end;
13710 
13711  // Replace the name with "[]=".
13712  call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13713  pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13714 
13715  return target;
13716  }
13717 
13718  // If there are arguments on the call node, then it can't be a method
13719  // call ending with = or a local variable write, so it must be a
13720  // syntax error. In this case we'll fall through to our default
13721  // handling. We need to free the value that we parsed because there
13722  // is no way for us to attach it to the tree at this point.
13723  pm_node_destroy(parser, value);
13724  }
13725  /* fallthrough */
13726  default:
13727  // In this case we have a node that we don't know how to convert into a
13728  // target. We need to treat it as an error. For now, we'll mark it as an
13729  // error and just skip right past it.
13730  pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13731  return target;
13732  }
13733 }
13734 
13741 static pm_node_t *
13742 parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13743  switch (PM_NODE_TYPE(target)) {
13744  case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13745  case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13746  case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13747  case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13748  case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13749  case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13750  case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13751  default: break;
13752  }
13753 
13754  pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13755  pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13756 
13757  pm_node_destroy(parser, target);
13758  return (pm_node_t *) result;
13759 }
13760 
13771 static pm_node_t *
13772 parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13773  bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13774 
13775  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13776  pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13777 
13778  while (accept1(parser, PM_TOKEN_COMMA)) {
13779  if (accept1(parser, PM_TOKEN_USTAR)) {
13780  // Here we have a splat operator. It can have a name or be
13781  // anonymous. It can be the final target or be in the middle if
13782  // there haven't been any others yet.
13783  if (has_rest) {
13784  pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13785  }
13786 
13787  pm_token_t star_operator = parser->previous;
13788  pm_node_t *name = NULL;
13789 
13790  if (token_begins_expression_p(parser->current.type)) {
13791  name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13792  name = parse_target(parser, name, true, true);
13793  }
13794 
13795  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13796  pm_multi_target_node_targets_append(parser, result, splat);
13797  has_rest = true;
13798  } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13799  context_push(parser, PM_CONTEXT_MULTI_TARGET);
13800  pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13801  target = parse_target(parser, target, true, false);
13802 
13803  pm_multi_target_node_targets_append(parser, result, target);
13804  context_pop(parser);
13805  } else if (token_begins_expression_p(parser->current.type)) {
13806  pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13807  target = parse_target(parser, target, true, false);
13808 
13809  pm_multi_target_node_targets_append(parser, result, target);
13810  } else if (!match1(parser, PM_TOKEN_EOF)) {
13811  // If we get here, then we have a trailing , in a multi target node.
13812  // We'll add an implicit rest node to represent this.
13813  pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13814  pm_multi_target_node_targets_append(parser, result, rest);
13815  break;
13816  }
13817  }
13818 
13819  return (pm_node_t *) result;
13820 }
13821 
/**
 * Parse a list of targets with parse_targets and then verify the token that
 * follows them.
 *
 * After the targets have been parsed, a newline token is accepted if one is
 * present. The current token must then be either PM_TOKEN_EQUAL or
 * PM_TOKEN_PARENTHESIS_RIGHT; if it is neither, PM_ERR_WRITE_TARGET_UNEXPECTED
 * is recorded against the parsed node. The node is returned in both cases.
 *
 * @param parser The parser being driven.
 * @param first_target The first target, forwarded unchanged to parse_targets.
 * @param binding_power Forwarded unchanged to parse_targets.
 * @param depth Forwarded unchanged to parse_targets.
 * @return The node returned by parse_targets.
 */
13826 static pm_node_t *
13827 parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13828  pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
       // The return value is deliberately ignored: the newline is optional.
13829  accept1(parser, PM_TOKEN_NEWLINE);
13830 
13831  // Ensure that we have either an = or a ) after the targets.
13832  if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13833  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13834  }
13835 
13836  return result;
13837 }
13838 
/**
 * Parse a list of statements that belongs to the given context.
 *
 * Leading semicolon/newline tokens are skipped first. If the current token
 * then terminates the context (per context_terminator), no node is created
 * and NULL is returned. Otherwise a statements node is created, the context
 * is pushed for the duration of the loop, and expressions are parsed with
 * PM_BINDING_POWER_STATEMENT and appended until one of the following happens:
 *
 * - the parser is recovering from a syntax error,
 * - a context terminator is reached, either after terminators or directly
 *   after a statement.
 *
 * When a statement is followed by neither a terminator nor the end of the
 * context, a missing node causes one token to be skipped, while any other
 * node produces PM_ERR_EXPECT_EOL_AFTER_STATEMENT and parsing continues.
 *
 * After the loop the context is popped and pm_void_statements_check is run
 * over the collected statements.
 *
 * @param parser The parser being driven.
 * @param context The context whose terminators end the statement list.
 * @param depth The current nesting depth; depth + 1 is passed to every
 *     nested parse_expression call.
 * @return The statements node, or NULL when there are no statements.
 */
13842 static pm_statements_node_t *
13843 parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13844  // First, skip past any optional terminators that might be at the beginning
13845  // of the statements.
13846  while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13847 
13848  // If we have a terminator, then we can just return NULL.
13849  if (context_terminator(context, &parser->current)) return NULL;
13850 
13851  pm_statements_node_t *statements = pm_statements_node_create(parser);
13852 
13853  // At this point we know we have at least one statement, and that it
13854  // immediately follows the current token.
13855  context_push(parser, context);
13856 
13857  while (true) {
13858  pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13859  pm_statements_node_body_append(parser, statements, node, true);
13860 
13861  // If we're recovering from a syntax error, then we need to stop parsing
13862  // the statements now.
13863  if (parser->recovering) {
13864  // If this is the level of context where the recovery has happened,
13865  // then we can mark the parser as done recovering.
13866  if (context_terminator(context, &parser->current)) parser->recovering = false;
13867  break;
13868  }
13869 
13870  // If we have a terminator, then we will parse all consecutive
13871  // terminators and then continue parsing the statements list.
13872  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13873  // If we have a terminator, then we will continue parsing the
13874  // statements list.
13875  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13876  if (context_terminator(context, &parser->current)) break;
13877 
13878  // Now we can continue parsing the list of statements.
13879  continue;
13880  }
13881 
13882  // At this point we have a list of statements that are not terminated by
13883  // a newline or semicolon. At this point we need to check if we're at
13884  // the end of the statements list. If we are, then we should break out
13885  // of the loop.
13886  if (context_terminator(context, &parser->current)) break;
13887 
13888  // At this point, we have a syntax error, because the statement was not
13889  // terminated by a newline or semicolon, and we're not at the end of the
13890  // statements list. Ideally we should scan forward to determine if we
13891  // should insert a missing terminator or break out of parsing the
13892  // statements list at this point.
13893  //
13894  // We don't have that yet, so instead we'll do a more naive approach. If
13895  // we were unable to parse an expression, then we will skip past this
13896  // token and continue parsing the statements list. Otherwise we'll add
13897  // an error and continue parsing the statements list.
13898  if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13899  parser_lex(parser);
13900 
13901  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13902  if (context_terminator(context, &parser->current)) break;
13903  } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13904  // This is an inlined version of accept1 because the error that we
13905  // want to add has varargs. If this happens again, we should
13906  // probably extract a helper function.
13907  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
       // Rewrite the previous token in place as a zero-width
       // PM_TOKEN_MISSING positioned at its own end.
13908  parser->previous.start = parser->previous.end;
13909  parser->previous.type = PM_TOKEN_MISSING;
13910  }
13911  }
13912 
13913  context_pop(parser);
       // last_value is passed to pm_void_statements_check below. It is true
       // for every context except the one(s) listed in the switch.
13914  bool last_value = true;
13915  switch (context) {
       // NOTE(review): this listing jumps from line 13915 to 13917, so one
       // line is missing here -- possibly another case label that shares
       // the body below. Confirm against the upstream file.
13917  case PM_CONTEXT_DEF_ENSURE:
13918  last_value = false;
13919  break;
13920  default:
13921  break;
13922  }
13923  pm_void_statements_check(parser, statements, last_value);
13924 
13925  return statements;
13926 }
13927 
/**
 * Add a hash key to the set of static literals and warn when it duplicates a
 * key that was already recorded.
 *
 * The node is handed to pm_static_literals_add (with `true` as its final
 * argument). When that call returns a non-NULL node, the returned node is
 * rendered into a temporary buffer with pm_static_literal_inspect and a
 * PM_WARN_DUPLICATED_HASH_KEY diagnostic is built for parser->warning_list,
 * spanning the returned node's location. The buffer is freed before
 * returning.
 *
 * @param parser The parser that owns the newline list, encoding and warnings.
 * @param literals The set of static literals collected for the hash.
 * @param node The key node being added.
 */
13932 static void
13933 pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13934  const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13935 
13936  if (duplicated != NULL) {
13937  pm_buffer_t buffer = { 0 };
13938  pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13939 
       // NOTE(review): listing lines 13940 and 13947 are missing. The
       // argument list below has no visible call opener, and a final
       // argument may also have been dropped. Restore both from the
       // upstream file before compiling.
13941  &parser->warning_list,
13942  duplicated->location.start,
13943  duplicated->location.end,
13944  PM_WARN_DUPLICATED_HASH_KEY,
13945  (int) pm_buffer_length(&buffer),
13946  pm_buffer_value(&buffer),
13948  );
13949 
13950  pm_buffer_free(&buffer);
13951  }
13952 }
13953 
/**
 * Add a `when` clause condition to the set of static literals and warn when
 * an equal literal was already recorded.
 *
 * The node is handed to pm_static_literals_add (with `false` as its final
 * argument). When that call returns a non-NULL node, a
 * PM_WARN_DUPLICATED_WHEN_CLAUSE diagnostic is built for
 * parser->warning_list, spanning the location of `node` (the literal that
 * was just added, not the one returned).
 *
 * @param parser The parser that owns the newline list and warnings.
 * @param literals The set of static literals collected for the case node.
 * @param node The condition node being added.
 */
13958 static void
13959 pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13960  pm_node_t *previous;
13961 
13962  if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
       // NOTE(review): listing lines 13963 and 13968-13969 are missing. The
       // argument list below has no visible call opener, and trailing
       // arguments (presumably the ones that use `previous`) may have been
       // dropped. Restore them from the upstream file before compiling.
13964  &parser->warning_list,
13965  node->location.start,
13966  node->location.end,
13967  PM_WARN_DUPLICATED_WHEN_CLAUSE,
13970  );
13971  }
13972 }
13973 
/**
 * Parse the elements of a hash (or of a bare keyword hash) and append them
 * to `node`.
 *
 * Each iteration parses one element, chosen by the current token:
 *
 * - PM_TOKEN_USTAR_STAR: a `**` splat, whose value is optional.
 * - PM_TOKEN_LABEL: a label key, whose value may be omitted. In that case a
 *   value is synthesized from the label's own name.
 * - anything else: a key expression, followed by `=>` unless the key is
 *   itself a label-style symbol, followed by a value.
 *
 * Keys of the last two forms are registered through
 * pm_hash_key_static_literals_add. Elements are appended with
 * pm_hash_node_elements_append when `node` is a PM_HASH_NODE, and with
 * pm_keyword_hash_node_elements_append otherwise. Parsing continues after a
 * comma only if the next token is `**`, a label, or begins an expression.
 *
 * @param parser The parser being driven.
 * @param literals The set of static literals collected for this hash's keys.
 * @param node The hash node or keyword hash node that receives the elements.
 * @param depth The current nesting depth; depth + 1 is passed to nested
 *     parse calls.
 * @return true if at least one `**` splat element was parsed.
 */
13977 static bool
13978 parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
       // NOTE(review): listing line 13979 is missing here. Check the
       // upstream file for the statement that belongs at this position.
13980  bool contains_keyword_splat = false;
13981 
13982  while (true) {
13983  pm_node_t *element;
13984 
13985  switch (parser->current.type) {
13986  case PM_TOKEN_USTAR_STAR: {
13987  parser_lex(parser);
13988  pm_token_t operator = parser->previous;
13989  pm_node_t *value = NULL;
13990 
13991  if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13992  // If we're about to parse a nested hash that is being
13993  // pushed into this hash directly with **, then we want the
13994  // inner hash to share the static literals with the outer
13995  // hash.
13996  parser->current_hash_keys = literals;
13997  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13998  } else if (token_begins_expression_p(parser->current.type)) {
13999  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14000  } else {
       // A bare `**` with no value: value stays NULL and the enclosing
       // scope is checked for keyword forwarding.
14001  pm_parser_scope_forwarding_keywords_check(parser, &operator);
14002  }
14003 
14004  element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
14005  contains_keyword_splat = true;
14006  break;
14007  }
14008  case PM_TOKEN_LABEL: {
14009  pm_token_t label = parser->current;
14010  parser_lex(parser);
14011 
14012  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
14013  pm_hash_key_static_literals_add(parser, literals, key);
14014 
14015  pm_token_t operator = not_provided(parser);
14016  pm_node_t *value = NULL;
14017 
14018  if (token_begins_expression_p(parser->current.type)) {
14019  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
14020  } else {
       // No value follows the label, so one is synthesized from the
       // label's name: the label token minus its final byte.
14021  if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
14022  pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
14023  value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
14024  } else {
       // Note: this local `depth` (a local-variable depth, not the
       // nesting depth) shadows the `depth` parameter in this branch.
       // It stays -1 when the name is rejected or not resolved, in
       // which case a variable call is created instead of a read.
14025  int depth = -1;
14026  pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
14027 
14028  if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
14029  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
14030  } else {
14031  depth = pm_parser_local_depth(parser, &identifier);
14032  }
14033 
14034  if (depth == -1) {
14035  value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
14036  } else {
14037  value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
14038  }
14039  }
14040 
       // Widen the synthesized node by one byte before wrapping it in an
       // implicit node -- presumably to cover the byte trimmed from the
       // label above; confirm.
14041  value->location.end++;
14042  value = (pm_node_t *) pm_implicit_node_create(parser, value);
14043  }
14044 
14045  element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14046  break;
14047  }
14048  default: {
14049  pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
14050 
14051  // Hash keys that are strings are automatically frozen. We will
14052  // mark that here.
14053  if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
14054  pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
14055  }
14056 
14057  pm_hash_key_static_literals_add(parser, literals, key);
14058 
       // A key that is already a label-style symbol has no `=>` operator;
       // every other key must be followed by one.
14059  pm_token_t operator;
14060  if (pm_symbol_node_label_p(key)) {
14061  operator = not_provided(parser);
14062  } else {
14063  expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
14064  operator = parser->previous;
14065  }
14066 
14067  pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14068  element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14069  break;
14070  }
14071  }
14072 
14073  if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
14074  pm_hash_node_elements_append((pm_hash_node_t *) node, element);
14075  } else {
14076  pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
14077  }
14078 
14079  // If there's no comma after the element, then we're done.
14080  if (!accept1(parser, PM_TOKEN_COMMA)) break;
14081 
14082  // If the next element starts with a label or a **, then we know we have
14083  // another element in the hash, so we'll continue parsing.
14084  if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
14085 
14086  // Otherwise we need to check if the subsequent token begins an expression.
14087  // If it does, then we'll continue parsing.
14088  if (token_begins_expression_p(parser->current.type)) continue;
14089 
14090  // Otherwise by default we will exit out of this loop.
14091  break;
14092  }
14093 
14094  return contains_keyword_splat;
14095 }
14096 
/**
 * Append an argument to the arguments list, creating the underlying
 * arguments node on first use.
 *
 * @param parser The parser, used to create the arguments node if needed.
 * @param arguments The arguments structure; its `arguments` field is
 *     populated when it is still NULL.
 * @param argument The argument node to append.
 */
14100 static inline void
14101 parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
14102  if (arguments->arguments == NULL) {
14103  arguments->arguments = pm_arguments_node_create(parser);
14104  }
14105 
14106  pm_arguments_node_arguments_append(arguments->arguments, argument);
14107 }
14108 
/**
 * Parse a comma-separated list of call arguments into `arguments`.
 *
 * Nothing is parsed when the current token is the terminator or EOF, when
 * its left binding power is set and lower than PM_BINDING_POWER_RANGE, or
 * when it terminates the current context. Otherwise each iteration parses
 * one argument, chosen by the current token:
 *
 * - PM_TOKEN_USTAR_STAR / PM_TOKEN_LABEL: a bare keyword hash, parsed with
 *   parse_assocs.
 * - PM_TOKEN_UAMPERSAND: a block argument. The first one is stored in
 *   arguments->block; any later one is appended to the list.
 * - PM_TOKEN_USTAR: a splat.
 * - PM_TOKEN_UDOT_DOT_DOT: argument forwarding when accepts_forwarding is
 *   set and no expression follows, a beginless range when one does;
 *   otherwise it is handled by the default case.
 * - anything else: a value expression, which becomes the first element of a
 *   bare keyword hash if it is a label-style symbol or is followed by `=>`.
 *
 * The loop ends at EOF, on a missing node or while the parser is recovering,
 * when an argument is not followed by a comma, or when the terminator
 * follows a comma (a trailing comma). Errors are recorded, without stopping,
 * for arguments after a block argument or after forwarding, for a second
 * bare hash, and for a splat after a bare hash.
 *
 * @param parser The parser being driven.
 * @param arguments The structure that receives the arguments, the block
 *     argument and the has_forwarding flag.
 * @param accepts_forwarding Whether `...` may be treated as argument
 *     forwarding.
 * @param terminator The token type that closes the list. When it is
 *     PM_TOKEN_EOF, no newline is accepted between arguments.
 * @param depth The current nesting depth; depth + 1 is passed to nested
 *     parse calls.
 */
14112 static void
14113 parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14114  pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14115 
14116  // First we need to check if the next token is one that could be the start
14117  // of an argument. If it's not, then we can just return.
14118  if (
14119  match2(parser, terminator, PM_TOKEN_EOF) ||
14120  (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
14121  context_terminator(parser->current_context->context, &parser->current)
14122  ) {
14123  return;
14124  }
14125 
14126  bool parsed_first_argument = false;
14127  bool parsed_bare_hash = false;
14128  bool parsed_block_argument = false;
14129  bool parsed_forwarding_arguments = false;
14130 
14131  while (!match1(parser, PM_TOKEN_EOF)) {
       // These errors are reported at the start of the argument that
       // follows the block/forwarding argument; parsing still continues.
14132  if (parsed_block_argument) {
14133  pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
14134  }
14135  if (parsed_forwarding_arguments) {
14136  pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
14137  }
14138 
14139  pm_node_t *argument = NULL;
14140 
14141  switch (parser->current.type) {
14142  case PM_TOKEN_USTAR_STAR:
14143  case PM_TOKEN_LABEL: {
14144  if (parsed_bare_hash) {
14145  pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
14146  }
14147 
14148  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14149  argument = (pm_node_t *) hash;
14150 
14151  pm_static_literals_t hash_keys = { 0 };
14152  bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
14153 
14154  parse_arguments_append(parser, arguments, argument);
14155 
       // NOTE(review): listing line 14156 is missing. `flags` is used
       // below, but its declaration is not visible in this listing.
       // Restore it from the upstream file before compiling.
14157  if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14158  pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14159 
14160  pm_static_literals_free(&hash_keys);
14161  parsed_bare_hash = true;
14162 
14163  break;
14164  }
14165  case PM_TOKEN_UAMPERSAND: {
14166  parser_lex(parser);
14167  pm_token_t operator = parser->previous;
14168  pm_node_t *expression = NULL;
14169 
14170  if (token_begins_expression_p(parser->current.type)) {
14171  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14172  } else {
14173  pm_parser_scope_forwarding_block_check(parser, &operator);
14174  }
14175 
14176  argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
       // Only the first block argument occupies arguments->block; a
       // repeated one (already reported above) goes into the plain list.
14177  if (parsed_block_argument) {
14178  parse_arguments_append(parser, arguments, argument);
14179  } else {
14180  arguments->block = argument;
14181  }
14182 
14183  parsed_block_argument = true;
14184  break;
14185  }
14186  case PM_TOKEN_USTAR: {
14187  parser_lex(parser);
14188  pm_token_t operator = parser->previous;
14189 
       // NOTE(review): listing line 14190 is missing. The `} else {`
       // below has no visible matching `if`; the lines up to it form the
       // branch that creates a splat with no expression. Restore the
       // condition from the upstream file before compiling.
14191  pm_parser_scope_forwarding_positionals_check(parser, &operator);
14192  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14193  if (parsed_bare_hash) {
14194  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14195  }
14196  } else {
14197  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14198 
14199  if (parsed_bare_hash) {
14200  pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14201  }
14202 
14203  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
14204  }
14205 
14206  parse_arguments_append(parser, arguments, argument);
14207  break;
14208  }
14209  case PM_TOKEN_UDOT_DOT_DOT: {
14210  if (accepts_forwarding) {
14211  parser_lex(parser);
14212 
14213  if (token_begins_expression_p(parser->current.type)) {
14214  // If the token begins an expression then this ... was
14215  // not actually argument forwarding but was instead a
14216  // range.
14217  pm_token_t operator = parser->previous;
14218  pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
14219 
14220  // If we parse a range, we need to validate that we
14221  // didn't accidentally violate the nonassoc rules of the
14222  // ... operator.
14223  if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
14224  pm_range_node_t *range = (pm_range_node_t *) right;
14225  pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
14226  }
14227 
       // The range has no left operand. `argument` is now non-NULL, so
       // after falling through the default case will not parse again.
14228  argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
14229  } else {
14230  pm_parser_scope_forwarding_all_check(parser, &parser->previous);
14231  if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
14232  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
14233  }
14234 
14235  argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
14236  parse_arguments_append(parser, arguments, argument);
14237  pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
14238  arguments->has_forwarding = true;
14239  parsed_forwarding_arguments = true;
14240  break;
14241  }
14242  }
14243  }
       // Reached either with accepts_forwarding false (argument is still
       // NULL and `...` has not been consumed) or after the range above.
14244  /* fallthrough */
14245  default: {
14246  if (argument == NULL) {
14247  argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14248  }
14249 
14250  bool contains_keywords = false;
14251  bool contains_keyword_splat = false;
14252 
       // A label-style symbol, or any expression followed by `=>`, is the
       // first key of a bare keyword hash.
14253  if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14254  if (parsed_bare_hash) {
14255  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
14256  }
14257 
14258  pm_token_t operator;
14259  if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
14260  operator = parser->previous;
14261  } else {
14262  operator = not_provided(parser);
14263  }
14264 
14265  pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
14266  contains_keywords = true;
14267 
14268  // Create the set of static literals for this hash.
14269  pm_static_literals_t hash_keys = { 0 };
14270  pm_hash_key_static_literals_add(parser, &hash_keys, argument);
14271 
14272  // Finish parsing the one we are part way through.
14273  pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14274  argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
14275 
14276  pm_keyword_hash_node_elements_append(bare_hash, argument);
14277  argument = (pm_node_t *) bare_hash;
14278 
14279  // Then parse more if we have a comma
14280  if (accept1(parser, PM_TOKEN_COMMA) && (
14281  token_begins_expression_p(parser->current.type) ||
14282  match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
14283  )) {
14284  contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
14285  }
14286 
14287  pm_static_literals_free(&hash_keys);
14288  parsed_bare_hash = true;
14289  }
14290 
14291  parse_arguments_append(parser, arguments, argument);
14292 
14293  pm_node_flags_t flags = 0;
14294  if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14295  if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14296  pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14297 
14298  break;
14299  }
14300  }
14301 
14302  parsed_first_argument = true;
14303 
14304  // If parsing the argument failed, we need to stop parsing arguments.
14305  if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14306 
14307  // If the terminator of these arguments is not EOF, then we have a
14308  // specific token we're looking for. In that case we can accept a
14309  // newline here because it is not functioning as a statement terminator.
14310  bool accepted_newline = false;
14311  if (terminator != PM_TOKEN_EOF) {
14312  accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14313  }
14314 
14315  if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14316  // If we previously were on a comma and we just parsed a bare hash,
14317  // then we want to continue parsing arguments. This is because the
14318  // comma was grabbed up by the hash parser.
14319  } else if (accept1(parser, PM_TOKEN_COMMA)) {
14320  // If there was a comma, then we need to check if we also accepted a
14321  // newline. If we did, then this is a syntax error.
14322  if (accepted_newline) {
14323  pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14324  }
14325  } else {
14326  // If there is no comma at the end of the argument list then we're
14327  // done parsing arguments and can break out of this loop.
14328  break;
14329  }
14330 
14331  // If we hit the terminator, then that means we have a trailing comma so
14332  // we can accept that output as well.
14333  if (match1(parser, terminator)) break;
14334  }
14335 }
14336 
/**
 * Parse a parenthesized, destructured required parameter into a multi target
 * node.
 *
 * An opening parenthesis is expected first and recorded as the node's
 * opening. Elements are then read, separated by commas. Each element is one
 * of:
 *
 * - a nested parenthesized group, parsed recursively;
 * - PM_TOKEN_USTAR, optionally followed by an identifier, producing a splat
 *   node (with a required parameter node as its value when named);
 * - an identifier, producing a required parameter node.
 *
 * Every named parameter is checked with pm_parser_parameter_name_check
 * (flagging it as repeated when that returns true) and added to the parser's
 * locals. A closing parenthesis found right after a comma is reported with
 * PM_ERR_PARAMETER_WILD_LOOSE_COMMA and represented by an implicit rest
 * node. Finally, an optional newline is accepted and the closing parenthesis
 * is expected and recorded as the node's closing.
 *
 * @param parser The parser being driven.
 * @return The multi target node describing the destructured parameter.
 */
14347 static pm_multi_target_node_t *
14348 parse_required_destructured_parameter(pm_parser_t *parser) {
14349  expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14350 
14351  pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14352  pm_multi_target_node_opening_set(node, &parser->previous);
14353 
14354  do {
14355  pm_node_t *param;
14356 
14357  // If we get here then we have a trailing comma, which isn't allowed in
14358  // the grammar. In other places, multi targets _do_ allow trailing
14359  // commas, so here we'll assume this is a mistake of the user not
14360  // knowing it's not allowed here.
14361  if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14362  param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14363  pm_multi_target_node_targets_append(parser, node, param);
14364  pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14365  break;
14366  }
14367 
14368  if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
       // The nested group is not consumed here; the recursive call
       // expects the opening parenthesis itself.
14369  param = (pm_node_t *) parse_required_destructured_parameter(parser);
14370  } else if (accept1(parser, PM_TOKEN_USTAR)) {
14371  pm_token_t star = parser->previous;
14372  pm_node_t *value = NULL;
14373 
       // The name after `*` is optional; value stays NULL for an
       // anonymous splat.
14374  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14375  pm_token_t name = parser->previous;
14376  value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14377  if (pm_parser_parameter_name_check(parser, &name)) {
14378  pm_node_flag_set_repeated_parameter(value);
14379  }
14380  pm_parser_local_add_token(parser, &name, 1);
14381  }
14382 
14383  param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
14384  } else {
14385  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14386  pm_token_t name = parser->previous;
14387 
14388  param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14389  if (pm_parser_parameter_name_check(parser, &name)) {
14390  pm_node_flag_set_repeated_parameter(param);
14391  }
14392  pm_parser_local_add_token(parser, &name, 1);
14393  }
14394 
14395  pm_multi_target_node_targets_append(parser, node, param);
14396  } while (accept1(parser, PM_TOKEN_COMMA));
14397 
14398  accept1(parser, PM_TOKEN_NEWLINE);
14399  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14400  pm_multi_target_node_closing_set(node, &parser->previous);
14401 
14402  return node;
14403 }
14404 
/**
 * The ordering classes used to validate the order of parameters in a
 * parameter list.
 *
 * The numeric values matter: update_parameter_state compares them with `<`
 * and `>` and only ever lowers the current state. Parsing starts at
 * PM_PARAMETERS_ORDER_NONE (the largest value), and a parameter whose class
 * is greater than the current state is reported as out of order. Do not
 * reorder the enumerators.
 */
14409 typedef enum {
14410  PM_PARAMETERS_NO_CHANGE = 0, // Extra state for tokens that should not change the state
       // Current state after PM_TOKEN_UAMPERSAND, PM_TOKEN_AMPERSAND or
       // PM_TOKEN_UDOT_DOT_DOT: any later parameter is an ordering error.
14411  PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,
       // Class of PM_TOKEN_USTAR_STAR and PM_TOKEN_STAR_STAR.
14412  PM_PARAMETERS_ORDER_KEYWORDS_REST,
       // Class of PM_TOKEN_LABEL.
14413  PM_PARAMETERS_ORDER_KEYWORDS,
       // No token maps to this class in parameters_ordering.
14414  PM_PARAMETERS_ORDER_REST,
       // Class of PM_TOKEN_USTAR and PM_TOKEN_STAR; also the state entered
       // when a named parameter follows an optional one.
14415  PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
       // Class of PM_TOKEN_EQUAL.
14416  PM_PARAMETERS_ORDER_OPTIONAL,
       // Class of PM_TOKEN_IDENTIFIER and PM_TOKEN_PARENTHESIS_LEFT.
14417  PM_PARAMETERS_ORDER_NAMED,
       // Initial state, before any parameter has been seen.
14418  PM_PARAMETERS_ORDER_NONE,
14419 } pm_parameters_order_t;
14420 
/**
 * Maps a token type to the parameter ordering class that it introduces.
 *
 * The table is indexed by token type and read by update_parameter_state.
 * Token types without an explicit entry are zero-initialized, which is
 * PM_PARAMETERS_NO_CHANGE, so they never affect the ordering state.
 */
14424 static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14425  [0] = PM_PARAMETERS_NO_CHANGE,
14426  [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14427  [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14428  [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14429  [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14430  [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14431  [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14432  [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14433  [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14434  [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14435  [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14436  [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14437 };
14438 
/**
 * Check whether the parameter introduced by `token` may appear given the
 * ordering state in `*current`, and update that state.
 *
 * The token's ordering class is looked up in parameters_ordering. The
 * outcome is decided in this order:
 *
 * 1. Class PM_PARAMETERS_NO_CHANGE: accepted, state untouched.
 * 2. A NAMED parameter while the state is OPTIONAL: accepted, and the state
 *    becomes AFTER_OPTIONAL.
 * 3. A NAMED parameter while the state is AFTER_OPTIONAL: accepted, state
 *    untouched.
 * 4. PM_TOKEN_USTAR while the state is AFTER_OPTIONAL: rejected with
 *    PM_ERR_PARAMETER_STAR.
 * 5. PM_TOKEN_UDOT_DOT_DOT while the state is between KEYWORDS_REST and
 *    AFTER_OPTIONAL inclusive: rejected with
 *    PM_ERR_PARAMETER_FORWARDING_AFTER_REST when the state is
 *    AFTER_OPTIONAL, otherwise with PM_ERR_PARAMETER_ORDER.
 * 6. The state is NOTHING_AFTER, or the class is greater than the state:
 *    rejected with PM_ERR_PARAMETER_ORDER.
 * 7. Otherwise accepted, and the state is lowered to the class if the class
 *    is smaller.
 *
 * @param parser The parser that receives any error.
 * @param token The token that introduces the parameter.
 * @param current The ordering state, read and possibly updated.
 * @return true if the parameter is accepted, false if an error was recorded.
 */
14446 static bool
14447 update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14448  pm_parameters_order_t state = parameters_ordering[token->type];
14449  if (state == PM_PARAMETERS_NO_CHANGE) return true;
14450 
14451  // If we see another ordered argument after an optional argument
14452  // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14453  if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14454  *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14455  return true;
14456  } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14457  return true;
14458  }
14459 
14460  if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14461  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14462  return false;
14463  } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14464  pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14465  return false;
14466  } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14467  // We know what transition we failed on, so we can provide a better error here.
14468  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14469  return false;
14470  }
14471 
       // The state only ever moves towards smaller values.
14472  if (state < *current) *current = state;
14473  return true;
14474 }
14475 
14479 static pm_parameters_node_t *
14480 parse_parameters(
14481  pm_parser_t *parser,
14482  pm_binding_power_t binding_power,
14483  bool uses_parentheses,
14484  bool allows_trailing_comma,
14485  bool allows_forwarding_parameters,
14486  bool accepts_blocks_in_defaults,
14487  uint16_t depth
14488 ) {
14489  pm_do_loop_stack_push(parser, false);
14490 
14491  pm_parameters_node_t *params = pm_parameters_node_create(parser);
14492  pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14493 
14494  while (true) {
14495  bool parsing = true;
14496 
14497  switch (parser->current.type) {
14499  update_parameter_state(parser, &parser->current, &order);
14500  pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
14501 
14502  if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14503  pm_parameters_node_requireds_append(params, param);
14504  } else {
14505  pm_parameters_node_posts_append(params, param);
14506  }
14507  break;
14508  }
14509  case PM_TOKEN_UAMPERSAND:
14510  case PM_TOKEN_AMPERSAND: {
14511  update_parameter_state(parser, &parser->current, &order);
14512  parser_lex(parser);
14513 
14514  pm_token_t operator = parser->previous;
14515  pm_token_t name;
14516 
14517  bool repeated = false;
14518  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14519  name = parser->previous;
14520  repeated = pm_parser_parameter_name_check(parser, &name);
14521  pm_parser_local_add_token(parser, &name, 1);
14522  } else {
14523  name = not_provided(parser);
14524  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14525  }
14526 
14527  pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
14528  if (repeated) {
14529  pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14530  }
14531  if (params->block == NULL) {
14532  pm_parameters_node_block_set(params, param);
14533  } else {
14534  pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
14535  pm_parameters_node_posts_append(params, (pm_node_t *) param);
14536  }
14537 
14538  break;
14539  }
14540  case PM_TOKEN_UDOT_DOT_DOT: {
14541  if (!allows_forwarding_parameters) {
14542  pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14543  }
14544 
14545  bool succeeded = update_parameter_state(parser, &parser->current, &order);
14546  parser_lex(parser);
14547 
14548  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14549  pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14550 
14551  if (params->keyword_rest != NULL) {
14552  // If we already have a keyword rest parameter, then we replace it with the
14553  // forwarding parameter and move the keyword rest parameter to the posts list.
14554  pm_node_t *keyword_rest = params->keyword_rest;
14555  pm_parameters_node_posts_append(params, keyword_rest);
14556  if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14557  params->keyword_rest = NULL;
14558  }
14559 
14560  pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14561  break;
14562  }
14564  case PM_TOKEN_IDENTIFIER:
14565  case PM_TOKEN_CONSTANT:
14568  case PM_TOKEN_METHOD_NAME: {
14569  parser_lex(parser);
14570  switch (parser->previous.type) {
14571  case PM_TOKEN_CONSTANT:
14572  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14573  break;
14575  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14576  break;
14578  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14579  break;
14581  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14582  break;
14583  case PM_TOKEN_METHOD_NAME:
14584  pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14585  break;
14586  default: break;
14587  }
14588 
14589  if (parser->current.type == PM_TOKEN_EQUAL) {
14590  update_parameter_state(parser, &parser->current, &order);
14591  } else {
14592  update_parameter_state(parser, &parser->previous, &order);
14593  }
14594 
14595  pm_token_t name = parser->previous;
14596  bool repeated = pm_parser_parameter_name_check(parser, &name);
14597  pm_parser_local_add_token(parser, &name, 1);
14598 
14599  if (match1(parser, PM_TOKEN_EQUAL)) {
14600  pm_token_t operator = parser->current;
14601  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14602  parser_lex(parser);
14603 
14604  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14605  uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14606 
14607  if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14608  pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14609  if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14610 
14611  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14612 
14613  if (repeated) {
14614  pm_node_flag_set_repeated_parameter((pm_node_t *) param);
14615  }
14616  pm_parameters_node_optionals_append(params, param);
14617 
14618  // If the value of the parameter increased the number of
14619  // reads of that parameter, then we need to warn that we
14620  // have a circular definition.
14621  if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14622  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14623  }
14624 
14625  context_pop(parser);
14626 
14627  // If parsing the value of the parameter resulted in error recovery,
14628  // then we can put a missing node in its place and stop parsing the
14629  // parameters entirely now.
14630  if (parser->recovering) {
14631  parsing = false;
14632  break;
14633  }
14634  } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14635  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14636  if (repeated) {
14637  pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14638  }
14639  pm_parameters_node_requireds_append(params, (pm_node_t *) param);
14640  } else {
14641  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14642  if (repeated) {
14643  pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14644  }
14645  pm_parameters_node_posts_append(params, (pm_node_t *) param);
14646  }
14647 
14648  break;
14649  }
14650  case PM_TOKEN_LABEL: {
14651  if (!uses_parentheses) parser->in_keyword_arg = true;
14652  update_parameter_state(parser, &parser->current, &order);
14653 
14654  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14655  parser_lex(parser);
14656 
14657  pm_token_t name = parser->previous;
14658  pm_token_t local = name;
14659  local.end -= 1;
14660 
14661  if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14662  pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14663  } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14664  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14665  }
14666 
14667  bool repeated = pm_parser_parameter_name_check(parser, &local);
14668  pm_parser_local_add_token(parser, &local, 1);
14669 
14670  switch (parser->current.type) {
14671  case PM_TOKEN_COMMA:
14673  case PM_TOKEN_PIPE: {
14674  context_pop(parser);
14675 
14676  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14677  if (repeated) {
14678  pm_node_flag_set_repeated_parameter(param);
14679  }
14680 
14681  pm_parameters_node_keywords_append(params, param);
14682  break;
14683  }
14684  case PM_TOKEN_SEMICOLON:
14685  case PM_TOKEN_NEWLINE: {
14686  context_pop(parser);
14687 
14688  if (uses_parentheses) {
14689  parsing = false;
14690  break;
14691  }
14692 
14693  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14694  if (repeated) {
14695  pm_node_flag_set_repeated_parameter(param);
14696  }
14697 
14698  pm_parameters_node_keywords_append(params, param);
14699  break;
14700  }
14701  default: {
14702  pm_node_t *param;
14703 
14704  if (token_begins_expression_p(parser->current.type)) {
14705  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14706  uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14707 
14708  if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14709  pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14710  if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14711 
14712  if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14713  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14714  }
14715 
14716  param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
14717  }
14718  else {
14719  param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14720  }
14721 
14722  if (repeated) {
14723  pm_node_flag_set_repeated_parameter(param);
14724  }
14725 
14726  context_pop(parser);
14727  pm_parameters_node_keywords_append(params, param);
14728 
14729  // If parsing the value of the parameter resulted in error recovery,
14730  // then we can put a missing node in its place and stop parsing the
14731  // parameters entirely now.
14732  if (parser->recovering) {
14733  parsing = false;
14734  break;
14735  }
14736  }
14737  }
14738 
14739  parser->in_keyword_arg = false;
14740  break;
14741  }
14742  case PM_TOKEN_USTAR:
14743  case PM_TOKEN_STAR: {
14744  update_parameter_state(parser, &parser->current, &order);
14745  parser_lex(parser);
14746 
14747  pm_token_t operator = parser->previous;
14748  pm_token_t name;
14749  bool repeated = false;
14750 
14751  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14752  name = parser->previous;
14753  repeated = pm_parser_parameter_name_check(parser, &name);
14754  pm_parser_local_add_token(parser, &name, 1);
14755  } else {
14756  name = not_provided(parser);
14757  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14758  }
14759 
14760  pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
14761  if (repeated) {
14762  pm_node_flag_set_repeated_parameter(param);
14763  }
14764 
14765  if (params->rest == NULL) {
14766  pm_parameters_node_rest_set(params, param);
14767  } else {
14768  pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14769  pm_parameters_node_posts_append(params, param);
14770  }
14771 
14772  break;
14773  }
14774  case PM_TOKEN_STAR_STAR:
14775  case PM_TOKEN_USTAR_STAR: {
14776  pm_parameters_order_t previous_order = order;
14777  update_parameter_state(parser, &parser->current, &order);
14778  parser_lex(parser);
14779 
14780  pm_token_t operator = parser->previous;
14781  pm_node_t *param;
14782 
14783  if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14784  if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14785  pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14786  }
14787 
14788  param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
14789  } else {
14790  pm_token_t name;
14791 
14792  bool repeated = false;
14793  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14794  name = parser->previous;
14795  repeated = pm_parser_parameter_name_check(parser, &name);
14796  pm_parser_local_add_token(parser, &name, 1);
14797  } else {
14798  name = not_provided(parser);
14799  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14800  }
14801 
14802  param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
14803  if (repeated) {
14804  pm_node_flag_set_repeated_parameter(param);
14805  }
14806  }
14807 
14808  if (params->keyword_rest == NULL) {
14809  pm_parameters_node_keyword_rest_set(params, param);
14810  } else {
14811  pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14812  pm_parameters_node_posts_append(params, param);
14813  }
14814 
14815  break;
14816  }
14817  default:
14818  if (parser->previous.type == PM_TOKEN_COMMA) {
14819  if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14820  // If we get here, then we have a trailing comma in a
14821  // block parameter list.
14822  pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14823 
14824  if (params->rest == NULL) {
14825  pm_parameters_node_rest_set(params, param);
14826  } else {
14827  pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
14828  pm_parameters_node_posts_append(params, (pm_node_t *) param);
14829  }
14830  } else {
14831  pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14832  }
14833  }
14834 
14835  parsing = false;
14836  break;
14837  }
14838 
14839  // If we hit some kind of issue while parsing the parameter, this would
14840  // have been set to false. In that case, we need to break out of the
14841  // loop.
14842  if (!parsing) break;
14843 
14844  bool accepted_newline = false;
14845  if (uses_parentheses) {
14846  accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14847  }
14848 
14849  if (accept1(parser, PM_TOKEN_COMMA)) {
14850  // If there was a comma, but we also accepted a newline, then this
14851  // is a syntax error.
14852  if (accepted_newline) {
14853  pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14854  }
14855  } else {
14856  // If there was no comma, then we're done parsing parameters.
14857  break;
14858  }
14859  }
14860 
14861  pm_do_loop_stack_pop(parser);
14862 
14863  // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14864  if (params->base.location.start == params->base.location.end) {
14865  pm_node_destroy(parser, (pm_node_t *) params);
14866  return NULL;
14867  }
14868 
14869  return params;
14870 }
14871 
14876 static size_t
14877 token_newline_index(const pm_parser_t *parser) {
14878  if (parser->heredoc_end == NULL) {
14879  // This is the common case. In this case we can look at the previously
14880  // recorded newline in the newline list and subtract from the current
14881  // offset.
14882  return parser->newline_list.size - 1;
14883  } else {
14884  // This is unlikely. This is the case that we have already parsed the
14885  // start of a heredoc, so we cannot rely on looking at the previous
14886  // offset of the newline list, and instead must go through the whole
14887  // process of a binary search for the line number.
14888  return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
14889  }
14890 }
14891 
14896 static int64_t
14897 token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14898  const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
14899  const uint8_t *end = token->start;
14900 
14901  // Skip over the BOM if it is present.
14902  if (
14903  newline_index == 0 &&
14904  parser->start[0] == 0xef &&
14905  parser->start[1] == 0xbb &&
14906  parser->start[2] == 0xbf
14907  ) cursor += 3;
14908 
14909  int64_t column = 0;
14910  for (; cursor < end; cursor++) {
14911  switch (*cursor) {
14912  case '\t':
14913  column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14914  break;
14915  case ' ':
14916  column++;
14917  break;
14918  default:
14919  column++;
14920  if (break_on_non_space) return -1;
14921  break;
14922  }
14923  }
14924 
14925  return column;
14926 }
14927 
14932 static void
14933 parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14934  // If these warnings are disabled (unlikely), then we can just return.
14935  if (!parser->warn_mismatched_indentation) return;
14936 
14937  // If the tokens are on the same line, we do not warn.
14938  size_t closing_newline_index = token_newline_index(parser);
14939  if (opening_newline_index == closing_newline_index) return;
14940 
14941  // If the opening token has anything other than spaces or tabs before it,
14942  // then we do not warn. This is unless we are matching up an `if`/`end` pair
14943  // and the `if` immediately follows an `else` keyword.
14944  int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14945  if (!if_after_else && (opening_column == -1)) return;
14946 
14947  // Get a reference to the closing token off the current parser. This assumes
14948  // that the caller has placed this in the correct position.
14949  pm_token_t *closing_token = &parser->current;
14950 
14951  // If the tokens are at the same indentation, we do not warn.
14952  int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14953  if ((closing_column == -1) || (opening_column == closing_column)) return;
14954 
14955  // If the closing column is greater than the opening column and we are
14956  // allowing indentation, then we do not warn.
14957  if (allow_indent && (closing_column > opening_column)) return;
14958 
14959  // Otherwise, add a warning.
14960  PM_PARSER_WARN_FORMAT(
14961  parser,
14962  closing_token->start,
14963  closing_token->end,
14964  PM_WARN_INDENTATION_MISMATCH,
14965  (int) (closing_token->end - closing_token->start),
14966  (const char *) closing_token->start,
14967  (int) (opening_token->end - opening_token->start),
14968  (const char *) opening_token->start,
14969  ((int32_t) opening_newline_index) + parser->start_line
14970  );
14971 }
14972 
/**
 * Identifies the kind of construct whose rescue/else/ensure clauses are being
 * parsed. parse_rescues uses it to choose the parsing context for each clause.
 */
typedef enum {
    PM_RESCUES_BEGIN = 1,
    PM_RESCUES_BLOCK = 2,
    PM_RESCUES_CLASS = 3,
    PM_RESCUES_DEF = 4,
    PM_RESCUES_LAMBDA = 5,
    PM_RESCUES_MODULE = 6,
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
14982 
14987 static inline void
14988 parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
14989  pm_rescue_node_t *current = NULL;
14990 
14991  while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
14992  if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14993  parser_lex(parser);
14994 
14995  pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
14996 
14997  switch (parser->current.type) {
14998  case PM_TOKEN_EQUAL_GREATER: {
14999  // Here we have an immediate => after the rescue keyword, in which case
15000  // we're going to have an empty list of exceptions to rescue (which
15001  // implies StandardError).
15002  parser_lex(parser);
15003  pm_rescue_node_operator_set(rescue, &parser->previous);
15004 
15005  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15006  reference = parse_target(parser, reference, false, false);
15007 
15008  pm_rescue_node_reference_set(rescue, reference);
15009  break;
15010  }
15011  case PM_TOKEN_NEWLINE:
15012  case PM_TOKEN_SEMICOLON:
15013  case PM_TOKEN_KEYWORD_THEN:
15014  // Here we have a terminator for the rescue keyword, in which case we're
15015  // going to just continue on.
15016  break;
15017  default: {
15018  if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
15019  // Here we have something that could be an exception expression, so
15020  // we'll attempt to parse it here and any others delimited by commas.
15021 
15022  do {
15023  pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
15024  pm_rescue_node_exceptions_append(rescue, expression);
15025 
15026  // If we hit a newline, then this is the end of the rescue expression. We
15027  // can continue on to parse the statements.
15028  if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
15029 
15030  // If we hit a `=>` then we're going to parse the exception variable. Once
15031  // we've done that, we'll break out of the loop and parse the statements.
15032  if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
15033  pm_rescue_node_operator_set(rescue, &parser->previous);
15034 
15035  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15036  reference = parse_target(parser, reference, false, false);
15037 
15038  pm_rescue_node_reference_set(rescue, reference);
15039  break;
15040  }
15041  } while (accept1(parser, PM_TOKEN_COMMA));
15042  }
15043  }
15044  }
15045 
15046  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15047  accept1(parser, PM_TOKEN_KEYWORD_THEN);
15048  } else {
15049  expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
15050  }
15051 
15053  pm_accepts_block_stack_push(parser, true);
15054  pm_context_t context;
15055 
15056  switch (type) {
15057  case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
15058  case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
15059  case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
15060  case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
15061  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
15062  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
15063  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
15064  default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15065  }
15066 
15067  pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15068  if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
15069 
15070  pm_accepts_block_stack_pop(parser);
15071  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15072  }
15073 
15074  if (current == NULL) {
15075  pm_begin_node_rescue_clause_set(parent_node, rescue);
15076  } else {
15077  pm_rescue_node_subsequent_set(current, rescue);
15078  }
15079 
15080  current = rescue;
15081  }
15082 
15083  // The end node locations on rescue nodes will not be set correctly
15084  // since we won't know the end until we've found all subsequent
15085  // clauses. This sets the end location on all rescues once we know it.
15086  if (current != NULL) {
15087  const uint8_t *end_to_set = current->base.location.end;
15088  pm_rescue_node_t *clause = parent_node->rescue_clause;
15089 
15090  while (clause != NULL) {
15091  clause->base.location.end = end_to_set;
15092  clause = clause->subsequent;
15093  }
15094  }
15095 
15096  pm_token_t else_keyword;
15097  if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15098  if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15099  opening_newline_index = token_newline_index(parser);
15100 
15101  else_keyword = parser->current;
15102  opening = &else_keyword;
15103 
15104  parser_lex(parser);
15105  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15106 
15107  pm_statements_node_t *else_statements = NULL;
15108  if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
15109  pm_accepts_block_stack_push(parser, true);
15110  pm_context_t context;
15111 
15112  switch (type) {
15113  case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
15114  case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
15115  case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
15116  case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
15117  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15118  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15119  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15120  default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15121  }
15122 
15123  else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15124  pm_accepts_block_stack_pop(parser);
15125 
15126  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15127  }
15128 
15129  pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
15130  pm_begin_node_else_clause_set(parent_node, else_clause);
15131 
15132  // If we don't have a `current` rescue node, then this is a dangling
15133  // else, and it's an error.
15134  if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
15135  }
15136 
15137  if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
15138  if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15139  pm_token_t ensure_keyword = parser->current;
15140 
15141  parser_lex(parser);
15142  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15143 
15144  pm_statements_node_t *ensure_statements = NULL;
15145  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15146  pm_accepts_block_stack_push(parser, true);
15147  pm_context_t context;
15148 
15149  switch (type) {
15150  case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
15151  case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
15152  case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
15153  case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
15154  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
15155  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
15156  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
15157  default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15158  }
15159 
15160  ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15161  pm_accepts_block_stack_pop(parser);
15162 
15163  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15164  }
15165 
15166  pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
15167  pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
15168  }
15169 
15170  if (match1(parser, PM_TOKEN_KEYWORD_END)) {
15171  if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15172  pm_begin_node_end_keyword_set(parent_node, &parser->current);
15173  } else {
15174  pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15175  pm_begin_node_end_keyword_set(parent_node, &end_keyword);
15176  }
15177 }
15178 
15183 static pm_begin_node_t *
15184 parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
15185  pm_token_t begin_keyword = not_provided(parser);
15186  pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
15187 
15188  parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
15189  node->base.location.start = start;
15190 
15191  return node;
15192 }
15193 
15198 parse_block_parameters(
15199  pm_parser_t *parser,
15200  bool allows_trailing_comma,
15201  const pm_token_t *opening,
15202  bool is_lambda_literal,
15203  bool accepts_blocks_in_defaults,
15204  uint16_t depth
15205 ) {
15206  pm_parameters_node_t *parameters = NULL;
15207  if (!match1(parser, PM_TOKEN_SEMICOLON)) {
15208  parameters = parse_parameters(
15209  parser,
15210  is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
15211  false,
15212  allows_trailing_comma,
15213  false,
15214  accepts_blocks_in_defaults,
15215  (uint16_t) (depth + 1)
15216  );
15217  }
15218 
15219  pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
15220  if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
15221  accept1(parser, PM_TOKEN_NEWLINE);
15222 
15223  if (accept1(parser, PM_TOKEN_SEMICOLON)) {
15224  do {
15225  switch (parser->current.type) {
15226  case PM_TOKEN_CONSTANT:
15227  pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
15228  parser_lex(parser);
15229  break;
15231  pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
15232  parser_lex(parser);
15233  break;
15235  pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
15236  parser_lex(parser);
15237  break;
15239  pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
15240  parser_lex(parser);
15241  break;
15242  default:
15243  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
15244  break;
15245  }
15246 
15247  bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
15248  pm_parser_local_add_token(parser, &parser->previous, 1);
15249 
15250  pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
15251  if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
15252 
15253  pm_block_parameters_node_append_local(block_parameters, local);
15254  } while (accept1(parser, PM_TOKEN_COMMA));
15255  }
15256  }
15257 
15258  return block_parameters;
15259 }
15260 
15265 static bool
15266 outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15267  for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15268  if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
15269  }
15270 
15271  return false;
15272 }
15273 
/**
 * The names of the numbered parameters, `_1` through `_9`, stored so that the
 * entry at index i is the name of parameter number i + 1.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1",
    "_2",
    "_3",
    "_4",
    "_5",
    "_6",
    "_7",
    "_8",
    "_9"
};
15282 
15288 static pm_node_t *
15289 parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
15290  pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
15291 
15292  // If we have ordinary parameters, then we will return them as the set of
15293  // parameters.
15294  if (parameters != NULL) {
15295  // If we also have implicit parameters, then this is an error.
15296  if (implicit_parameters->size > 0) {
15297  pm_node_t *node = implicit_parameters->nodes[0];
15298 
15300  pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15301  } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15302  pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
15303  } else {
15304  assert(false && "unreachable");
15305  }
15306  }
15307 
15308  return parameters;
15309  }
15310 
15311  // If we don't have any implicit parameters, then the set of parameters is
15312  // NULL.
15313  if (implicit_parameters->size == 0) {
15314  return NULL;
15315  }
15316 
15317  // If we don't have ordinary parameters, then we now must validate our set
15318  // of implicit parameters. We can only have numbered parameters or it, but
15319  // they cannot be mixed.
15320  uint8_t numbered_parameter = 0;
15321  bool it_parameter = false;
15322 
15323  for (size_t index = 0; index < implicit_parameters->size; index++) {
15324  pm_node_t *node = implicit_parameters->nodes[index];
15325 
15327  if (it_parameter) {
15328  pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15329  } else if (outer_scope_using_numbered_parameters_p(parser)) {
15330  pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15331  } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15332  pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15333  } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
15334  numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
15335  } else {
15336  assert(false && "unreachable");
15337  }
15338  } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15339  if (numbered_parameter > 0) {
15340  pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15341  } else {
15342  it_parameter = true;
15343  }
15344  }
15345  }
15346 
15347  if (numbered_parameter > 0) {
15348  // Go through the parent scopes and mark them as being disallowed from
15349  // using numbered parameters because this inner scope is using them.
15350  for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15351  scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15352  }
15353 
15354  const pm_location_t location = { .start = opening->start, .end = closing->end };
15355  return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
15356  }
15357 
15358  if (it_parameter) {
15359  return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
15360  }
15361 
15362  return NULL;
15363 }
15364 
15368 static pm_block_node_t *
15369 parse_block(pm_parser_t *parser, uint16_t depth) {
15370  pm_token_t opening = parser->previous;
15371  accept1(parser, PM_TOKEN_NEWLINE);
15372 
15373  pm_accepts_block_stack_push(parser, true);
15374  pm_parser_scope_push(parser, false);
15375 
15376  pm_block_parameters_node_t *block_parameters = NULL;
15377 
15378  if (accept1(parser, PM_TOKEN_PIPE)) {
15379  pm_token_t block_parameters_opening = parser->previous;
15380  if (match1(parser, PM_TOKEN_PIPE)) {
15381  block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15382  parser->command_start = true;
15383  parser_lex(parser);
15384  } else {
15385  block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15386  accept1(parser, PM_TOKEN_NEWLINE);
15387  parser->command_start = true;
15388  expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15389  }
15390 
15391  pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15392  }
15393 
15394  accept1(parser, PM_TOKEN_NEWLINE);
15395  pm_node_t *statements = NULL;
15396 
15397  if (opening.type == PM_TOKEN_BRACE_LEFT) {
15398  if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15399  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
15400  }
15401 
15402  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
15403  } else {
15404  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15406  pm_accepts_block_stack_push(parser, true);
15407  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
15408  pm_accepts_block_stack_pop(parser);
15409  }
15410 
15411  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15412  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15413  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
15414  }
15415  }
15416 
15417  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
15418  }
15419 
15420  pm_constant_id_list_t locals;
15421  pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15422  pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
15423 
15424  pm_parser_scope_pop(parser);
15425  pm_accepts_block_stack_pop(parser);
15426 
15427  return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15428 }
15429 
15435 static bool
15436 parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
15437  bool found = false;
15438 
15439  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15440  found |= true;
15441  arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15442 
15443  if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15444  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15445  } else {
15446  pm_accepts_block_stack_push(parser, true);
15447  parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
15448 
15449  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15450  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
15451  parser->previous.start = parser->previous.end;
15452  parser->previous.type = PM_TOKEN_MISSING;
15453  }
15454 
15455  pm_accepts_block_stack_pop(parser);
15456  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15457  }
15458  } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15459  found |= true;
15460  pm_accepts_block_stack_push(parser, false);
15461 
15462  // If we get here, then the subsequent token cannot be used as an infix
15463  // operator. In this case we assume the subsequent token is part of an
15464  // argument to this method call.
15465  parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
15466 
15467  // If we have done with the arguments and still not consumed the comma,
15468  // then we have a trailing comma where we need to check whether it is
15469  // allowed or not.
15470  if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15471  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
15472  }
15473 
15474  pm_accepts_block_stack_pop(parser);
15475  }
15476 
15477  // If we're at the end of the arguments, we can now check if there is a block
15478  // node that starts with a {. If there is, then we can parse it and add it to
15479  // the arguments.
15480  if (accepts_block) {
15481  pm_block_node_t *block = NULL;
15482 
15483  if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15484  found |= true;
15485  block = parse_block(parser, (uint16_t) (depth + 1));
15486  pm_arguments_validate_block(parser, arguments, block);
15487  } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15488  found |= true;
15489  block = parse_block(parser, (uint16_t) (depth + 1));
15490  }
15491 
15492  if (block != NULL) {
15493  if (arguments->block == NULL && !arguments->has_forwarding) {
15494  arguments->block = (pm_node_t *) block;
15495  } else {
15496  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15497 
15498  if (arguments->block != NULL) {
15499  if (arguments->arguments == NULL) {
15500  arguments->arguments = pm_arguments_node_create(parser);
15501  }
15502  pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
15503  }
15504  arguments->block = (pm_node_t *) block;
15505  }
15506  }
15507  }
15508 
15509  return found;
15510 }
15511 
/**
 * Check whether a `return` (the given node) appears somewhere it is allowed.
 *
 * Walks the parser's context chain from parser->current_context outward via
 * the `prev` links. Some contexts are transparent and the walk continues past
 * them; class/module contexts add PM_ERR_RETURN_INVALID to the node and stop;
 * def/block contexts stop without an error. A singleton-class context is only
 * remembered while walking, and the error is added after the loop if the walk
 * finished without an early return.
 *
 * NOTE(review): the embedded listing numbers skip values inside the case
 * lists below, so some case labels may have been dropped from this copy —
 * confirm the full set of pm_context_t values against the upstream file.
 */
15516 static void
15517 parse_return(pm_parser_t *parser, pm_node_t *node) {
// Set when a PM_CONTEXT_SCLASS context is seen anywhere on the chain.
15518  bool in_sclass = false;
15519  for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15520  switch (context_node->context) {
15521  case PM_CONTEXT_BEGIN_ELSE:
15524  case PM_CONTEXT_BEGIN:
15525  case PM_CONTEXT_CASE_IN:
15526  case PM_CONTEXT_CASE_WHEN:
15528  case PM_CONTEXT_DEFINED:
15529  case PM_CONTEXT_ELSE:
15530  case PM_CONTEXT_ELSIF:
15531  case PM_CONTEXT_EMBEXPR:
15532  case PM_CONTEXT_FOR_INDEX:
15533  case PM_CONTEXT_FOR:
15534  case PM_CONTEXT_IF:
15536  case PM_CONTEXT_MAIN:
15538  case PM_CONTEXT_PARENS:
15539  case PM_CONTEXT_POSTEXE:
15540  case PM_CONTEXT_PREDICATE:
15541  case PM_CONTEXT_PREEXE:
15543  case PM_CONTEXT_TERNARY:
15544  case PM_CONTEXT_UNLESS:
15545  case PM_CONTEXT_UNTIL:
15546  case PM_CONTEXT_WHILE:
15547  // Keep iterating up the lists of contexts, because returns can
15548  // see through these.
15549  continue;
15553  case PM_CONTEXT_SCLASS:
// Not an immediate error: keep walking, since an enclosing context
// handled below may still return before the flag is consulted.
15554  in_sclass = true;
15555  continue;
15556  case PM_CONTEXT_CLASS_ELSE:
15559  case PM_CONTEXT_CLASS:
15563  case PM_CONTEXT_MODULE:
15564  // These contexts are invalid for a return.
15565  pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15566  return;
15568  case PM_CONTEXT_BLOCK_ELSE:
15572  case PM_CONTEXT_DEF_ELSE:
15573  case PM_CONTEXT_DEF_ENSURE:
15574  case PM_CONTEXT_DEF_PARAMS:
15575  case PM_CONTEXT_DEF_RESCUE:
15576  case PM_CONTEXT_DEF:
15582  // These contexts are valid for a return, and we should not
15583  // continue to loop.
15584  return;
15585  case PM_CONTEXT_NONE:
15586  // This case should never happen.
15587  assert(false && "unreachable");
15588  break;
15589  }
15590  }
// Only reached when the whole chain was walked without an early return. In
// that case a singleton-class context seen on the way makes the return
// invalid.
15591  if (in_sclass) {
15592  pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15593  }
15594 }
15595 
15600 static void
15601 parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15602  for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15603  switch (context_node->context) {
15606  case PM_CONTEXT_BLOCK_ELSE:
15609  case PM_CONTEXT_DEFINED:
15610  case PM_CONTEXT_FOR:
15617  case PM_CONTEXT_POSTEXE:
15618  case PM_CONTEXT_UNTIL:
15619  case PM_CONTEXT_WHILE:
15620  // These are the good cases. We're allowed to have a block exit
15621  // in these contexts.
15622  return;
15623  case PM_CONTEXT_DEF:
15624  case PM_CONTEXT_DEF_PARAMS:
15625  case PM_CONTEXT_DEF_ELSE:
15626  case PM_CONTEXT_DEF_ENSURE:
15627  case PM_CONTEXT_DEF_RESCUE:
15628  case PM_CONTEXT_MAIN:
15629  case PM_CONTEXT_PREEXE:
15630  case PM_CONTEXT_SCLASS:
15634  // These are the bad cases. We're not allowed to have a block
15635  // exit in these contexts.
15636  //
15637  // If we get here, then we're about to mark this block exit
15638  // as invalid. However, it could later _become_ valid if we
15639  // find a trailing while/until on the expression. In this
15640  // case instead of adding the error here, we'll add the
15641  // block exit to the list of exits for the expression, and
15642  // the node parsing will handle validating it instead.
15643  assert(parser->current_block_exits != NULL);
15645  return;
15646  case PM_CONTEXT_BEGIN_ELSE:
15649  case PM_CONTEXT_BEGIN:
15650  case PM_CONTEXT_CASE_IN:
15651  case PM_CONTEXT_CASE_WHEN:
15652  case PM_CONTEXT_CLASS_ELSE:
15655  case PM_CONTEXT_CLASS:
15657  case PM_CONTEXT_ELSE:
15658  case PM_CONTEXT_ELSIF:
15659  case PM_CONTEXT_EMBEXPR:
15660  case PM_CONTEXT_FOR_INDEX:
15661  case PM_CONTEXT_IF:
15665  case PM_CONTEXT_MODULE:
15667  case PM_CONTEXT_PARENS:
15668  case PM_CONTEXT_PREDICATE:
15670  case PM_CONTEXT_TERNARY:
15671  case PM_CONTEXT_UNLESS:
15672  // In these contexts we should continue walking up the list of
15673  // contexts.
15674  break;
15675  case PM_CONTEXT_NONE:
15676  // This case should never happen.
15677  assert(false && "unreachable");
15678  break;
15679  }
15680  }
15681 }
15682 
15687 static pm_node_list_t *
15688 push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15689  pm_node_list_t *previous_block_exits = parser->current_block_exits;
15690  parser->current_block_exits = current_block_exits;
15691  return previous_block_exits;
15692 }
15693 
15699 static void
15700 flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15701  pm_node_t *block_exit;
15702  PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15703  const char *type;
15704 
15705  switch (PM_NODE_TYPE(block_exit)) {
15706  case PM_BREAK_NODE: type = "break"; break;
15707  case PM_NEXT_NODE: type = "next"; break;
15708  case PM_REDO_NODE: type = "redo"; break;
15709  default: assert(false && "unreachable"); type = ""; break;
15710  }
15711 
15712  PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15713  }
15714 
15715  parser->current_block_exits = previous_block_exits;
15716 }
15717 
15722 static void
15723 pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15725  // If we matched a trailing while/until, then all of the block exits in
15726  // the contained list are valid. In this case we do not need to do
15727  // anything.
15728  parser->current_block_exits = previous_block_exits;
15729  } else if (previous_block_exits != NULL) {
15730  // If we did not matching a trailing while/until, then all of the block
15731  // exits contained in the list are invalid for this specific context.
15732  // However, they could still become valid in a higher level context if
15733  // there is another list above this one. In this case we'll push all of
15734  // the block exits up to the previous list.
15735  pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15736  parser->current_block_exits = previous_block_exits;
15737  } else {
15738  // If we did not match a trailing while/until and this was the last
15739  // chance to do so, then all of the block exits in the list are invalid
15740  // and we need to add an error for each of them.
15741  flush_block_exits(parser, previous_block_exits);
15742  }
15743 }
15744 
15745 static inline pm_node_t *
15746 parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15747  context_push(parser, PM_CONTEXT_PREDICATE);
15748  pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15749  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15750 
15751  // Predicates are closed by a term, a "then", or a term and then a "then".
15752  bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15753 
15754  if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15755  predicate_closed = true;
15756  *then_keyword = parser->previous;
15757  }
15758 
15759  if (!predicate_closed) {
15760  pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15761  }
15762 
15763  context_pop(parser);
15764  return predicate;
15765 }
15766 
15767 static inline pm_node_t *
15768 parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15769  pm_node_list_t current_block_exits = { 0 };
15770  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15771 
15772  pm_token_t keyword = parser->previous;
15773  pm_token_t then_keyword = not_provided(parser);
15774 
15775  pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15776  pm_statements_node_t *statements = NULL;
15777 
15779  pm_accepts_block_stack_push(parser, true);
15780  statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15781  pm_accepts_block_stack_pop(parser);
15782  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15783  }
15784 
15785  pm_token_t end_keyword = not_provided(parser);
15786  pm_node_t *parent = NULL;
15787 
15788  switch (context) {
15789  case PM_CONTEXT_IF:
15790  parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15791  break;
15792  case PM_CONTEXT_UNLESS:
15793  parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
15794  break;
15795  default:
15796  assert(false && "unreachable");
15797  break;
15798  }
15799 
15800  pm_node_t *current = parent;
15801 
15802  // Parse any number of elsif clauses. This will form a linked list of if
15803  // nodes pointing to each other from the top.
15804  if (context == PM_CONTEXT_IF) {
15805  while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15806  if (parser_end_of_line_p(parser)) {
15807  PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15808  }
15809 
15810  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15811  pm_token_t elsif_keyword = parser->current;
15812  parser_lex(parser);
15813 
15814  pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15815  pm_accepts_block_stack_push(parser, true);
15816 
15817  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15818  pm_accepts_block_stack_pop(parser);
15819  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15820 
15821  pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15822  ((pm_if_node_t *) current)->subsequent = elsif;
15823  current = elsif;
15824  }
15825  }
15826 
15827  if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15828  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15829  opening_newline_index = token_newline_index(parser);
15830 
15831  parser_lex(parser);
15832  pm_token_t else_keyword = parser->previous;
15833 
15834  pm_accepts_block_stack_push(parser, true);
15835  pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15836  pm_accepts_block_stack_pop(parser);
15837 
15838  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15839  parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15840  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
15841 
15842  pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15843 
15844  switch (context) {
15845  case PM_CONTEXT_IF:
15846  ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
15847  break;
15848  case PM_CONTEXT_UNLESS:
15849  ((pm_unless_node_t *) parent)->else_clause = else_node;
15850  break;
15851  default:
15852  assert(false && "unreachable");
15853  break;
15854  }
15855  } else {
15856  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15857  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
15858  }
15859 
15860  // Set the appropriate end location for all of the nodes in the subtree.
15861  switch (context) {
15862  case PM_CONTEXT_IF: {
15863  pm_node_t *current = parent;
15864  bool recursing = true;
15865 
15866  while (recursing) {
15867  switch (PM_NODE_TYPE(current)) {
15868  case PM_IF_NODE:
15869  pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15870  current = ((pm_if_node_t *) current)->subsequent;
15871  recursing = current != NULL;
15872  break;
15873  case PM_ELSE_NODE:
15874  pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
15875  recursing = false;
15876  break;
15877  default: {
15878  recursing = false;
15879  break;
15880  }
15881  }
15882  }
15883  break;
15884  }
15885  case PM_CONTEXT_UNLESS:
15886  pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
15887  break;
15888  default:
15889  assert(false && "unreachable");
15890  break;
15891  }
15892 
15893  pop_block_exits(parser, previous_block_exits);
15894  pm_node_list_free(&current_block_exits);
15895 
15896  return parent;
15897 }
15898 
// Each PM_CASE_* macro below expands to a run of case labels without the
// leading `case` keyword and without the trailing colon, so that it can be
// written as `case PM_CASE_KEYWORD:` inside a switch.

// Case labels for the keyword token types.
15903 #define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
15904  case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
15905  case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \
15906  case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
15907  case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
15908  case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
15909  case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
15910  case PM_TOKEN_KEYWORD_OR: case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
15911  case PM_TOKEN_KEYWORD_RETURN: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: case PM_TOKEN_KEYWORD_THEN: \
15912  case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_UNDEF: case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: \
15913  case PM_TOKEN_KEYWORD_WHEN: case PM_TOKEN_KEYWORD_WHILE: case PM_TOKEN_KEYWORD_YIELD
15914 
// Case labels for operator token types. parse_symbol, parse_undef_argument
// and parse_alias_argument use these to accept an operator as a method name.
15919 #define PM_CASE_OPERATOR PM_TOKEN_AMPERSAND: case PM_TOKEN_BACKTICK: case PM_TOKEN_BANG_EQUAL: \
15920  case PM_TOKEN_BANG_TILDE: case PM_TOKEN_BANG: case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
15921  case PM_TOKEN_BRACKET_LEFT_RIGHT: case PM_TOKEN_CARET: case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: \
15922  case PM_TOKEN_EQUAL_TILDE: case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: case PM_TOKEN_GREATER: \
15923  case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: case PM_TOKEN_LESS_LESS: case PM_TOKEN_LESS: \
15924  case PM_TOKEN_MINUS: case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: case PM_TOKEN_PLUS: case PM_TOKEN_SLASH: \
15925  case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: case PM_TOKEN_TILDE: case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: \
15926  case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_UPLUS: case PM_TOKEN_USTAR: case PM_TOKEN_USTAR_STAR
15927 
// Case labels for numeric, string-like and literal-keyword token types.
// NOTE(review): presumably the tokens that can begin a primitive value in a
// pattern — not used in this part of the file, confirm against its users.
15933 #define PM_CASE_PRIMITIVE PM_TOKEN_INTEGER: case PM_TOKEN_INTEGER_IMAGINARY: case PM_TOKEN_INTEGER_RATIONAL: \
15934  case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: case PM_TOKEN_FLOAT: case PM_TOKEN_FLOAT_IMAGINARY: \
15935  case PM_TOKEN_FLOAT_RATIONAL: case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: case PM_TOKEN_SYMBOL_BEGIN: \
15936  case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: case PM_TOKEN_PERCENT_LOWER_X: case PM_TOKEN_PERCENT_LOWER_I: \
15937  case PM_TOKEN_PERCENT_LOWER_W: case PM_TOKEN_PERCENT_UPPER_I: case PM_TOKEN_PERCENT_UPPER_W: \
15938  case PM_TOKEN_STRING_BEGIN: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_TRUE: \
15939  case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
15940  case PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_MINUS_GREATER: case PM_TOKEN_HEREDOC_START: \
15941  case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_CHARACTER_LITERAL
15942 
// Case labels for identifier, label, splat, ampersand and variable token
// types.
// NOTE(review): presumably the tokens that can begin a parameter — not used
// in this part of the file, confirm against its users.
15947 #define PM_CASE_PARAMETER PM_TOKEN_UAMPERSAND: case PM_TOKEN_AMPERSAND: case PM_TOKEN_UDOT_DOT_DOT: \
15948  case PM_TOKEN_IDENTIFIER: case PM_TOKEN_LABEL: case PM_TOKEN_USTAR: case PM_TOKEN_STAR: case PM_TOKEN_STAR_STAR: \
15949  case PM_TOKEN_USTAR_STAR: case PM_TOKEN_CONSTANT: case PM_TOKEN_INSTANCE_VARIABLE: case PM_TOKEN_GLOBAL_VARIABLE: \
15950  case PM_TOKEN_CLASS_VARIABLE
15951 
// Unlike the macros above, this one lists node types rather than token types:
// the variable/constant read nodes plus multi-target nodes.
// NOTE(review): presumably the nodes that can be converted into write
// targets — not used in this part of the file, confirm against its users.
15956 #define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
15957  case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
15958  case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
15959  case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
15960 
15961 // Assert here that the flags are the same so that we can safely switch the type
15962 // of the node without having to move the flags.
15963 PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
15964 
15969 static inline pm_node_flags_t
15970 parse_unescaped_encoding(const pm_parser_t *parser) {
15971  if (parser->explicit_encoding != NULL) {
15972  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
15973  // If the there's an explicit encoding and it's using a UTF-8 escape
15974  // sequence, then mark the string as UTF-8.
15976  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
15977  // If there's a non-UTF-8 escape sequence being used, then the
15978  // string uses the source encoding, unless the source is marked as
15979  // US-ASCII. In that case the string is forced as ASCII-8BIT in
15980  // order to keep the string valid.
15982  }
15983  }
15984  return 0;
15985 }
15986 
15991 static pm_node_t *
15992 parse_string_part(pm_parser_t *parser, uint16_t depth) {
15993  switch (parser->current.type) {
15994  // Here the lexer has returned to us plain string content. In this case
15995  // we'll create a string node that has no opening or closing and return that
15996  // as the part. These kinds of parts look like:
15997  //
15998  // "aaa #{bbb} #@ccc ddd"
15999  // ^^^^ ^ ^^^^
16000  case PM_TOKEN_STRING_CONTENT: {
16001  pm_token_t opening = not_provided(parser);
16002  pm_token_t closing = not_provided(parser);
16003 
16004  pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
16005  pm_node_flag_set(node, parse_unescaped_encoding(parser));
16006 
16007  parser_lex(parser);
16008  return node;
16009  }
16010  // Here the lexer has returned the beginning of an embedded expression. In
16011  // that case we'll parse the inner statements and return that as the part.
16012  // These kinds of parts look like:
16013  //
16014  // "aaa #{bbb} #@ccc ddd"
16015  // ^^^^^^
16016  case PM_TOKEN_EMBEXPR_BEGIN: {
16017  // Ruby disallows seeing encoding around interpolation in strings,
16018  // even though it is known at parse time.
16019  parser->explicit_encoding = NULL;
16020 
16021  pm_lex_state_t state = parser->lex_state;
16022  int brace_nesting = parser->brace_nesting;
16023 
16024  parser->brace_nesting = 0;
16025  lex_state_set(parser, PM_LEX_STATE_BEG);
16026  parser_lex(parser);
16027 
16028  pm_token_t opening = parser->previous;
16029  pm_statements_node_t *statements = NULL;
16030 
16031  if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
16032  pm_accepts_block_stack_push(parser, true);
16033  statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
16034  pm_accepts_block_stack_pop(parser);
16035  }
16036 
16037  parser->brace_nesting = brace_nesting;
16038  lex_state_set(parser, state);
16039 
16040  expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
16041  pm_token_t closing = parser->previous;
16042 
16043  // If this set of embedded statements only contains a single
16044  // statement, then Ruby does not consider it as a possible statement
16045  // that could emit a line event.
16046  if (statements != NULL && statements->body.size == 1) {
16047  pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
16048  }
16049 
16050  return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
16051  }
16052 
16053  // Here the lexer has returned the beginning of an embedded variable.
16054  // In that case we'll parse the variable and create an appropriate node
16055  // for it and then return that node. These kinds of parts look like:
16056  //
16057  // "aaa #{bbb} #@ccc ddd"
16058  // ^^^^^
16059  case PM_TOKEN_EMBVAR: {
16060  // Ruby disallows seeing encoding around interpolation in strings,
16061  // even though it is known at parse time.
16062  parser->explicit_encoding = NULL;
16063 
16064  lex_state_set(parser, PM_LEX_STATE_BEG);
16065  parser_lex(parser);
16066 
16067  pm_token_t operator = parser->previous;
16068  pm_node_t *variable;
16069 
16070  switch (parser->current.type) {
16071  // In this case a back reference is being interpolated. We'll
16072  // create a global variable read node.
16074  parser_lex(parser);
16075  variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16076  break;
16077  // In this case an nth reference is being interpolated. We'll
16078  // create a global variable read node.
16080  parser_lex(parser);
16081  variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16082  break;
16083  // In this case a global variable is being interpolated. We'll
16084  // create a global variable read node.
16086  parser_lex(parser);
16087  variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16088  break;
16089  // In this case an instance variable is being interpolated.
16090  // We'll create an instance variable read node.
16092  parser_lex(parser);
16093  variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
16094  break;
16095  // In this case a class variable is being interpolated. We'll
16096  // create a class variable read node.
16098  parser_lex(parser);
16099  variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
16100  break;
16101  // We can hit here if we got an invalid token. In that case
16102  // we'll not attempt to lex this token and instead just return a
16103  // missing node.
16104  default:
16105  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
16106  variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16107  break;
16108  }
16109 
16110  return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
16111  }
16112  default:
16113  parser_lex(parser);
16114  pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
16115  return NULL;
16116  }
16117 }
16118 
16124 static const uint8_t *
16125 parse_operator_symbol_name(const pm_token_t *name) {
16126  switch (name->type) {
16127  case PM_TOKEN_TILDE:
16128  case PM_TOKEN_BANG:
16129  if (name->end[-1] == '@') return name->end - 1;
16130  /* fallthrough */
16131  default:
16132  return name->end;
16133  }
16134 }
16135 
16136 static pm_node_t *
16137 parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
16138  pm_token_t closing = not_provided(parser);
16139  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
16140 
16141  const uint8_t *end = parse_operator_symbol_name(&parser->current);
16142 
16143  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16144  parser_lex(parser);
16145 
16146  pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
16147  pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
16148 
16149  return (pm_node_t *) symbol;
16150 }
16151 
16157 static pm_node_t *
16158 parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
16159  const pm_token_t opening = parser->previous;
16160 
16161  if (lex_mode->mode != PM_LEX_STRING) {
16162  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16163 
16164  switch (parser->current.type) {
16165  case PM_CASE_OPERATOR:
16166  return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
16167  case PM_TOKEN_IDENTIFIER:
16168  case PM_TOKEN_CONSTANT:
16170  case PM_TOKEN_METHOD_NAME:
16175  case PM_CASE_KEYWORD:
16176  parser_lex(parser);
16177  break;
16178  default:
16179  expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
16180  break;
16181  }
16182 
16183  pm_token_t closing = not_provided(parser);
16184  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16185 
16186  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16187  pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16188 
16189  return (pm_node_t *) symbol;
16190  }
16191 
16192  if (lex_mode->as.string.interpolation) {
16193  // If we have the end of the symbol, then we can return an empty symbol.
16194  if (match1(parser, PM_TOKEN_STRING_END)) {
16195  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16196  parser_lex(parser);
16197 
16198  pm_token_t content = not_provided(parser);
16199  pm_token_t closing = parser->previous;
16200  return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
16201  }
16202 
16203  // Now we can parse the first part of the symbol.
16204  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
16205 
16206  // If we got a string part, then it's possible that we could transform
16207  // what looks like an interpolated symbol into a regular symbol.
16208  if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16209  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16210  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16211 
16212  return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
16213  }
16214 
16215  pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16216  if (part) pm_interpolated_symbol_node_append(symbol, part);
16217 
16218  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16219  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16220  pm_interpolated_symbol_node_append(symbol, part);
16221  }
16222  }
16223 
16224  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16225  if (match1(parser, PM_TOKEN_EOF)) {
16226  pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16227  } else {
16228  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16229  }
16230 
16231  pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16232  return (pm_node_t *) symbol;
16233  }
16234 
16235  pm_token_t content;
16236  pm_string_t unescaped;
16237 
16238  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16239  content = parser->current;
16240  unescaped = parser->current_string;
16241  parser_lex(parser);
16242 
16243  // If we have two string contents in a row, then the content of this
16244  // symbol is split because of heredoc contents. This looks like:
16245  //
16246  // <<A; :'a
16247  // A
16248  // b'
16249  //
16250  // In this case, the best way we have to represent this is as an
16251  // interpolated string node, so that's what we'll do here.
16252  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16253  pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16254  pm_token_t bounds = not_provided(parser);
16255 
16256  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
16257  pm_interpolated_symbol_node_append(symbol, part);
16258 
16259  part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
16260  pm_interpolated_symbol_node_append(symbol, part);
16261 
16262  if (next_state != PM_LEX_STATE_NONE) {
16263  lex_state_set(parser, next_state);
16264  }
16265 
16266  parser_lex(parser);
16267  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16268 
16269  pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16270  return (pm_node_t *) symbol;
16271  }
16272  } else {
16273  content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
16274  pm_string_shared_init(&unescaped, content.start, content.end);
16275  }
16276 
16277  if (next_state != PM_LEX_STATE_NONE) {
16278  lex_state_set(parser, next_state);
16279  }
16280 
16281  if (match1(parser, PM_TOKEN_EOF)) {
16282  pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
16283  } else {
16284  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16285  }
16286 
16287  return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
16288 }
16289 
16294 static inline pm_node_t *
16295 parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
16296  switch (parser->current.type) {
16297  case PM_CASE_OPERATOR: {
16298  const pm_token_t opening = not_provided(parser);
16299  return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
16300  }
16301  case PM_CASE_KEYWORD:
16302  case PM_TOKEN_CONSTANT:
16303  case PM_TOKEN_IDENTIFIER:
16304  case PM_TOKEN_METHOD_NAME: {
16305  parser_lex(parser);
16306 
16307  pm_token_t opening = not_provided(parser);
16308  pm_token_t closing = not_provided(parser);
16309  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16310 
16311  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16312  pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16313 
16314  return (pm_node_t *) symbol;
16315  }
16316  case PM_TOKEN_SYMBOL_BEGIN: {
16317  pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16318  parser_lex(parser);
16319 
16320  return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16321  }
16322  default:
16323  pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16324  return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16325  }
16326 }
16327 
16334 static inline pm_node_t *
16335 parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16336  switch (parser->current.type) {
16337  case PM_CASE_OPERATOR: {
16338  const pm_token_t opening = not_provided(parser);
16339  return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16340  }
16341  case PM_CASE_KEYWORD:
16342  case PM_TOKEN_CONSTANT:
16343  case PM_TOKEN_IDENTIFIER:
16344  case PM_TOKEN_METHOD_NAME: {
16345  if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16346  parser_lex(parser);
16347 
16348  pm_token_t opening = not_provided(parser);
16349  pm_token_t closing = not_provided(parser);
16350  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16351 
16352  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16353  pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16354 
16355  return (pm_node_t *) symbol;
16356  }
16357  case PM_TOKEN_SYMBOL_BEGIN: {
16358  pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16359  parser_lex(parser);
16360 
16361  return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16362  }
16364  parser_lex(parser);
16365  return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16367  parser_lex(parser);
16368  return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16370  parser_lex(parser);
16371  return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16372  default:
16373  pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16374  return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16375  }
16376 }
16377 
16382 static pm_node_t *
16383 parse_variable(pm_parser_t *parser) {
16384  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16385  int depth;
16386 
16387  if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
16388  return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16389  }
16390 
16391  pm_scope_t *current_scope = parser->current_scope;
16392  if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16393  if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
16394  // When you use a numbered parameter, it implies the existence of
16395  // all of the locals that exist before it. For example, referencing
16396  // _2 means that _1 must exist. Therefore here we loop through all
16397  // of the possibilities and add them into the constant pool.
16398  uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16399  for (uint8_t number = 1; number <= maximum; number++) {
16400  pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16401  }
16402 
16403  if (!match1(parser, PM_TOKEN_EQUAL)) {
16404  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16405  }
16406 
16407  pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
16408  pm_node_list_append(&current_scope->implicit_parameters, node);
16409 
16410  return node;
16411  } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16412  pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
16413  pm_node_list_append(&current_scope->implicit_parameters, node);
16414 
16415  return node;
16416  }
16417  }
16418 
16419  return NULL;
16420 }
16421 
16425 static pm_node_t *
16426 parse_variable_call(pm_parser_t *parser) {
16427  pm_node_flags_t flags = 0;
16428 
16429  if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16430  pm_node_t *node = parse_variable(parser);
16431  if (node != NULL) return node;
16433  }
16434 
16435  pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16436  pm_node_flag_set((pm_node_t *)node, flags);
16437 
16438  return (pm_node_t *) node;
16439 }
16440 
16446 static inline pm_token_t
16447 parse_method_definition_name(pm_parser_t *parser) {
16448  switch (parser->current.type) {
16449  case PM_CASE_KEYWORD:
16450  case PM_TOKEN_CONSTANT:
16451  case PM_TOKEN_METHOD_NAME:
16452  parser_lex(parser);
16453  return parser->previous;
16454  case PM_TOKEN_IDENTIFIER:
16455  pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
16456  parser_lex(parser);
16457  return parser->previous;
16458  case PM_CASE_OPERATOR:
16459  lex_state_set(parser, PM_LEX_STATE_ENDFN);
16460  parser_lex(parser);
16461  return parser->previous;
16462  default:
16463  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
16464  return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
16465  }
16466 }
16467 
16468 static void
16469 parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
16470  // Get a reference to the string struct that is being held by the string
16471  // node. This is the value we're going to actually manipulate.
16472  pm_string_ensure_owned(string);
16473 
16474  // Now get the bounds of the existing string. We'll use this as a
16475  // destination to move bytes into. We'll also use it for bounds checking
16476  // since we don't require that these strings be null terminated.
16477  size_t dest_length = pm_string_length(string);
16478  const uint8_t *source_cursor = (uint8_t *) string->source;
16479  const uint8_t *source_end = source_cursor + dest_length;
16480 
16481  // We're going to move bytes backward in the string when we get leading
16482  // whitespace, so we'll maintain a pointer to the current position in the
16483  // string that we're writing to.
16484  size_t trimmed_whitespace = 0;
16485 
16486  // While we haven't reached the amount of common whitespace that we need to
16487  // trim and we haven't reached the end of the string, we'll keep trimming
16488  // whitespace. Trimming in this context means skipping over these bytes such
16489  // that they aren't copied into the new string.
16490  while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16491  if (*source_cursor == '\t') {
16492  trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16493  if (trimmed_whitespace > common_whitespace) break;
16494  } else {
16495  trimmed_whitespace++;
16496  }
16497 
16498  source_cursor++;
16499  dest_length--;
16500  }
16501 
16502  memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
16503  string->length = dest_length;
16504 }
16505 
16509 static void
16510 parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16511  // The next node should be dedented if it's the first node in the list or if
16512  // it follows a string node.
16513  bool dedent_next = true;
16514 
16515  // Iterate over all nodes, and trim whitespace accordingly. We're going to
16516  // keep around two indices: a read and a write. If we end up trimming all of
16517  // the whitespace from a node, then we'll drop it from the list entirely.
16518  size_t write_index = 0;
16519 
16520  pm_node_t *node;
16521  PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16522  // We're not manipulating child nodes that aren't strings. In this case
16523  // we'll skip past it and indicate that the subsequent node should not
16524  // be dedented.
16525  if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16526  nodes->nodes[write_index++] = node;
16527  dedent_next = false;
16528  continue;
16529  }
16530 
16531  pm_string_node_t *string_node = ((pm_string_node_t *) node);
16532  if (dedent_next) {
16533  parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
16534  }
16535 
16536  if (string_node->unescaped.length == 0) {
16537  pm_node_destroy(parser, node);
16538  } else {
16539  nodes->nodes[write_index++] = node;
16540  }
16541 
16542  // We always dedent the next node if it follows a string node.
16543  dedent_next = true;
16544  }
16545 
16546  nodes->size = write_index;
16547 }
16548 
16552 static pm_token_t
16553 parse_strings_empty_content(const uint8_t *location) {
16554  return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16555 }
16556 
16560 static inline pm_node_t *
16561 parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16562  assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16563  bool concating = false;
16564 
16565  while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16566  pm_node_t *node = NULL;
16567 
16568  // Here we have found a string literal. We'll parse it and add it to
16569  // the list of strings.
16570  const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16571  assert(lex_mode->mode == PM_LEX_STRING);
16572  bool lex_interpolation = lex_mode->as.string.interpolation;
16573  bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16574 
16575  pm_token_t opening = parser->current;
16576  parser_lex(parser);
16577 
16578  if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16579  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16580  // If we get here, then we have an end immediately after a
16581  // start. In that case we'll create an empty content token and
16582  // return an uninterpolated string.
16583  pm_token_t content = parse_strings_empty_content(parser->previous.start);
16584  pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16585 
16586  pm_string_shared_init(&string->unescaped, content.start, content.end);
16587  node = (pm_node_t *) string;
16588  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16589  // If we get here, then we have an end of a label immediately
16590  // after a start. In that case we'll create an empty symbol
16591  // node.
16592  pm_token_t content = parse_strings_empty_content(parser->previous.start);
16593  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16594 
16595  pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16596  node = (pm_node_t *) symbol;
16597 
16598  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16599  } else if (!lex_interpolation) {
16600  // If we don't accept interpolation then we expect the string to
16601  // start with a single string content node.
16602  pm_string_t unescaped;
16603  pm_token_t content;
16604 
16605  if (match1(parser, PM_TOKEN_EOF)) {
16606  unescaped = PM_STRING_EMPTY;
16607  content = not_provided(parser);
16608  } else {
16609  unescaped = parser->current_string;
16610  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16611  content = parser->previous;
16612  }
16613 
16614  // It is unfortunately possible to have multiple string content
16615  // nodes in a row in the case that there's heredoc content in
16616  // the middle of the string, like this cursed example:
16617  //
16618  // <<-END+'b
16619  // a
16620  // END
16621  // c'+'d'
16622  //
16623  // In that case we need to switch to an interpolated string to
16624  // be able to contain all of the parts.
16625  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16626  pm_node_list_t parts = { 0 };
16627 
16628  pm_token_t delimiters = not_provided(parser);
16629  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16630  pm_node_list_append(&parts, part);
16631 
16632  do {
16633  part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16634  pm_node_list_append(&parts, part);
16635  parser_lex(parser);
16636  } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16637 
16638  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16639  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16640 
16641  pm_node_list_free(&parts);
16642  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16643  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16644  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16645  } else if (match1(parser, PM_TOKEN_EOF)) {
16646  pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16647  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16648  } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16649  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16650  } else {
16651  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16652  parser->previous.start = parser->previous.end;
16653  parser->previous.type = PM_TOKEN_MISSING;
16654  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16655  }
16656  } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16657  // In this case we've hit string content so we know the string
16658  // at least has something in it. We'll need to check if the
16659  // following token is the end (in which case we can return a
16660  // plain string) or if it's not then it has interpolation.
16661  pm_token_t content = parser->current;
16662  pm_string_t unescaped = parser->current_string;
16663  parser_lex(parser);
16664 
16665  if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16666  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16667  pm_node_flag_set(node, parse_unescaped_encoding(parser));
16668 
16669  // Kind of odd behavior, but basically if we have an
16670  // unterminated string and it ends in a newline, we back up one
16671  // character so that the error message is on the last line of
16672  // content in the string.
16673  if (!accept1(parser, PM_TOKEN_STRING_END)) {
16674  const uint8_t *location = parser->previous.end;
16675  if (location > parser->start && location[-1] == '\n') location--;
16676  pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16677 
16678  parser->previous.start = parser->previous.end;
16679  parser->previous.type = PM_TOKEN_MISSING;
16680  }
16681  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16682  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16683  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16684  } else {
16685  // If we get here, then we have interpolation so we'll need
16686  // to create a string or symbol node with interpolation.
16687  pm_node_list_t parts = { 0 };
16688  pm_token_t string_opening = not_provided(parser);
16689  pm_token_t string_closing = not_provided(parser);
16690 
16691  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16692  pm_node_flag_set(part, parse_unescaped_encoding(parser));
16693  pm_node_list_append(&parts, part);
16694 
16695  while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16696  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16697  pm_node_list_append(&parts, part);
16698  }
16699  }
16700 
16701  if (accept1(parser, PM_TOKEN_LABEL_END)) {
16702  node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16703  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16704  } else if (match1(parser, PM_TOKEN_EOF)) {
16705  pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16706  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16707  } else {
16708  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16709  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16710  }
16711 
16712  pm_node_list_free(&parts);
16713  }
16714  } else {
16715  // If we get here, then the first part of the string is not plain
16716  // string content, in which case we need to parse the string as an
16717  // interpolated string.
16718  pm_node_list_t parts = { 0 };
16719  pm_node_t *part;
16720 
16721  while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16722  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16723  pm_node_list_append(&parts, part);
16724  }
16725  }
16726 
16727  if (accept1(parser, PM_TOKEN_LABEL_END)) {
16728  node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16729  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16730  } else if (match1(parser, PM_TOKEN_EOF)) {
16731  pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16732  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16733  } else {
16734  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16735  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16736  }
16737 
16738  pm_node_list_free(&parts);
16739  }
16740 
16741  if (current == NULL) {
16742  // If the node we just parsed is a symbol node, then we can't
16743  // concatenate it with anything else, so we can now return that
16744  // node.
16746  return node;
16747  }
16748 
16749  // If we don't already have a node, then it's fine and we can just
16750  // set the result to be the node we just parsed.
16751  current = node;
16752  } else {
16753  // Otherwise we need to check the type of the node we just parsed.
16754  // If it cannot be concatenated with the previous node, then we'll
16755  // need to add a syntax error.
16757  pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16758  }
16759 
16760  // If we haven't already created our container for concatenation,
16761  // we'll do that now.
16762  if (!concating) {
16763  concating = true;
16764  pm_token_t bounds = not_provided(parser);
16765 
16766  pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16767  pm_interpolated_string_node_append(container, current);
16768  current = (pm_node_t *) container;
16769  }
16770 
16771  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16772  }
16773  }
16774 
16775  return current;
16776 }
16777 
16778 #define PM_PARSE_PATTERN_SINGLE 0
16779 #define PM_PARSE_PATTERN_TOP 1
16780 #define PM_PARSE_PATTERN_MULTI 2
16781 
16782 static pm_node_t *
16783 parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16784 
16790 static void
16791 parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16792  // Skip this capture if it starts with an underscore.
16793  if (*location->start == '_') return;
16794 
16795  if (pm_constant_id_list_includes(captures, capture)) {
16796  pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16797  } else {
16798  pm_constant_id_list_append(captures, capture);
16799  }
16800 }
16801 
16805 static pm_node_t *
16806 parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16807  // Now, if there are any :: operators that follow, parse them as constant
16808  // path nodes.
16809  while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16810  pm_token_t delimiter = parser->previous;
16811  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16812  node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
16813  }
16814 
16815  // If there is a [ or ( that follows, then this is part of a larger pattern
16816  // expression. We'll parse the inner pattern here, then modify the returned
16817  // inner pattern with our constant path attached.
16818  if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16819  return node;
16820  }
16821 
16822  pm_token_t opening;
16823  pm_token_t closing;
16824  pm_node_t *inner = NULL;
16825 
16826  if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16827  opening = parser->previous;
16828  accept1(parser, PM_TOKEN_NEWLINE);
16829 
16830  if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16831  inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16832  accept1(parser, PM_TOKEN_NEWLINE);
16833  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16834  }
16835 
16836  closing = parser->previous;
16837  } else {
16838  parser_lex(parser);
16839  opening = parser->previous;
16840  accept1(parser, PM_TOKEN_NEWLINE);
16841 
16842  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16843  inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16844  accept1(parser, PM_TOKEN_NEWLINE);
16845  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16846  }
16847 
16848  closing = parser->previous;
16849  }
16850 
16851  if (!inner) {
16852  // If there was no inner pattern, then we have something like Foo() or
16853  // Foo[]. In that case we'll create an array pattern with no requireds.
16854  return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16855  }
16856 
16857  // Now that we have the inner pattern, check to see if it's an array, find,
16858  // or hash pattern. If it is, then we'll attach our constant path to it if
16859  // it doesn't already have a constant. If it's not one of those node types
16860  // or it does have a constant, then we'll create an array pattern.
16861  switch (PM_NODE_TYPE(inner)) {
16862  case PM_ARRAY_PATTERN_NODE: {
16863  pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16864 
16865  if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16866  pattern_node->base.location.start = node->location.start;
16867  pattern_node->base.location.end = closing.end;
16868 
16869  pattern_node->constant = node;
16870  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16871  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16872 
16873  return (pm_node_t *) pattern_node;
16874  }
16875 
16876  break;
16877  }
16878  case PM_FIND_PATTERN_NODE: {
16879  pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16880 
16881  if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16882  pattern_node->base.location.start = node->location.start;
16883  pattern_node->base.location.end = closing.end;
16884 
16885  pattern_node->constant = node;
16886  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16887  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16888 
16889  return (pm_node_t *) pattern_node;
16890  }
16891 
16892  break;
16893  }
16894  case PM_HASH_PATTERN_NODE: {
16895  pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16896 
16897  if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16898  pattern_node->base.location.start = node->location.start;
16899  pattern_node->base.location.end = closing.end;
16900 
16901  pattern_node->constant = node;
16902  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16903  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16904 
16905  return (pm_node_t *) pattern_node;
16906  }
16907 
16908  break;
16909  }
16910  default:
16911  break;
16912  }
16913 
16914  // If we got here, then we didn't return one of the inner patterns by
16915  // attaching its constant. In this case we'll create an array pattern and
16916  // attach our constant to it.
16917  pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16918  pm_array_pattern_node_requireds_append(pattern_node, inner);
16919  return (pm_node_t *) pattern_node;
16920 }
16921 
16925 static pm_splat_node_t *
16926 parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16927  assert(parser->previous.type == PM_TOKEN_USTAR);
16928  pm_token_t operator = parser->previous;
16929  pm_node_t *name = NULL;
16930 
16931  // Rest patterns don't necessarily have a name associated with them. So we
16932  // will check for that here. If they do, then we'll add it to the local
16933  // table since this pattern will cause it to become a local variable.
16934  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16935  pm_token_t identifier = parser->previous;
16936  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
16937 
16938  int depth;
16939  if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16940  pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
16941  }
16942 
16943  parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
16944  name = (pm_node_t *) pm_local_variable_target_node_create(
16945  parser,
16946  &PM_LOCATION_TOKEN_VALUE(&identifier),
16947  constant_id,
16948  (uint32_t) (depth == -1 ? 0 : depth)
16949  );
16950  }
16951 
16952  // Finally we can return the created node.
16953  return pm_splat_node_create(parser, &operator, name);
16954 }
16955 
16959 static pm_node_t *
16960 parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16961  assert(parser->current.type == PM_TOKEN_USTAR_STAR);
16962  parser_lex(parser);
16963 
16964  pm_token_t operator = parser->previous;
16965  pm_node_t *value = NULL;
16966 
16967  if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16968  return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
16969  }
16970 
16971  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16972  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16973 
16974  int depth;
16975  if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16976  pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16977  }
16978 
16979  parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16980  value = (pm_node_t *) pm_local_variable_target_node_create(
16981  parser,
16982  &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16983  constant_id,
16984  (uint32_t) (depth == -1 ? 0 : depth)
16985  );
16986  }
16987 
16988  return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
16989 }
16990 
16995 static bool
16996 pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16997  ptrdiff_t length = end - start;
16998  if (length == 0) return false;
16999 
17000  // First ensure that it starts with a valid identifier starting character.
17001  size_t width = char_is_identifier_start(parser, start);
17002  if (width == 0) return false;
17003 
17004  // Next, ensure that it's not an uppercase character.
17005  if (parser->encoding_changed) {
17006  if (parser->encoding->isupper_char(start, length)) return false;
17007  } else {
17008  if (pm_encoding_utf_8_isupper_char(start, length)) return false;
17009  }
17010 
17011  // Next, iterate through all of the bytes of the string to ensure that they
17012  // are all valid identifier characters.
17013  const uint8_t *cursor = start + width;
17014  while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
17015  return cursor == end;
17016 }
17017 
17022 static pm_node_t *
17023 parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
17024  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
17025 
17026  pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
17027  int depth = -1;
17028 
17029  if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
17030  depth = pm_parser_local_depth_constant_id(parser, constant_id);
17031  } else {
17032  pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
17033 
17034  if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
17035  PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
17036  }
17037  }
17038 
17039  if (depth == -1) {
17040  pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
17041  }
17042 
17043  parse_pattern_capture(parser, captures, constant_id, value_loc);
17044  pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17045  parser,
17046  value_loc,
17047  constant_id,
17048  (uint32_t) (depth == -1 ? 0 : depth)
17049  );
17050 
17051  return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
17052 }
17053 
17058 static void
17059 parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
17060  if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
17061  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
17062  }
17063 }
17064 
/**
 * Parse the entries of a hash pattern. The caller has already parsed the first
 * node of the pattern and passes it in as first_node; any further entries are
 * read here, separated by commas.
 *
 * Entries are gathered into a node list, a `**` rest node (if any) is tracked
 * separately, and label keys are checked with parse_pattern_hash_key against a
 * key set shared by the whole pattern. The list's backing array and the key
 * set are released before the resulting hash pattern node is returned.
 *
 * NOTE(review): this listing skips several numbered lines inside the function
 * (17076, 17084, 17121, 17145, 17158), so the conditions that open some of the
 * visible `else` branches cannot be seen here.
 */
17068 static pm_hash_pattern_node_t *
17069 parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
17070  pm_node_list_t assocs = { 0 };
17071  pm_static_literals_t keys = { 0 };
17072  pm_node_t *rest = NULL;
17073
 // Start with the node that the caller already parsed.
17074  switch (PM_NODE_TYPE(first_node)) {
17075  case PM_ASSOC_SPLAT_NODE:
 // A leading splat is remembered as the rest node instead of being
 // appended to the list of assocs.
17077  rest = first_node;
17078  break;
17079  case PM_SYMBOL_NODE: {
17080  if (pm_symbol_node_label_p(first_node)) {
17081  parse_pattern_hash_key(parser, &keys, first_node);
17082  pm_node_t *value;
17083
 // NOTE(review): listing line 17084 is missing here; the `if` whose
 // branch ends at the `} else {` below is not visible.
17085  // Otherwise, we will create an implicit local variable
17086  // target for the value.
17087  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
17088  } else {
17089  // Here we have a value for the first assoc in the list, so
17090  // we will parse it now.
17091  value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17092  }
17093
17094  pm_token_t operator = not_provided(parser);
17095  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17096
17097  pm_node_list_append(&assocs, assoc);
17098  break;
17099  }
17100  }
 // A symbol node that is not a label falls through to the error handling
 // below.
17101  /* fallthrough */
17102  default: {
17103  // If we get anything else, then this is an error. For this we'll
17104  // create a missing node for the value and create an assoc node for
17105  // the first node in the list.
17106  pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
17107  pm_parser_err_node(parser, first_node, diag_id);
17108
17109  pm_token_t operator = not_provided(parser);
17110  pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
17111  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17112
17113  pm_node_list_append(&assocs, assoc);
17114  break;
17115  }
17116  }
17117
17118  // If there are any other assocs, then we'll parse them now.
17119  while (accept1(parser, PM_TOKEN_COMMA)) {
17120  // Here we need to break to support trailing commas.
 // NOTE(review): listing line 17121 is missing here; the condition that
 // guards this early `break` is not visible.
17122  // Trailing commas are not allowed to follow a rest pattern.
17123  if (rest != NULL) {
17124  pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17125  }
17126
17127  break;
17128  }
17129
17130  if (match1(parser, PM_TOKEN_USTAR_STAR)) {
17131  pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
17132
 // Only the first rest node is kept as the rest; any later one is
 // reported as an error and stored with the regular assocs.
17133  if (rest == NULL) {
17134  rest = assoc;
17135  } else {
17136  pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17137  pm_node_list_append(&assocs, assoc);
17138  }
17139  } else {
17140  pm_node_t *key;
17141
17142  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17143  key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
17144
 // NOTE(review): listing line 17145 is missing here; the `if` that
 // selects the interpolated-key error below is not visible.
17146  pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
17147  } else if (!pm_symbol_node_label_p(key)) {
17148  pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17149  }
17150  } else {
17151  expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17152  key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17153  }
17154
17155  parse_pattern_hash_key(parser, &keys, key);
17156  pm_node_t *value = NULL;
17157
 // NOTE(review): listing line 17158 is missing here; the `if` that
 // chooses between an implicit value and an explicit pattern is not
 // visible.
17159  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
17160  } else {
17161  value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17162  }
17163
17164  pm_token_t operator = not_provided(parser);
17165  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
17166
 // A key/value pair that comes after a rest node is an error, but it
 // is still appended to the list.
17167  if (rest != NULL) {
17168  pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17169  }
17170
17171  pm_node_list_append(&assocs, assoc);
17172  }
17173  }
17174
 // Build the node, then release the list's backing array and the key set.
17175  pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
17176  xfree(assocs.nodes);
17177
17178  pm_static_literals_free(&keys);
17179  return node;
17180 }
17181 
/**
 * Parse a single pattern primitive, dispatching on the type of the current
 * token. The visible branches build: a local variable target, a bracketed
 * array (or find) pattern, a braced hash pattern, beginless/endless/full
 * ranges, pinned variables and pinned expressions after `^`, and constants or
 * constant paths. For any other token, diag_id is reported on the current
 * token and a missing node is returned.
 *
 * NOTE(review): this listing skips the numbered lines 17206, 17374, 17392 and
 * 17404, so four `case` labels are not visible; the affected branches are
 * marked below.
 */
17185 static pm_node_t *
17186 parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
17187  switch (parser->current.type) {
17188  case PM_TOKEN_IDENTIFIER:
17189  case PM_TOKEN_METHOD_NAME: {
17190  parser_lex(parser);
17191  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17192
 // This local shadows the uint16_t depth parameter for the rest of the
 // case. It holds the result of pm_parser_local_depth_constant_id; a
 // result of -1 makes us add the local and is mapped to 0 below.
17193  int depth;
17194  if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17195  pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17196  }
17197
17198  parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17199  return (pm_node_t *) pm_local_variable_target_node_create(
17200  parser,
17201  &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17202  constant_id,
17203  (uint32_t) (depth == -1 ? 0 : depth)
17204  );
17205  }
 // NOTE(review): listing line 17206 is missing here; the `case` label that
 // opens this bracketed-pattern branch is not visible.
17207  pm_token_t opening = parser->current;
17208  parser_lex(parser);
17209
17210  if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17211  // If we have an empty array pattern, then we'll just return a new
17212  // array pattern node.
17213  return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
17214  }
17215
17216  // Otherwise, we'll parse the inner pattern, then deal with it depending
17217  // on the type it returns.
17218  pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17219
17220  accept1(parser, PM_TOKEN_NEWLINE);
17221  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17222  pm_token_t closing = parser->previous;
17223
 // An inner array or find pattern that has no opening location yet is
 // reused: it takes the brackets as its delimiters and its location is
 // widened to cover them. Anything else is wrapped in a new array pattern.
17224  switch (PM_NODE_TYPE(inner)) {
17225  case PM_ARRAY_PATTERN_NODE: {
17226  pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17227  if (pattern_node->opening_loc.start == NULL) {
17228  pattern_node->base.location.start = opening.start;
17229  pattern_node->base.location.end = closing.end;
17230
17231  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17232  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17233
17234  return (pm_node_t *) pattern_node;
17235  }
17236
17237  break;
17238  }
17239  case PM_FIND_PATTERN_NODE: {
17240  pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17241  if (pattern_node->opening_loc.start == NULL) {
17242  pattern_node->base.location.start = opening.start;
17243  pattern_node->base.location.end = closing.end;
17244
17245  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17246  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17247
17248  return (pm_node_t *) pattern_node;
17249  }
17250
17251  break;
17252  }
17253  default:
17254  break;
17255  }
17256
17257  pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
17258  pm_array_pattern_node_requireds_append(node, inner);
17259  return (pm_node_t *) node;
17260  }
17261  case PM_TOKEN_BRACE_LEFT: {
 // The pattern_matching_newlines flag is switched off while the braces
 // are parsed and restored before returning.
17262  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17263  parser->pattern_matching_newlines = false;
17264
17265  pm_hash_pattern_node_t *node;
17266  pm_token_t opening = parser->current;
17267  parser_lex(parser);
17268
17269  if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
17270  // If we have an empty hash pattern, then we'll just return a new hash
17271  // pattern node.
17272  node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
17273  } else {
17274  pm_node_t *first_node;
17275
 // Parse the first entry here so that it can be handed to
 // parse_pattern_hash as first_node.
17276  switch (parser->current.type) {
17277  case PM_TOKEN_LABEL:
17278  parser_lex(parser);
17279  first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17280  break;
17281  case PM_TOKEN_USTAR_STAR:
17282  first_node = parse_pattern_keyword_rest(parser, captures);
17283  break;
17284  case PM_TOKEN_STRING_BEGIN:
17285  first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17286  break;
17287  default: {
17288  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
17289  parser_lex(parser);
17290
17291  first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
17292  break;
17293  }
17294  }
17295
17296  node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17297
17298  accept1(parser, PM_TOKEN_NEWLINE);
17299  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
17300  pm_token_t closing = parser->previous;
17301
17302  node->base.location.start = opening.start;
17303  node->base.location.end = closing.end;
17304
17305  node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17306  node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17307  }
17308
17309  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17310  return (pm_node_t *) node;
17311  }
17312  case PM_TOKEN_UDOT_DOT:
17313  case PM_TOKEN_UDOT_DOT_DOT: {
17314  pm_token_t operator = parser->current;
17315  parser_lex(parser);
17316
17317  // Since we have a unary range operator, we need to parse the subsequent
17318  // expression as the right side of the range.
17319  switch (parser->current.type) {
17320  case PM_CASE_PRIMITIVE: {
17321  pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17322  return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17323  }
17324  default: {
 // No usable right-hand side: report the error on the operator and
 // use a missing node as the end of the range.
17325  pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17326  pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17327  return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17328  }
17329  }
17330  }
17331  case PM_CASE_PRIMITIVE: {
17332  pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
17333
17334  // If we found a label, we need to immediately return to the caller.
17335  if (pm_symbol_node_label_p(node)) return node;
17336
17337  // Now that we have a primitive, we need to check if it's part of a range.
17338  if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17339  pm_token_t operator = parser->previous;
17340
17341  // Now that we have the operator, we need to check if this is followed
17342  // by another expression. If it is, then we will create a full range
17343  // node. Otherwise, we'll create an endless range.
17344  switch (parser->current.type) {
17345  case PM_CASE_PRIMITIVE: {
17346  pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17347  return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
17348  }
17349  default:
17350  return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
17351  }
17352  }
17353
17354  return node;
17355  }
17356  case PM_TOKEN_CARET: {
17357  parser_lex(parser);
17358  pm_token_t operator = parser->previous;
17359
17360  // At this point we have a pin operator. We need to check the subsequent
17361  // expression to determine if it's a variable or an expression.
17362  switch (parser->current.type) {
17363  case PM_TOKEN_IDENTIFIER: {
17364  parser_lex(parser);
17365  pm_node_t *variable = (pm_node_t *) parse_variable(parser);
17366
 // When parse_variable returns NULL, report the error and pin a
 // placeholder local variable read instead.
17367  if (variable == NULL) {
17368  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17369  variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
17370  }
17371
17372  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17373  }
 // NOTE(review): listing line 17374 is missing here; the `case` label for
 // this branch (which pins an instance variable read) is not visible.
17375  parser_lex(parser);
17376  pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
17377
17378  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17379  }
17380  case PM_TOKEN_CLASS_VARIABLE: {
17381  parser_lex(parser);
17382  pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
17383
17384  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17385  }
17386  case PM_TOKEN_GLOBAL_VARIABLE: {
17387  parser_lex(parser);
17388  pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
17389
17390  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17391  }
 // NOTE(review): listing line 17392 is missing here; the `case` label for
 // this branch (which pins a numbered reference read) is not visible.
17393  parser_lex(parser);
17394  pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
17395
17396  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17397  }
17398  case PM_TOKEN_BACK_REFERENCE: {
17399  parser_lex(parser);
17400  pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
17401
17402  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17403  }
 // NOTE(review): listing line 17404 is missing here; the `case` label for
 // this branch (a parenthesized pinned expression) is not visible.
17405  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17406  parser->pattern_matching_newlines = false;
17407
17408  pm_token_t lparen = parser->current;
17409  parser_lex(parser);
17410
17411  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17412  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17413
17414  accept1(parser, PM_TOKEN_NEWLINE);
17415  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17416  return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
17417  }
17418  default: {
17419  // If we get here, then we have a pin operator followed by something
17420  // not understood. We'll create a missing node and return that.
17421  pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17422  pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17423  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17424  }
17425  }
17426  }
17427  case PM_TOKEN_UCOLON_COLON: {
 // A leading `::` must be followed by a constant; the constant path is
 // created with no parent node.
17428  pm_token_t delimiter = parser->current;
17429  parser_lex(parser);
17430
17431  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17432  pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17433
17434  return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
17435  }
17436  case PM_TOKEN_CONSTANT: {
17437  pm_token_t constant = parser->current;
17438  parser_lex(parser);
17439
17440  pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
17441  return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17442  }
17443  default:
 // Nothing recognizable: report diag_id on the current token and return
 // a missing node spanning it. The token is not consumed here.
17444  pm_parser_err_current(parser, diag_id);
17445  return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17446  }
17447 }
17448 
/**
 * Parse a chain of pattern primitives joined by `|`, followed by any number of
 * `=> identifier` captures.
 *
 * first_node may be NULL, in which case the first primitive is parsed here
 * (reporting diag_id if none is found); otherwise it is used as the left-most
 * operand. Each `|` combines the node built so far with the next primitive
 * into an alternation pattern node, and each `=>` wraps the node built so far
 * in a capture pattern node targeting a local variable.
 *
 * NOTE(review): this listing skips the numbered lines 17462 and 17479-17480,
 * so some `case` labels of the inner switch are not visible.
 */
17453 static pm_node_t *
17454 parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17455  pm_node_t *node = first_node;
17456
 // The first iteration runs without consuming a `|` when there is no node
 // yet; every later iteration requires one.
17457  while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
 // Only used when an alternation node is built, i.e. after a `|` was
 // accepted.
17458  pm_token_t operator = parser->previous;
17459
17460  switch (parser->current.type) {
17461  case PM_TOKEN_IDENTIFIER:
 // NOTE(review): listing line 17462 is missing here.
17463  case PM_TOKEN_BRACE_LEFT:
17464  case PM_TOKEN_CARET:
17465  case PM_TOKEN_CONSTANT:
17466  case PM_TOKEN_UCOLON_COLON:
17467  case PM_TOKEN_UDOT_DOT:
17468  case PM_TOKEN_UDOT_DOT_DOT:
17469  case PM_CASE_PRIMITIVE: {
17470  if (node == NULL) {
17471  node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17472  } else {
17473  pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17474  node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17475  }
17476
17477  break;
17478  }
 // NOTE(review): listing lines 17479-17480 are missing here; the `case`
 // label(s) opening this parenthesized-pattern branch are not visible.
17481  pm_token_t opening = parser->current;
17482  parser_lex(parser);
17483
17484  pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17485  accept1(parser, PM_TOKEN_NEWLINE);
17486  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17487  pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
17488
17489  if (node == NULL) {
17490  node = right;
17491  } else {
17492  node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17493  }
17494
17495  break;
17496  }
17497  default: {
 // No primitive can start here: report diag_id on the current token
 // and stand in a missing node so a node is always produced.
17498  pm_parser_err_current(parser, diag_id);
17499  pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17500
17501  if (node == NULL) {
17502  node = right;
17503  } else {
17504  node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17505  }
17506
17507  break;
17508  }
17509  }
17510  }
17511
17512  // If we have an =>, then we are assigning this pattern to a variable.
17513  // In this case we should create an assignment node.
17514  while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17515  pm_token_t operator = parser->previous;
17516  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17517
17518  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
 // This local shadows the uint16_t depth parameter inside the loop body.
 // It holds the result of pm_parser_local_depth_constant_id; a result of
 // -1 makes us add the local and is mapped to 0 below.
17519  int depth;
17520
17521  if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17522  pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17523  }
17524
17525  parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17526  pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17527  parser,
17528  &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17529  constant_id,
17530  (uint32_t) (depth == -1 ? 0 : depth)
17531  );
17532
17533  node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
17534  }
17535
17536  return node;
17537 }
17538 
/**
 * Parse a pattern.
 *
 * flags is a bit set tested against PM_PARSE_PATTERN_TOP and
 * PM_PARSE_PATTERN_MULTI in this function (callers in this file also pass
 * PM_PARSE_PATTERN_SINGLE):
 *
 * - A hash pattern written without braces (starting with a label, `**`, or a
 *   string that turns out to be a label) is reported with
 *   PM_ERR_PATTERN_HASH_IMPLICIT unless PM_PARSE_PATTERN_TOP is set.
 * - A leading `*` is parsed as a rest pattern only when TOP or MULTI is set.
 * - A comma-separated list is collected into an array or find pattern only
 *   when MULTI is set.
 *
 * diag_id is the diagnostic reported when no pattern can be parsed at the
 * current token.
 *
 * NOTE(review): this listing skips the numbered line 17619, so the condition
 * guarding the trailing-comma branch is not visible.
 */
17542 static pm_node_t *
17543 parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17544  pm_node_t *node = NULL;
17545
 // Track whether the list starts with a rest pattern and whether a rest
 // pattern (explicit or implicit) has been seen after the first element.
17546  bool leading_rest = false;
17547  bool trailing_rest = false;
17548
17549  switch (parser->current.type) {
17550  case PM_TOKEN_LABEL: {
17551  parser_lex(parser);
17552  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17553  node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
17554
17555  if (!(flags & PM_PARSE_PATTERN_TOP)) {
17556  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17557  }
17558
17559  return node;
17560  }
17561  case PM_TOKEN_USTAR_STAR: {
17562  node = parse_pattern_keyword_rest(parser, captures);
17563  node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17564
17565  if (!(flags & PM_PARSE_PATTERN_TOP)) {
17566  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17567  }
17568
17569  return node;
17570  }
17571  case PM_TOKEN_STRING_BEGIN: {
17572  // We need special handling for string beginnings because they could
17573  // be dynamic symbols leading to hash patterns.
17574  node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17575
17576  if (pm_symbol_node_label_p(node)) {
17577  node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17578
17579  if (!(flags & PM_PARSE_PATTERN_TOP)) {
17580  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17581  }
17582
17583  return node;
17584  }
17585
 // Not a label: continue with any `|` alternatives and `=>` captures
 // that follow the primitive we just parsed.
17586  node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17587  break;
17588  }
17589  case PM_TOKEN_USTAR: {
17590  if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17591  parser_lex(parser);
17592  node = (pm_node_t *) parse_pattern_rest(parser, captures);
17593  leading_rest = true;
17594  break;
17595  }
17596  }
 // Without TOP or MULTI the `*` is left unconsumed and handled by the
 // default branch.
17597  /* fallthrough */
17598  default:
17599  node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17600  break;
17601  }
17602
17603  // If we got a dynamic label symbol, then we need to treat it like the
17604  // beginning of a hash pattern.
17605  if (pm_symbol_node_label_p(node)) {
17606  return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17607  }
17608
17609  if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17610  // If we have a comma, then we are now parsing either an array pattern
17611  // or a find pattern. We need to parse all of the patterns, put them
17612  // into a big list, and then determine which type of node we have.
17613  pm_node_list_t nodes = { 0 };
17614  pm_node_list_append(&nodes, node);
17615
17616  // Gather up all of the patterns into the list.
17617  while (accept1(parser, PM_TOKEN_COMMA)) {
17618  // Break early here in case we have a trailing comma.
 // NOTE(review): listing line 17619 is missing here; the condition
 // that detects the trailing comma is not visible. In that branch an
 // implicit rest node located at the comma is appended.
17620  node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17621  pm_node_list_append(&nodes, node);
17622  trailing_rest = true;
17623  break;
17624  }
17625
17626  if (accept1(parser, PM_TOKEN_USTAR)) {
17627  node = (pm_node_t *) parse_pattern_rest(parser, captures);
17628
17629  // If we have already parsed a splat pattern, then this is an
17630  // error. We will continue to parse the rest of the patterns,
17631  // but we will indicate it as an error.
17632  if (trailing_rest) {
17633  pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17634  }
17635
17636  trailing_rest = true;
17637  } else {
17638  node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17639  }
17640
17641  pm_node_list_append(&nodes, node);
17642  }
17643
17644  // If the first pattern and the last pattern are rest patterns, then we
17645  // will call this a find pattern, regardless of how many rest patterns
17646  // are in between because we know we already added the appropriate
17647  // errors. Otherwise we will create an array pattern.
17648  if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17649  node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17650
 // Exactly two elements means there is nothing between the two rest
 // patterns.
17651  if (nodes.size == 2) {
17652  pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17653  }
17654  } else {
17655  node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17656
17657  if (leading_rest && trailing_rest) {
17658  pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17659  }
17660  }
17661
 // Release the temporary list's backing array now that the node is built.
17662  xfree(nodes.nodes);
17663  } else if (leading_rest) {
17664  // Otherwise, if we parsed a single splat pattern, then we know we have
17665  // an array pattern, so we can go ahead and create that node.
17666  node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17667  }
17668
17669  return node;
17670 }
17671 
17677 static inline void
17678 parse_negative_numeric(pm_node_t *node) {
17679  switch (PM_NODE_TYPE(node)) {
17680  case PM_INTEGER_NODE: {
17681  pm_integer_node_t *cast = (pm_integer_node_t *) node;
17682  cast->base.location.start--;
17683  cast->value.negative = true;
17684  break;
17685  }
17686  case PM_FLOAT_NODE: {
17687  pm_float_node_t *cast = (pm_float_node_t *) node;
17688  cast->base.location.start--;
17689  cast->value = -cast->value;
17690  break;
17691  }
17692  case PM_RATIONAL_NODE: {
17693  pm_rational_node_t *cast = (pm_rational_node_t *) node;
17694  cast->base.location.start--;
17695  cast->numerator.negative = true;
17696  break;
17697  }
17698  case PM_IMAGINARY_NODE:
17699  node->location.start--;
17700  parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17701  break;
17702  default:
17703  assert(false && "unreachable");
17704  break;
17705  }
17706 }
17707 
17713 static void
17714 pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17715  switch (diag_id) {
17716  case PM_ERR_HASH_KEY: {
17717  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17718  break;
17719  }
17720  case PM_ERR_HASH_VALUE:
17721  case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17722  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17723  break;
17724  }
17725  case PM_ERR_UNARY_RECEIVER: {
17726  const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17727  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17728  break;
17729  }
17730  case PM_ERR_UNARY_DISALLOWED:
17731  case PM_ERR_EXPECT_ARGUMENT: {
17732  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17733  break;
17734  }
17735  default:
17736  pm_parser_err_previous(parser, diag_id);
17737  break;
17738  }
17739 }
17740 
/**
 * Check that a `retry` (node) appears somewhere it is allowed. The parser's
 * context stack is walked outward from the current context:
 *
 * - reaching one of the "good" contexts means the retry is valid and nothing
 *   is reported;
 * - reaching one of the "bad" contexts adds an error to node, whose exact
 *   diagnostic depends on whether an else or ensure context was crossed on the
 *   way out;
 * - any other context is skipped and the walk continues with its parent.
 *
 * The CONTEXT_* macros are helpers local to this function: they are defined at
 * the top of the body and undefined again at the bottom.
 *
 * NOTE(review): this listing skips a number of numbered lines inside the
 * switch, so several `case` labels are not visible here.
 */
17744 static void
17745 parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17746 #define CONTEXT_NONE 0
17747 #define CONTEXT_THROUGH_ENSURE 1
17748 #define CONTEXT_THROUGH_ELSE 2
17749
17750  pm_context_node_t *context_node = parser->current_context;
 // Remembers the most recent else/ensure context crossed during the walk.
17751  int context = CONTEXT_NONE;
17752
17753  while (context_node != NULL) {
17754  switch (context_node->context) {
17758  case PM_CONTEXT_DEF_RESCUE:
17762  case PM_CONTEXT_DEFINED:
17764  // These are the good cases. We're allowed to have a retry here.
17765  return;
17766  case PM_CONTEXT_CLASS:
17767  case PM_CONTEXT_DEF:
17768  case PM_CONTEXT_DEF_PARAMS:
17769  case PM_CONTEXT_MAIN:
17770  case PM_CONTEXT_MODULE:
17771  case PM_CONTEXT_PREEXE:
17772  case PM_CONTEXT_SCLASS:
17773  // These are the bad cases. We're not allowed to have a retry in
17774  // these contexts.
17775  if (context == CONTEXT_NONE) {
17776  pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17777  } else if (context == CONTEXT_THROUGH_ENSURE) {
17778  pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17779  } else if (context == CONTEXT_THROUGH_ELSE) {
17780  pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17781  }
17782  return;
17783  case PM_CONTEXT_BEGIN_ELSE:
17784  case PM_CONTEXT_BLOCK_ELSE:
17785  case PM_CONTEXT_CLASS_ELSE:
17786  case PM_CONTEXT_DEF_ELSE:
17790  // These are also bad cases, but with a more specific error
17791  // message indicating the else.
17792  context = CONTEXT_THROUGH_ELSE;
17793  break;
17797  case PM_CONTEXT_DEF_ENSURE:
17801  // These are also bad cases, but with a more specific error
17802  // message indicating the ensure.
17803  context = CONTEXT_THROUGH_ENSURE;
17804  break;
17805  case PM_CONTEXT_NONE:
17806  // This case should never happen.
17807  assert(false && "unreachable");
17808  break;
17809  case PM_CONTEXT_BEGIN:
17812  case PM_CONTEXT_CASE_IN:
17813  case PM_CONTEXT_CASE_WHEN:
17815  case PM_CONTEXT_ELSE:
17816  case PM_CONTEXT_ELSIF:
17817  case PM_CONTEXT_EMBEXPR:
17818  case PM_CONTEXT_FOR_INDEX:
17819  case PM_CONTEXT_FOR:
17820  case PM_CONTEXT_IF:
17825  case PM_CONTEXT_PARENS:
17826  case PM_CONTEXT_POSTEXE:
17827  case PM_CONTEXT_PREDICATE:
17828  case PM_CONTEXT_TERNARY:
17829  case PM_CONTEXT_UNLESS:
17830  case PM_CONTEXT_UNTIL:
17831  case PM_CONTEXT_WHILE:
17832  // In these contexts we should continue walking up the list of
17833  // contexts.
17834  break;
17835  }
17836
17837  context_node = context_node->prev;
17838  }
17839
 // Undefine exactly the names defined at the top of the function so that
 // none of the helper macros leak into the rest of the file.
17840 #undef CONTEXT_NONE
17841 #undef CONTEXT_THROUGH_ENSURE
17842 #undef CONTEXT_THROUGH_ELSE
17843 }
17844 
/**
 * Check that a `yield` (node) appears somewhere it is allowed. The parser's
 * context stack is walked outward from the current context: reaching one of
 * the "good" contexts ends the walk silently, reaching one of the "bad"
 * contexts adds PM_ERR_INVALID_YIELD to node, and any other context is skipped
 * so that the walk continues with its parent.
 *
 * NOTE(review): this listing skips a number of numbered lines inside the
 * switch, so several `case` labels are not visible here.
 */
17848 static void
17849 parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17850  pm_context_node_t *context_node = parser->current_context;
17851
17852  while (context_node != NULL) {
17853  switch (context_node->context) {
17854  case PM_CONTEXT_DEF:
17855  case PM_CONTEXT_DEF_PARAMS:
17856  case PM_CONTEXT_DEFINED:
17857  case PM_CONTEXT_DEF_ENSURE:
17858  case PM_CONTEXT_DEF_RESCUE:
17859  case PM_CONTEXT_DEF_ELSE:
17860  // These are the good cases. We're allowed to have a yield in
17861  // these contexts.
17862  return;
17863  case PM_CONTEXT_CLASS:
17866  case PM_CONTEXT_CLASS_ELSE:
17867  case PM_CONTEXT_MAIN:
17868  case PM_CONTEXT_MODULE:
17872  case PM_CONTEXT_SCLASS:
17876  // These are the bad cases. We're not allowed to have a yield in
17877  // these contexts.
17878  pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17879  return;
17880  case PM_CONTEXT_NONE:
17881  // This case should never happen.
17882  assert(false && "unreachable");
17883  break;
17884  case PM_CONTEXT_BEGIN:
17885  case PM_CONTEXT_BEGIN_ELSE:
17890  case PM_CONTEXT_BLOCK_ELSE:
17893  case PM_CONTEXT_CASE_IN:
17894  case PM_CONTEXT_CASE_WHEN:
17896  case PM_CONTEXT_ELSE:
17897  case PM_CONTEXT_ELSIF:
17898  case PM_CONTEXT_EMBEXPR:
17899  case PM_CONTEXT_FOR_INDEX:
17900  case PM_CONTEXT_FOR:
17901  case PM_CONTEXT_IF:
17909  case PM_CONTEXT_PARENS:
17910  case PM_CONTEXT_POSTEXE:
17911  case PM_CONTEXT_PREDICATE:
17912  case PM_CONTEXT_PREEXE:
17914  case PM_CONTEXT_TERNARY:
17915  case PM_CONTEXT_UNLESS:
17916  case PM_CONTEXT_UNTIL:
17917  case PM_CONTEXT_WHILE:
17918  // In these contexts we should continue walking up the list of
17919  // contexts.
17920  break;
17921  }
17922
17923  context_node = context_node->prev;
17924  }
17925 }
17926 
/**
 * Data for reporting regular expression parse errors.
 *
 * NOTE(review): this listing skips most numbered lines of the definition, so
 * at least one member and the closing line that names the type are not
 * visible. It appears to be the type behind `callback_data` in
 * parse_regular_expression_error and `error_data` in
 * parse_regular_expression_errors (both of which also use a `parser` member)
 * - confirm against the full file.
 */
17931 typedef struct {
17934
 // Start of a source range (presumably the fallback error location used
 // when `shared` is false - confirm).
17936  const uint8_t *start;
17937
 // End of that source range.
17939  const uint8_t *end;
17940
 // Flag that presumably selects between the callback's own range and the
 // start/end range above - confirm.
17947  bool shared;
17949 
/**
 * Error callback for regular expression parsing: appends a
 * PM_ERR_REGEXP_PARSE_ERROR diagnostic carrying message to the parser stored
 * in the callback data.
 *
 * The location of the diagnostic depends on the callback data's `shared` flag:
 * when it is set, the start/end range passed to this callback is used;
 * otherwise the start/end stored in the callback data is used instead.
 *
 * NOTE(review): listing line 17956 is missing, so the declaration of
 * `callback_data` (presumably derived from the `data` argument) is not
 * visible here.
 */
17954 static void
17955 parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17957  pm_location_t location;
17958
17959  if (callback_data->shared) {
17960  location = (pm_location_t) { .start = start, .end = end };
17961  } else {
17962  location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
17963  }
17964
17965  PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
17966 }
17967 
/**
 * Run the regular expression parser over the unescaped source of a regular
 * expression node so that any errors it finds are reported through
 * parse_regular_expression_error.
 *
 * The error data handed to the callback holds the parser, the node's own
 * location, and whether the unescaped string is of type PM_STRING_SHARED. The
 * node's PM_REGULAR_EXPRESSION_FLAGS_EXTENDED flag is forwarded to
 * pm_regexp_parse; the two arguments after it are passed as NULL.
 *
 * NOTE(review): listing line 17974 is missing, so the line that opens the
 * `error_data` initializer is not visible here.
 */
17971 static void
17972 parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17973  const pm_string_t *unescaped = &node->unescaped;
17975  .parser = parser,
17976  .start = node->base.location.start,
17977  .end = node->base.location.end,
17978  .shared = unescaped->type == PM_STRING_SHARED
17979  };
17980
17981  pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
17982 }
17983 
17987 static inline pm_node_t *
17988 parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
17989  switch (parser->current.type) {
17991  parser_lex(parser);
17992 
17993  pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
17994  pm_accepts_block_stack_push(parser, true);
17995  bool parsed_bare_hash = false;
17996 
17997  while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
17998  bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
17999 
18000  // Handle the case where we don't have a comma and we have a
18001  // newline followed by a right bracket.
18002  if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18003  break;
18004  }
18005 
18006  // Ensure that we have a comma between elements in the array.
18007  if (array->elements.size > 0) {
18008  if (accept1(parser, PM_TOKEN_COMMA)) {
18009  // If there was a comma but we also accepts a newline,
18010  // then this is a syntax error.
18011  if (accepted_newline) {
18012  pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18013  }
18014  } else {
18015  // If there was no comma, then we need to add a syntax
18016  // error.
18017  const uint8_t *location = parser->previous.end;
18018  PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18019 
18020  parser->previous.start = location;
18021  parser->previous.type = PM_TOKEN_MISSING;
18022  }
18023  }
18024 
18025  // If we have a right bracket immediately following a comma,
18026  // this is allowed since it's a trailing comma. In this case we
18027  // can break out of the loop.
18028  if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
18029 
18030  pm_node_t *element;
18031 
18032  if (accept1(parser, PM_TOKEN_USTAR)) {
18033  pm_token_t operator = parser->previous;
18034  pm_node_t *expression = NULL;
18035 
18036  if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
18037  pm_parser_scope_forwarding_positionals_check(parser, &operator);
18038  } else {
18039  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18040  }
18041 
18042  element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
18043  } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
18044  if (parsed_bare_hash) {
18045  pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
18046  }
18047 
18048  element = (pm_node_t *) pm_keyword_hash_node_create(parser);
18049  pm_static_literals_t hash_keys = { 0 };
18050 
18052  parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18053  }
18054 
18055  pm_static_literals_free(&hash_keys);
18056  parsed_bare_hash = true;
18057  } else {
18058  element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
18059 
18060  if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
18061  if (parsed_bare_hash) {
18062  pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
18063  }
18064 
18065  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
18066  pm_static_literals_t hash_keys = { 0 };
18067  pm_hash_key_static_literals_add(parser, &hash_keys, element);
18068 
18069  pm_token_t operator;
18070  if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
18071  operator = parser->previous;
18072  } else {
18073  operator = not_provided(parser);
18074  }
18075 
18076  pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
18077  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
18078  pm_keyword_hash_node_elements_append(hash, assoc);
18079 
18080  element = (pm_node_t *) hash;
18081  if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18082  parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18083  }
18084 
18085  pm_static_literals_free(&hash_keys);
18086  parsed_bare_hash = true;
18087  }
18088  }
18089 
18090  pm_array_node_elements_append(array, element);
18091  if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
18092  }
18093 
18094  accept1(parser, PM_TOKEN_NEWLINE);
18095 
18096  if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18097  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
18098  parser->previous.start = parser->previous.end;
18099  parser->previous.type = PM_TOKEN_MISSING;
18100  }
18101 
18102  pm_array_node_close_set(array, &parser->previous);
18103  pm_accepts_block_stack_pop(parser);
18104 
18105  return (pm_node_t *) array;
18106  }
18109  pm_token_t opening = parser->current;
18110 
18111  pm_node_list_t current_block_exits = { 0 };
18112  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18113 
18114  parser_lex(parser);
18115  while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
18116 
18117  // If this is the end of the file or we match a right parenthesis, then
18118  // we have an empty parentheses node, and we can immediately return.
18119  if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18120  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18121 
18122  pop_block_exits(parser, previous_block_exits);
18123  pm_node_list_free(&current_block_exits);
18124 
18125  return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous);
18126  }
18127 
18128  // Otherwise, we're going to parse the first statement in the list
18129  // of statements within the parentheses.
18130  pm_accepts_block_stack_push(parser, true);
18131  context_push(parser, PM_CONTEXT_PARENS);
18132  pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18133  context_pop(parser);
18134 
18135  // Determine if this statement is followed by a terminator. In the
18136  // case of a single statement, this is fine. But in the case of
18137  // multiple statements it's required.
18138  bool terminator_found = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18139  if (terminator_found) {
18140  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18141  }
18142 
18143  // If we hit a right parenthesis, then we're done parsing the
18144  // parentheses node, and we can check which kind of node we should
18145  // return.
18146  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18148  lex_state_set(parser, PM_LEX_STATE_ENDARG);
18149  }
18150 
18151  parser_lex(parser);
18152  pm_accepts_block_stack_pop(parser);
18153 
18154  pop_block_exits(parser, previous_block_exits);
18155  pm_node_list_free(&current_block_exits);
18156 
18157  if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18158  // If we have a single statement and are ending on a right
18159  // parenthesis, then we need to check if this is possibly a
18160  // multiple target node.
18161  pm_multi_target_node_t *multi_target;
18162 
18163  if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
18164  multi_target = (pm_multi_target_node_t *) statement;
18165  } else {
18166  multi_target = pm_multi_target_node_create(parser);
18167  pm_multi_target_node_targets_append(parser, multi_target, statement);
18168  }
18169 
18170  pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18171  pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
18172 
18173  multi_target->lparen_loc = lparen_loc;
18174  multi_target->rparen_loc = rparen_loc;
18175  multi_target->base.location.start = lparen_loc.start;
18176  multi_target->base.location.end = rparen_loc.end;
18177 
18178  pm_node_t *result;
18179  if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18180  result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18181  accept1(parser, PM_TOKEN_NEWLINE);
18182  } else {
18183  result = (pm_node_t *) multi_target;
18184  }
18185 
18186  if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18187  // All set, this is explicitly allowed by the parent
18188  // context.
18189  } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18190  // All set, we're inside a for loop and we're parsing
18191  // multiple targets.
18192  } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18193  // Multi targets are not allowed when it's not a
18194  // statement level.
18195  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18196  } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18197  // Multi targets must be followed by an equal sign in
18198  // order to be valid (or a right parenthesis if they are
18199  // nested).
18200  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18201  }
18202 
18203  return result;
18204  }
18205 
18206  // If we have a single statement and are ending on a right parenthesis
18207  // and we didn't return a multiple assignment node, then we can return a
18208  // regular parentheses node now.
18209  pm_statements_node_t *statements = pm_statements_node_create(parser);
18210  pm_statements_node_body_append(parser, statements, statement, true);
18211 
18212  return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18213  }
18214 
18215  // If we have more than one statement in the set of parentheses,
18216  // then we are going to parse all of them as a list of statements.
18217  // We'll do that here.
18218  context_push(parser, PM_CONTEXT_PARENS);
18219  pm_statements_node_t *statements = pm_statements_node_create(parser);
18220  pm_statements_node_body_append(parser, statements, statement, true);
18221 
18222  // If we didn't find a terminator and we didn't find a right
18223  // parenthesis, then this is a syntax error.
18224  if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18225  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18226  }
18227 
18228  // Parse each statement within the parentheses.
18229  while (true) {
18230  pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18231  pm_statements_node_body_append(parser, statements, node, true);
18232 
18233  // If we're recovering from a syntax error, then we need to stop
18234  // parsing the statements now.
18235  if (parser->recovering) {
18236  // If this is the level of context where the recovery has
18237  // happened, then we can mark the parser as done recovering.
18238  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
18239  break;
18240  }
18241 
18242  // If we couldn't parse an expression at all, then we need to
18243  // bail out of the loop.
18244  if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
18245 
18246  // If we successfully parsed a statement, then we are going to
18247  // need a terminator to delimit them.
18248  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18249  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18250  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
18251  } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18252  break;
18253  } else if (!match1(parser, PM_TOKEN_EOF)) {
18254  // If we're at the end of the file, then we're going to add
18255  // an error after this for the ) anyway.
18256  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18257  }
18258  }
18259 
18260  context_pop(parser);
18261  pm_accepts_block_stack_pop(parser);
18262  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18263 
18264  // When we're parsing multi targets, we allow them to be followed by
18265  // a right parenthesis if they are at the statement level. This is
18266  // only possible if they are the final statement in a parentheses.
18267  // We need to explicitly reject that here.
18268  {
18269  pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18270 
18271  if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18272  pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18273  pm_multi_target_node_targets_append(parser, multi_target, statement);
18274 
18275  statement = (pm_node_t *) multi_target;
18276  statements->body.nodes[statements->body.size - 1] = statement;
18277  }
18278 
18279  if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18280  const uint8_t *offset = statement->location.end;
18281  pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18282  pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18283 
18284  statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18285  statements->body.nodes[statements->body.size - 1] = statement;
18286 
18287  pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18288  }
18289  }
18290 
18291  pop_block_exits(parser, previous_block_exits);
18292  pm_node_list_free(&current_block_exits);
18293 
18294  pm_void_statements_check(parser, statements, true);
18295  return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18296  }
18297  case PM_TOKEN_BRACE_LEFT: {
18298  // If we were passed a current_hash_keys via the parser, then that
18299  // means we're already parsing a hash and we want to share the set
18300  // of hash keys with this inner hash we're about to parse for the
18301  // sake of warnings. We'll set it to NULL after we grab it to make
18302  // sure subsequent expressions don't use it. Effectively this is a
18303  // way of getting around passing it to every call to
18304  // parse_expression.
18305  pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
18306  parser->current_hash_keys = NULL;
18307 
18308  pm_accepts_block_stack_push(parser, true);
18309  parser_lex(parser);
18310 
18311  pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
18312 
18313  if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
18314  if (current_hash_keys != NULL) {
18315  parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18316  } else {
18317  pm_static_literals_t hash_keys = { 0 };
18318  parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18319  pm_static_literals_free(&hash_keys);
18320  }
18321 
18322  accept1(parser, PM_TOKEN_NEWLINE);
18323  }
18324 
18325  pm_accepts_block_stack_pop(parser);
18326  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
18327  pm_hash_node_closing_loc_set(node, &parser->previous);
18328 
18329  return (pm_node_t *) node;
18330  }
18332  parser_lex(parser);
18333 
18334  pm_token_t opening = parser->previous;
18335  opening.type = PM_TOKEN_STRING_BEGIN;
18336  opening.end = opening.start + 1;
18337 
18338  pm_token_t content = parser->previous;
18339  content.type = PM_TOKEN_STRING_CONTENT;
18340  content.start = content.start + 1;
18341 
18342  pm_token_t closing = not_provided(parser);
18343  pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
18344  pm_node_flag_set(node, parse_unescaped_encoding(parser));
18345 
18346  // Characters can be followed by strings in which case they are
18347  // automatically concatenated.
18348  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18349  return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18350  }
18351 
18352  return node;
18353  }
18354  case PM_TOKEN_CLASS_VARIABLE: {
18355  parser_lex(parser);
18356  pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
18357 
18358  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18359  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18360  }
18361 
18362  return node;
18363  }
18364  case PM_TOKEN_CONSTANT: {
18365  parser_lex(parser);
18366  pm_token_t constant = parser->previous;
18367 
18368  // If a constant is immediately followed by parentheses, then this is in
18369  // fact a method call, not a constant read.
18370  if (
18371  match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
18372  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18373  (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18374  match1(parser, PM_TOKEN_BRACE_LEFT)
18375  ) {
18376  pm_arguments_t arguments = { 0 };
18377  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18378  return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
18379  }
18380 
18381  pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18382 
18383  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18384  // If we get here, then we have a comma immediately following a
18385  // constant, so we're going to parse this as a multiple assignment.
18386  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18387  }
18388 
18389  return node;
18390  }
18391  case PM_TOKEN_UCOLON_COLON: {
18392  parser_lex(parser);
18393  pm_token_t delimiter = parser->previous;
18394 
18395  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18396  pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
18397 
18398  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18399  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18400  }
18401 
18402  return node;
18403  }
18404  case PM_TOKEN_UDOT_DOT:
18405  case PM_TOKEN_UDOT_DOT_DOT: {
18406  pm_token_t operator = parser->current;
18407  parser_lex(parser);
18408 
18409  pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
18410 
18411  // Unary .. and ... are special because these are non-associative
18412  // operators that can also be unary operators. In this case we need
18413  // to explicitly reject code that has a .. or ... that follows this
18414  // expression.
18415  if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
18416  pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
18417  }
18418 
18419  return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
18420  }
18421  case PM_TOKEN_FLOAT:
18422  parser_lex(parser);
18423  return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
18425  parser_lex(parser);
18426  return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
18428  parser_lex(parser);
18429  return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
18431  parser_lex(parser);
18432  return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
18434  parser_lex(parser);
18435  pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
18436 
18437  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18438  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18439  }
18440 
18441  return node;
18442  }
18443  case PM_TOKEN_GLOBAL_VARIABLE: {
18444  parser_lex(parser);
18445  pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
18446 
18447  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18448  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18449  }
18450 
18451  return node;
18452  }
18453  case PM_TOKEN_BACK_REFERENCE: {
18454  parser_lex(parser);
18455  pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
18456 
18457  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18458  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18459  }
18460 
18461  return node;
18462  }
18463  case PM_TOKEN_IDENTIFIER:
18464  case PM_TOKEN_METHOD_NAME: {
18465  parser_lex(parser);
18466  pm_token_t identifier = parser->previous;
18467  pm_node_t *node = parse_variable_call(parser);
18468 
18469  if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
18470  // If parse_variable_call returned with a call node, then we
18471  // know the identifier is not in the local table. In that case
18472  // we need to check if there are arguments following the
18473  // identifier.
18474  pm_call_node_t *call = (pm_call_node_t *) node;
18475  pm_arguments_t arguments = { 0 };
18476 
18477  if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
18478  // Since we found arguments, we need to turn off the
18479  // variable call bit in the flags.
18480  pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
18481 
18482  call->opening_loc = arguments.opening_loc;
18483  call->arguments = arguments.arguments;
18484  call->closing_loc = arguments.closing_loc;
18485  call->block = arguments.block;
18486 
18487  if (arguments.block != NULL) {
18488  call->base.location.end = arguments.block->location.end;
18489  } else if (arguments.closing_loc.start == NULL) {
18490  if (arguments.arguments != NULL) {
18491  call->base.location.end = arguments.arguments->base.location.end;
18492  } else {
18493  call->base.location.end = call->message_loc.end;
18494  }
18495  } else {
18496  call->base.location.end = arguments.closing_loc.end;
18497  }
18498  }
18499  } else {
18500  // Otherwise, we know the identifier is in the local table. This
18501  // can still be a method call if it is followed by arguments or
18502  // a block, so we need to check for that here.
18503  if (
18504  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18505  (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18506  match1(parser, PM_TOKEN_BRACE_LEFT)
18507  ) {
18508  pm_arguments_t arguments = { 0 };
18509  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18510  pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
18511 
18513  // If we're about to convert an 'it' implicit local
18514  // variable read into a method call, we need to remove
18515  // it from the list of implicit local variables.
18516  parse_target_implicit_parameter(parser, node);
18517  } else {
18518  // Otherwise, we're about to convert a regular local
18519  // variable read into a method call, in which case we
18520  // need to indicate that this was not a read for the
18521  // purposes of warnings.
18523 
18524  if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
18525  parse_target_implicit_parameter(parser, node);
18526  } else {
18528  pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18529  }
18530  }
18531 
18532  pm_node_destroy(parser, node);
18533  return (pm_node_t *) fcall;
18534  }
18535  }
18536 
18537  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18538  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18539  }
18540 
18541  return node;
18542  }
18543  case PM_TOKEN_HEREDOC_START: {
18544  // Here we have found a heredoc. We'll parse it and add it to the
18545  // list of strings.
18546  assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18547  pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18548 
18549  size_t common_whitespace = (size_t) -1;
18550  parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18551 
18552  parser_lex(parser);
18553  pm_token_t opening = parser->previous;
18554 
18555  pm_node_t *node;
18556  pm_node_t *part;
18557 
18558  if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18559  // If we get here, then we have an empty heredoc. We'll create
18560  // an empty content token and return an empty string node.
18561  expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18562  pm_token_t content = parse_strings_empty_content(parser->previous.start);
18563 
18564  if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18565  node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18566  } else {
18567  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18568  }
18569 
18570  node->location.end = opening.end;
18571  } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18572  // If we get here, then we tried to find something in the
18573  // heredoc but couldn't actually parse anything, so we'll just
18574  // return a missing node.
18575  //
18576  // parse_string_part handles its own errors, so there is no need
18577  // for us to add one here.
18578  node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
18579  } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18580  // If we get here, then the part that we parsed was plain string
18581  // content and we're at the end of the heredoc, so we can return
18582  // just a string node with the heredoc opening and closing as
18583  // its opening and closing.
18584  pm_node_flag_set(part, parse_unescaped_encoding(parser));
18585  pm_string_node_t *cast = (pm_string_node_t *) part;
18586 
18587  cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18588  cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18589  cast->base.location = cast->opening_loc;
18590 
18591  if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18592  assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18593  cast->base.type = PM_X_STRING_NODE;
18594  }
18595 
18596  if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18597  parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18598  }
18599 
18600  node = (pm_node_t *) cast;
18601  expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18602  } else {
18603  // If we get here, then we have multiple parts in the heredoc,
18604  // so we'll need to create an interpolated string node to hold
18605  // them all.
18606  pm_node_list_t parts = { 0 };
18607  pm_node_list_append(&parts, part);
18608 
18609  while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18610  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18611  pm_node_list_append(&parts, part);
18612  }
18613  }
18614 
18615  // Now that we have all of the parts, create the correct type of
18616  // interpolated node.
18617  if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18618  pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18619  cast->parts = parts;
18620 
18621  expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18622  pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18623 
18624  cast->base.location = cast->opening_loc;
18625  node = (pm_node_t *) cast;
18626  } else {
18627  pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18628  pm_node_list_free(&parts);
18629 
18630  expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18631  pm_interpolated_string_node_closing_set(cast, &parser->previous);
18632 
18633  cast->base.location = cast->opening_loc;
18634  node = (pm_node_t *) cast;
18635  }
18636 
18637  // If this is a heredoc that is indented with a ~, then we need
18638  // to dedent each line by the common leading whitespace.
18639  if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18640  pm_node_list_t *nodes;
18641  if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18642  nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18643  } else {
18644  nodes = &((pm_interpolated_string_node_t *) node)->parts;
18645  }
18646 
18647  parse_heredoc_dedent(parser, nodes, common_whitespace);
18648  }
18649  }
18650 
18651  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18652  return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18653  }
18654 
18655  return node;
18656  }
18658  parser_lex(parser);
18659  pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
18660 
18661  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18662  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18663  }
18664 
18665  return node;
18666  }
18667  case PM_TOKEN_INTEGER: {
18668  pm_node_flags_t base = parser->integer_base;
18669  parser_lex(parser);
18670  return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
18671  }
18673  pm_node_flags_t base = parser->integer_base;
18674  parser_lex(parser);
18675  return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
18676  }
18678  pm_node_flags_t base = parser->integer_base;
18679  parser_lex(parser);
18680  return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
18681  }
18683  pm_node_flags_t base = parser->integer_base;
18684  parser_lex(parser);
18685  return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
18686  }
18688  parser_lex(parser);
18689  return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
18691  parser_lex(parser);
18692  return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
18694  parser_lex(parser);
18695  return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
18696  case PM_TOKEN_KEYWORD_ALIAS: {
18697  if (binding_power != PM_BINDING_POWER_STATEMENT) {
18698  pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18699  }
18700 
18701  parser_lex(parser);
18702  pm_token_t keyword = parser->previous;
18703 
18704  pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18705  pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18706 
18707  switch (PM_NODE_TYPE(new_name)) {
18713  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18714  }
18715  } else {
18716  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18717  }
18718 
18719  return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
18720  }
18721  case PM_SYMBOL_NODE:
18724  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18725  }
18726  }
18727  /* fallthrough */
18728  default:
18729  return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18730  }
18731  }
18732  case PM_TOKEN_KEYWORD_CASE: {
18733  size_t opening_newline_index = token_newline_index(parser);
18734  parser_lex(parser);
18735 
18736  pm_token_t case_keyword = parser->previous;
18737  pm_node_t *predicate = NULL;
18738 
18739  pm_node_list_t current_block_exits = { 0 };
18740  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18741 
18742  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18743  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18744  predicate = NULL;
18745  } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18746  predicate = NULL;
18747  } else if (!token_begins_expression_p(parser->current.type)) {
18748  predicate = NULL;
18749  } else {
18750  predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18751  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18752  }
18753 
18754  if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18755  parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18756  parser_lex(parser);
18757 
18758  pop_block_exits(parser, previous_block_exits);
18759  pm_node_list_free(&current_block_exits);
18760 
18761  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18762  return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
18763  }
18764 
18765  // At this point we can create a case node, though we don't yet know
18766  // if it is a case-in or case-when node.
18767  pm_token_t end_keyword = not_provided(parser);
18768  pm_node_t *node;
18769 
18770  if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18771  pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18772  pm_static_literals_t literals = { 0 };
18773 
18774  // At this point we've seen a when keyword, so we know this is a
18775  // case-when node. We will continue to parse the when nodes
18776  // until we hit the end of the list.
18777  while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18778  parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18779  parser_lex(parser);
18780 
18781  pm_token_t when_keyword = parser->previous;
18782  pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18783 
18784  do {
18785  if (accept1(parser, PM_TOKEN_USTAR)) {
18786  pm_token_t operator = parser->previous;
18787  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18788 
18789  pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18790  pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
18791 
18792  if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18793  } else {
18794  pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18795  pm_when_node_conditions_append(when_node, condition);
18796 
18797  // If we found a missing node, then this is a syntax
18798  // error and we should stop looping.
18799  if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18800 
18801  // If this is a string node, then we need to mark it
18802  // as frozen because when clause strings are frozen.
18803  if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18804  pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18805  } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18806  pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18807  }
18808 
18809  pm_when_clause_static_literals_add(parser, &literals, condition);
18810  }
18811  } while (accept1(parser, PM_TOKEN_COMMA));
18812 
18813  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18814  if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18815  pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18816  }
18817  } else {
18818  expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18819  pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18820  }
18821 
18823  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18824  if (statements != NULL) {
18825  pm_when_node_statements_set(when_node, statements);
18826  }
18827  }
18828 
18829  pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
18830  }
18831 
18832  // If we didn't parse any conditions (in or when) then we need
18833  // to indicate that we have an error.
18834  if (case_node->conditions.size == 0) {
18835  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18836  }
18837 
18838  pm_static_literals_free(&literals);
18839  node = (pm_node_t *) case_node;
18840  } else {
18841  pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
18842 
18843  // If this is a case-match node (i.e., it is a pattern matching
18844  // case statement) then we must have a predicate.
18845  if (predicate == NULL) {
18846  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18847  }
18848 
18849  // At this point we expect that we're parsing a case-in node. We
18850  // will continue to parse the in nodes until we hit the end of
18851  // the list.
18852  while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18853  parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18854 
18855  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18856  parser->pattern_matching_newlines = true;
18857 
18858  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18859  parser->command_start = false;
18860  parser_lex(parser);
18861 
18862  pm_token_t in_keyword = parser->previous;
18863 
18864  pm_constant_id_list_t captures = { 0 };
18865  pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18866 
18867  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18868  pm_constant_id_list_free(&captures);
18869 
18870  // Since we're in the top-level of the case-in node we need
18871  // to check for guard clauses in the form of `if` or
18872  // `unless` statements.
18873  if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18874  pm_token_t keyword = parser->previous;
18875  pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18876  pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
18877  } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18878  pm_token_t keyword = parser->previous;
18879  pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18880  pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
18881  }
18882 
18883  // Now we need to check for the terminator of the in node's
18884  // pattern. It can be a newline or semicolon optionally
18885  // followed by a `then` keyword.
18886  pm_token_t then_keyword;
18887  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18888  if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18889  then_keyword = parser->previous;
18890  } else {
18891  then_keyword = not_provided(parser);
18892  }
18893  } else {
18894  expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18895  then_keyword = parser->previous;
18896  }
18897 
18898  // Now we can actually parse the statements associated with
18899  // the in node.
18900  pm_statements_node_t *statements;
18902  statements = NULL;
18903  } else {
18904  statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18905  }
18906 
18907  // Now that we have the full pattern and statements, we can
18908  // create the node and attach it to the case node.
18909  pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
18910  pm_case_match_node_condition_append(case_node, condition);
18911  }
18912 
18913  // If we didn't parse any conditions (in or when) then we need
18914  // to indicate that we have an error.
18915  if (case_node->conditions.size == 0) {
18916  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18917  }
18918 
18919  node = (pm_node_t *) case_node;
18920  }
18921 
18922  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18923  if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18924  pm_token_t else_keyword = parser->previous;
18925  pm_else_node_t *else_node;
18926 
18927  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18928  else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
18929  } else {
18930  else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
18931  }
18932 
18933  if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18934  pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
18935  } else {
18936  pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
18937  }
18938  }
18939 
18940  parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18941  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
18942 
18943  if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18944  pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
18945  } else {
18946  pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
18947  }
18948 
18949  pop_block_exits(parser, previous_block_exits);
18950  pm_node_list_free(&current_block_exits);
18951 
18952  return node;
18953  }
18954  case PM_TOKEN_KEYWORD_BEGIN: {
18955  size_t opening_newline_index = token_newline_index(parser);
18956  parser_lex(parser);
18957 
18958  pm_token_t begin_keyword = parser->previous;
18959  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18960 
18961  pm_node_list_t current_block_exits = { 0 };
18962  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18963  pm_statements_node_t *begin_statements = NULL;
18964 
18966  pm_accepts_block_stack_push(parser, true);
18967  begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
18968  pm_accepts_block_stack_pop(parser);
18969  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18970  }
18971 
18972  pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
18973  parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
18974  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
18975 
18976  begin_node->base.location.end = parser->previous.end;
18977  pm_begin_node_end_keyword_set(begin_node, &parser->previous);
18978 
18979  pop_block_exits(parser, previous_block_exits);
18980  pm_node_list_free(&current_block_exits);
18981 
18982  return (pm_node_t *) begin_node;
18983  }
18985  pm_node_list_t current_block_exits = { 0 };
18986  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18987 
18988  if (binding_power != PM_BINDING_POWER_STATEMENT) {
18989  pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
18990  }
18991 
18992  parser_lex(parser);
18993  pm_token_t keyword = parser->previous;
18994 
18995  expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
18996  pm_token_t opening = parser->previous;
18997  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
18998 
18999  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
19000  pm_context_t context = parser->current_context->context;
19001  if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
19002  pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19003  }
19004 
19005  flush_block_exits(parser, previous_block_exits);
19006  pm_node_list_free(&current_block_exits);
19007 
19008  return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19009  }
19011  case PM_TOKEN_KEYWORD_NEXT:
19012  case PM_TOKEN_KEYWORD_RETURN: {
19013  parser_lex(parser);
19014 
19015  pm_token_t keyword = parser->previous;
19016  pm_arguments_t arguments = { 0 };
19017 
19018  if (
19019  token_begins_expression_p(parser->current.type) ||
19020  match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19021  ) {
19022  pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19023 
19024  if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19025  parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
19026  }
19027  }
19028 
19029  switch (keyword.type) {
19030  case PM_TOKEN_KEYWORD_BREAK: {
19031  pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
19032  if (!parser->partial_script) parse_block_exit(parser, node);
19033  return node;
19034  }
19035  case PM_TOKEN_KEYWORD_NEXT: {
19036  pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
19037  if (!parser->partial_script) parse_block_exit(parser, node);
19038  return node;
19039  }
19040  case PM_TOKEN_KEYWORD_RETURN: {
19041  pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
19042  parse_return(parser, node);
19043  return node;
19044  }
19045  default:
19046  assert(false && "unreachable");
19047  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
19048  }
19049  }
19050  case PM_TOKEN_KEYWORD_SUPER: {
19051  parser_lex(parser);
19052 
19053  pm_token_t keyword = parser->previous;
19054  pm_arguments_t arguments = { 0 };
19055  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
19056 
19057  if (
19058  arguments.opening_loc.start == NULL &&
19059  arguments.arguments == NULL &&
19060  ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19061  ) {
19062  return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
19063  }
19064 
19065  return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
19066  }
19067  case PM_TOKEN_KEYWORD_YIELD: {
19068  parser_lex(parser);
19069 
19070  pm_token_t keyword = parser->previous;
19071  pm_arguments_t arguments = { 0 };
19072  parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
19073 
19074  // It's possible that we've parsed a block argument through our
19075  // call to parse_arguments_list. If we found one, we should mark it
19076  // as invalid and destroy it, as we don't have a place for it on the
19077  // yield node.
19078  if (arguments.block != NULL) {
19079  pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19080  pm_node_destroy(parser, arguments.block);
19081  arguments.block = NULL;
19082  }
19083 
19084  pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
19085  if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19086 
19087  return node;
19088  }
19089  case PM_TOKEN_KEYWORD_CLASS: {
19090  size_t opening_newline_index = token_newline_index(parser);
19091  parser_lex(parser);
19092 
19093  pm_token_t class_keyword = parser->previous;
19094  pm_do_loop_stack_push(parser, false);
19095 
19096  pm_node_list_t current_block_exits = { 0 };
19097  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19098 
19099  if (accept1(parser, PM_TOKEN_LESS_LESS)) {
19100  pm_token_t operator = parser->previous;
19101  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
19102 
19103  pm_parser_scope_push(parser, true);
19104  if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19105  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
19106  }
19107 
19108  pm_node_t *statements = NULL;
19110  pm_accepts_block_stack_push(parser, true);
19111  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
19112  pm_accepts_block_stack_pop(parser);
19113  }
19114 
19115  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19116  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19117  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
19118  } else {
19119  parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19120  }
19121 
19122  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19123 
19124  pm_constant_id_list_t locals;
19125  pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19126 
19127  pm_parser_scope_pop(parser);
19128  pm_do_loop_stack_pop(parser);
19129 
19130  flush_block_exits(parser, previous_block_exits);
19131  pm_node_list_free(&current_block_exits);
19132 
19133  return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
19134  }
19135 
19136  pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
19137  pm_token_t name = parser->previous;
19138  if (name.type != PM_TOKEN_CONSTANT) {
19139  pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
19140  }
19141 
19142  pm_token_t inheritance_operator;
19143  pm_node_t *superclass;
19144 
19145  if (match1(parser, PM_TOKEN_LESS)) {
19146  inheritance_operator = parser->current;
19147  lex_state_set(parser, PM_LEX_STATE_BEG);
19148 
19149  parser->command_start = true;
19150  parser_lex(parser);
19151 
19152  superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
19153  } else {
19154  inheritance_operator = not_provided(parser);
19155  superclass = NULL;
19156  }
19157 
19158  pm_parser_scope_push(parser, true);
19159 
19160  if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
19161  expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
19162  } else {
19163  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19164  }
19165  pm_node_t *statements = NULL;
19166 
19168  pm_accepts_block_stack_push(parser, true);
19169  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
19170  pm_accepts_block_stack_pop(parser);
19171  }
19172 
19173  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19174  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19175  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
19176  } else {
19177  parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19178  }
19179 
19180  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19181 
19182  if (context_def_p(parser)) {
19183  pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
19184  }
19185 
19186  pm_constant_id_list_t locals;
19187  pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19188 
19189  pm_parser_scope_pop(parser);
19190  pm_do_loop_stack_pop(parser);
19191 
19192  if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
19193  pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
19194  }
19195 
19196  pop_block_exits(parser, previous_block_exits);
19197  pm_node_list_free(&current_block_exits);
19198 
19199  return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
19200  }
19201  case PM_TOKEN_KEYWORD_DEF: {
19202  pm_node_list_t current_block_exits = { 0 };
19203  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19204 
19205  pm_token_t def_keyword = parser->current;
19206  size_t opening_newline_index = token_newline_index(parser);
19207 
19208  pm_node_t *receiver = NULL;
19209  pm_token_t operator = not_provided(parser);
19210  pm_token_t name;
19211 
19212  // This context is necessary for lexing `...` in a bare params
19213  // correctly. It must be pushed before lexing the first param, so it
19214  // is here.
19215  context_push(parser, PM_CONTEXT_DEF_PARAMS);
19216  parser_lex(parser);
19217 
19218  // This will be false if the method name is not a valid identifier
19219  // but could be followed by an operator.
19220  bool valid_name = true;
19221 
19222  switch (parser->current.type) {
19223  case PM_CASE_OPERATOR:
19224  pm_parser_scope_push(parser, true);
19225  lex_state_set(parser, PM_LEX_STATE_ENDFN);
19226  parser_lex(parser);
19227 
19228  name = parser->previous;
19229  break;
19230  case PM_TOKEN_IDENTIFIER: {
19231  parser_lex(parser);
19232 
19233  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19234  receiver = parse_variable_call(parser);
19235 
19236  pm_parser_scope_push(parser, true);
19237  lex_state_set(parser, PM_LEX_STATE_FNAME);
19238  parser_lex(parser);
19239 
19240  operator = parser->previous;
19241  name = parse_method_definition_name(parser);
19242  } else {
19243  pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
19244  pm_parser_scope_push(parser, true);
19245 
19246  name = parser->previous;
19247  }
19248 
19249  break;
19250  }
19254  valid_name = false;
19255  /* fallthrough */
19256  case PM_TOKEN_CONSTANT:
19257  case PM_TOKEN_KEYWORD_NIL:
19258  case PM_TOKEN_KEYWORD_SELF:
19259  case PM_TOKEN_KEYWORD_TRUE:
19264  pm_parser_scope_push(parser, true);
19265  parser_lex(parser);
19266 
19267  pm_token_t identifier = parser->previous;
19268 
19269  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19270  lex_state_set(parser, PM_LEX_STATE_FNAME);
19271  parser_lex(parser);
19272  operator = parser->previous;
19273 
19274  switch (identifier.type) {
19275  case PM_TOKEN_CONSTANT:
19276  receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
19277  break;
19279  receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
19280  break;
19282  receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
19283  break;
19285  receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
19286  break;
19287  case PM_TOKEN_KEYWORD_NIL:
19288  receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
19289  break;
19290  case PM_TOKEN_KEYWORD_SELF:
19291  receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
19292  break;
19293  case PM_TOKEN_KEYWORD_TRUE:
19294  receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
19295  break;
19297  receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
19298  break;
19300  receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
19301  break;
19303  receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
19304  break;
19306  receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
19307  break;
19308  default:
19309  break;
19310  }
19311 
19312  name = parse_method_definition_name(parser);
19313  } else {
19314  if (!valid_name) {
19315  PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19316  }
19317 
19318  name = identifier;
19319  }
19320  break;
19321  }
19323  // The current context is `PM_CONTEXT_DEF_PARAMS`, however
19324  // the inner expression of this parenthesis should not be
19325  // processed under this context. Thus, the context is popped
19326  // here.
19327  context_pop(parser);
19328  parser_lex(parser);
19329 
19330  pm_token_t lparen = parser->previous;
19331  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
19332 
19333  accept1(parser, PM_TOKEN_NEWLINE);
19334  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19335  pm_token_t rparen = parser->previous;
19336 
19337  lex_state_set(parser, PM_LEX_STATE_FNAME);
19338  expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
19339 
19340  operator = parser->previous;
19341  receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
19342 
19343  // To push `PM_CONTEXT_DEF_PARAMS` again is for the same
19344  // reason as described the above.
19345  pm_parser_scope_push(parser, true);
19346  context_push(parser, PM_CONTEXT_DEF_PARAMS);
19347  name = parse_method_definition_name(parser);
19348  break;
19349  }
19350  default:
19351  pm_parser_scope_push(parser, true);
19352  name = parse_method_definition_name(parser);
19353  break;
19354  }
19355 
19356  pm_token_t lparen;
19357  pm_token_t rparen;
19358  pm_parameters_node_t *params;
19359 
19360  switch (parser->current.type) {
19362  parser_lex(parser);
19363  lparen = parser->previous;
19364 
19365  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19366  params = NULL;
19367  } else {
19368  params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, (uint16_t) (depth + 1));
19369  }
19370 
19371  lex_state_set(parser, PM_LEX_STATE_BEG);
19372  parser->command_start = true;
19373 
19374  context_pop(parser);
19375  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19376  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
19377  parser->previous.start = parser->previous.end;
19378  parser->previous.type = PM_TOKEN_MISSING;
19379  }
19380 
19381  rparen = parser->previous;
19382  break;
19383  }
19384  case PM_CASE_PARAMETER: {
19385  // If we're about to lex a label, we need to add the label
19386  // state to make sure the next newline is ignored.
19387  if (parser->current.type == PM_TOKEN_LABEL) {
19388  lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
19389  }
19390 
19391  lparen = not_provided(parser);
19392  rparen = not_provided(parser);
19393  params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, (uint16_t) (depth + 1));
19394 
19395  context_pop(parser);
19396  break;
19397  }
19398  default: {
19399  lparen = not_provided(parser);
19400  rparen = not_provided(parser);
19401  params = NULL;
19402 
19403  context_pop(parser);
19404  break;
19405  }
19406  }
19407 
19408  pm_node_t *statements = NULL;
19409  pm_token_t equal;
19410  pm_token_t end_keyword;
19411 
19412  if (accept1(parser, PM_TOKEN_EQUAL)) {
19413  if (token_is_setter_name(&name)) {
19414  pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
19415  }
19416  equal = parser->previous;
19417 
19418  context_push(parser, PM_CONTEXT_DEF);
19419  pm_do_loop_stack_push(parser, false);
19420  statements = (pm_node_t *) pm_statements_node_create(parser);
19421 
19422  pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19423 
19424  if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19425  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19426 
19427  pm_token_t rescue_keyword = parser->previous;
19428  pm_node_t *value = parse_expression(parser, binding_power, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19429  context_pop(parser);
19430 
19431  statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
19432  }
19433 
19434  pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
19435  pm_do_loop_stack_pop(parser);
19436  context_pop(parser);
19437  end_keyword = not_provided(parser);
19438  } else {
19439  equal = not_provided(parser);
19440 
19441  if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
19442  lex_state_set(parser, PM_LEX_STATE_BEG);
19443  parser->command_start = true;
19444  expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
19445  } else {
19446  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19447  }
19448 
19449  pm_accepts_block_stack_push(parser, true);
19450  pm_do_loop_stack_push(parser, false);
19451 
19453  pm_accepts_block_stack_push(parser, true);
19454  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
19455  pm_accepts_block_stack_pop(parser);
19456  }
19457 
19459  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19460  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
19461  } else {
19462  parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
19463  }
19464 
19465  pm_accepts_block_stack_pop(parser);
19466  pm_do_loop_stack_pop(parser);
19467 
19468  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
19469  end_keyword = parser->previous;
19470  }
19471 
19472  pm_constant_id_list_t locals;
19473  pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19474  pm_parser_scope_pop(parser);
19475 
19481  pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
19482 
19483  flush_block_exits(parser, previous_block_exits);
19484  pm_node_list_free(&current_block_exits);
19485 
19486  return (pm_node_t *) pm_def_node_create(
19487  parser,
19488  name_id,
19489  &name,
19490  receiver,
19491  params,
19492  statements,
19493  &locals,
19494  &def_keyword,
19495  &operator,
19496  &lparen,
19497  &rparen,
19498  &equal,
19499  &end_keyword
19500  );
19501  }
19502  case PM_TOKEN_KEYWORD_DEFINED: {
19503  parser_lex(parser);
19504  pm_token_t keyword = parser->previous;
19505 
19506  pm_token_t lparen;
19507  pm_token_t rparen;
19508  pm_node_t *expression;
19509  context_push(parser, PM_CONTEXT_DEFINED);
19510 
19511  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19512  lparen = parser->previous;
19513  expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19514 
19515  if (parser->recovering) {
19516  rparen = not_provided(parser);
19517  } else {
19518  accept1(parser, PM_TOKEN_NEWLINE);
19519  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19520  rparen = parser->previous;
19521  }
19522  } else {
19523  lparen = not_provided(parser);
19524  rparen = not_provided(parser);
19525  expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19526  }
19527 
19528  context_pop(parser);
19529  return (pm_node_t *) pm_defined_node_create(
19530  parser,
19531  &lparen,
19532  expression,
19533  &rparen,
19534  &PM_LOCATION_TOKEN_VALUE(&keyword)
19535  );
19536  }
19538  if (binding_power != PM_BINDING_POWER_STATEMENT) {
19539  pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19540  }
19541 
19542  parser_lex(parser);
19543  pm_token_t keyword = parser->previous;
19544 
19545  if (context_def_p(parser)) {
19546  pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19547  }
19548 
19549  expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19550  pm_token_t opening = parser->previous;
19551  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19552 
19553  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
19554  return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19555  }
19557  parser_lex(parser);
19558  return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
19559  case PM_TOKEN_KEYWORD_FOR: {
19560  size_t opening_newline_index = token_newline_index(parser);
19561  parser_lex(parser);
19562 
19563  pm_token_t for_keyword = parser->previous;
19564  pm_node_t *index;
19565 
19566  context_push(parser, PM_CONTEXT_FOR_INDEX);
19567 
19568  // First, parse out the first index expression.
19569  if (accept1(parser, PM_TOKEN_USTAR)) {
19570  pm_token_t star_operator = parser->previous;
19571  pm_node_t *name = NULL;
19572 
19573  if (token_begins_expression_p(parser->current.type)) {
19574  name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19575  }
19576 
19577  index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
19578  } else if (token_begins_expression_p(parser->current.type)) {
19579  index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19580  } else {
19581  pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19582  index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
19583  }
19584 
19585  // Now, if there are multiple index expressions, parse them out.
19586  if (match1(parser, PM_TOKEN_COMMA)) {
19587  index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19588  } else {
19589  index = parse_target(parser, index, false, false);
19590  }
19591 
19592  context_pop(parser);
19593  pm_do_loop_stack_push(parser, true);
19594 
19595  expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19596  pm_token_t in_keyword = parser->previous;
19597 
19598  pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19599  pm_do_loop_stack_pop(parser);
19600 
19601  pm_token_t do_keyword;
19602  if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19603  do_keyword = parser->previous;
19604  } else {
19605  do_keyword = not_provided(parser);
19606  if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19607  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19608  }
19609  }
19610 
19611  pm_statements_node_t *statements = NULL;
19612  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19613  statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19614  }
19615 
19616  parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19617  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19618 
19619  return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
19620  }
19621  case PM_TOKEN_KEYWORD_IF:
19622  if (parser_end_of_line_p(parser)) {
19623  PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19624  }
19625 
19626  size_t opening_newline_index = token_newline_index(parser);
19627  bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19628  parser_lex(parser);
19629 
19630  return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19631  case PM_TOKEN_KEYWORD_UNDEF: {
19632  if (binding_power != PM_BINDING_POWER_STATEMENT) {
19633  pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19634  }
19635 
19636  parser_lex(parser);
19637  pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19638  pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19639 
19640  if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19641  pm_node_destroy(parser, name);
19642  } else {
19643  pm_undef_node_append(undef, name);
19644 
19645  while (match1(parser, PM_TOKEN_COMMA)) {
19646  lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19647  parser_lex(parser);
19648  name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19649 
19650  if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19651  pm_node_destroy(parser, name);
19652  break;
19653  }
19654 
19655  pm_undef_node_append(undef, name);
19656  }
19657  }
19658 
19659  return (pm_node_t *) undef;
19660  }
19661  case PM_TOKEN_KEYWORD_NOT: {
19662  parser_lex(parser);
19663 
19664  pm_token_t message = parser->previous;
19665  pm_arguments_t arguments = { 0 };
19666  pm_node_t *receiver = NULL;
19667 
19668  accept1(parser, PM_TOKEN_NEWLINE);
19669 
19670  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19671  arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19672 
19673  if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19674  arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19675  } else {
19676  receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19677 
19678  if (!parser->recovering) {
19679  accept1(parser, PM_TOKEN_NEWLINE);
19680  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19681  arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19682  }
19683  }
19684  } else {
19685  receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19686  }
19687 
19688  return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
19689  }
19690  case PM_TOKEN_KEYWORD_UNLESS: {
19691  size_t opening_newline_index = token_newline_index(parser);
19692  parser_lex(parser);
19693 
19694  return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19695  }
19696  case PM_TOKEN_KEYWORD_MODULE: {
19697  pm_node_list_t current_block_exits = { 0 };
19698  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19699 
19700  size_t opening_newline_index = token_newline_index(parser);
19701  parser_lex(parser);
19702  pm_token_t module_keyword = parser->previous;
19703 
19704  pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19705  pm_token_t name;
19706 
19707  // If we can recover from a syntax error that occurred while parsing
19708  // the name of the module, then we'll handle that here.
19709  if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19710  pop_block_exits(parser, previous_block_exits);
19711  pm_node_list_free(&current_block_exits);
19712 
19713  pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19714  return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
19715  }
19716 
19717  while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19718  pm_token_t double_colon = parser->previous;
19719 
19720  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19721  constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
19722  }
19723 
19724  // Here we retrieve the name of the module. If it wasn't a constant,
19725  // then it's possible that `module foo` was passed, which is a
19726  // syntax error. We handle that here as well.
19727  name = parser->previous;
19728  if (name.type != PM_TOKEN_CONSTANT) {
19729  pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19730  }
19731 
19732  pm_parser_scope_push(parser, true);
19733  accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19734  pm_node_t *statements = NULL;
19735 
19737  pm_accepts_block_stack_push(parser, true);
19738  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
19739  pm_accepts_block_stack_pop(parser);
19740  }
19741 
19743  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19744  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
19745  } else {
19746  parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19747  }
19748 
19749  pm_constant_id_list_t locals;
19750  pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19751 
19752  pm_parser_scope_pop(parser);
19753  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
19754 
19755  if (context_def_p(parser)) {
19756  pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19757  }
19758 
19759  pop_block_exits(parser, previous_block_exits);
19760  pm_node_list_free(&current_block_exits);
19761 
19762  return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
19763  }
19764  case PM_TOKEN_KEYWORD_NIL:
19765  parser_lex(parser);
19766  return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
19767  case PM_TOKEN_KEYWORD_REDO: {
19768  parser_lex(parser);
19769 
19770  pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
19771  if (!parser->partial_script) parse_block_exit(parser, node);
19772 
19773  return node;
19774  }
19775  case PM_TOKEN_KEYWORD_RETRY: {
19776  parser_lex(parser);
19777 
19778  pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
19779  parse_retry(parser, node);
19780 
19781  return node;
19782  }
19783  case PM_TOKEN_KEYWORD_SELF:
19784  parser_lex(parser);
19785  return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
19786  case PM_TOKEN_KEYWORD_TRUE:
19787  parser_lex(parser);
19788  return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
19789  case PM_TOKEN_KEYWORD_UNTIL: {
19790  size_t opening_newline_index = token_newline_index(parser);
19791 
19792  context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19793  pm_do_loop_stack_push(parser, true);
19794 
19795  parser_lex(parser);
19796  pm_token_t keyword = parser->previous;
19797  pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19798 
19799  pm_do_loop_stack_pop(parser);
19800  context_pop(parser);
19801 
19802  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19803  pm_statements_node_t *statements = NULL;
19804 
19805  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19806  pm_accepts_block_stack_push(parser, true);
19807  statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19808  pm_accepts_block_stack_pop(parser);
19809  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19810  }
19811 
19812  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19813  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19814 
19815  return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19816  }
19817  case PM_TOKEN_KEYWORD_WHILE: {
19818  size_t opening_newline_index = token_newline_index(parser);
19819 
19820  context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19821  pm_do_loop_stack_push(parser, true);
19822 
19823  parser_lex(parser);
19824  pm_token_t keyword = parser->previous;
19825  pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19826 
19827  pm_do_loop_stack_pop(parser);
19828  context_pop(parser);
19829 
19830  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19831  pm_statements_node_t *statements = NULL;
19832 
19833  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19834  pm_accepts_block_stack_push(parser, true);
19835  statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19836  pm_accepts_block_stack_pop(parser);
19837  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19838  }
19839 
19840  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19841  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19842 
19843  return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19844  }
19845  case PM_TOKEN_PERCENT_LOWER_I: {
19846  parser_lex(parser);
19847  pm_token_t opening = parser->previous;
19848  pm_array_node_t *array = pm_array_node_create(parser, &opening);
19849 
19850  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19851  accept1(parser, PM_TOKEN_WORDS_SEP);
19852  if (match1(parser, PM_TOKEN_STRING_END)) break;
19853 
19854  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19855  pm_token_t opening = not_provided(parser);
19856  pm_token_t closing = not_provided(parser);
19857  pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19858  }
19859 
19860  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19861  }
19862 
19863  pm_token_t closing = parser->current;
19864  if (match1(parser, PM_TOKEN_EOF)) {
19865  pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19866  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19867  } else {
19868  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19869  }
19870  pm_array_node_close_set(array, &closing);
19871 
19872  return (pm_node_t *) array;
19873  }
19874  case PM_TOKEN_PERCENT_UPPER_I: {
19875  parser_lex(parser);
19876  pm_token_t opening = parser->previous;
19877  pm_array_node_t *array = pm_array_node_create(parser, &opening);
19878 
19879  // This is the current node that we are parsing that will be added to the
19880  // list of elements.
19881  pm_node_t *current = NULL;
19882 
19883  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19884  switch (parser->current.type) {
19885  case PM_TOKEN_WORDS_SEP: {
19886  if (current == NULL) {
19887  // If we hit a separator before we have any content, then we don't
19888  // need to do anything.
19889  } else {
19890  // If we hit a separator after we've hit content, then we need to
19891  // append that content to the list and reset the current node.
19892  pm_array_node_elements_append(array, current);
19893  current = NULL;
19894  }
19895 
19896  parser_lex(parser);
19897  break;
19898  }
19899  case PM_TOKEN_STRING_CONTENT: {
19900  pm_token_t opening = not_provided(parser);
19901  pm_token_t closing = not_provided(parser);
19902 
19903  if (current == NULL) {
19904  // If we hit content and the current node is NULL, then this is
19905  // the first string content we've seen. In that case we're going
19906  // to create a new string node and set that to the current.
19907  current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
19908  parser_lex(parser);
19909  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19910  // If we hit string content and the current node is an
19911  // interpolated string, then we need to append the string content
19912  // to the list of child nodes.
19913  pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
19914  parser_lex(parser);
19915 
19916  pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19917  } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19918  // If we hit string content and the current node is a symbol node,
19919  // then we need to convert the current node into an interpolated
19920  // string and add the string content to the list of child nodes.
19921  pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19922  pm_token_t bounds = not_provided(parser);
19923 
19924  pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19925  pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
19926  pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
19927  parser_lex(parser);
19928 
19929  pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19930  pm_interpolated_symbol_node_append(interpolated, first_string);
19931  pm_interpolated_symbol_node_append(interpolated, second_string);
19932 
19933  xfree(current);
19934  current = (pm_node_t *) interpolated;
19935  } else {
19936  assert(false && "unreachable");
19937  }
19938 
19939  break;
19940  }
19941  case PM_TOKEN_EMBVAR: {
19942  bool start_location_set = false;
19943  if (current == NULL) {
19944  // If we hit an embedded variable and the current node is NULL,
19945  // then this is the start of a new string. We'll set the current
19946  // node to a new interpolated string.
19947  pm_token_t opening = not_provided(parser);
19948  pm_token_t closing = not_provided(parser);
19949  current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19950  } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19951  // If we hit an embedded variable and the current node is a string
19952  // node, then we'll convert the current into an interpolated
19953  // string and add the string node to the list of parts.
19954  pm_token_t opening = not_provided(parser);
19955  pm_token_t closing = not_provided(parser);
19956  pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19957 
19958  current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
19959  pm_interpolated_symbol_node_append(interpolated, current);
19960  interpolated->base.location.start = current->location.start;
19961  start_location_set = true;
19962  current = (pm_node_t *) interpolated;
19963  } else {
19964  // If we hit an embedded variable and the current node is an
19965  // interpolated string, then we'll just add the embedded variable.
19966  }
19967 
19968  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19969  pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
19970  if (!start_location_set) {
19971  current->location.start = part->location.start;
19972  }
19973  break;
19974  }
19975  case PM_TOKEN_EMBEXPR_BEGIN: {
19976  bool start_location_set = false;
19977  if (current == NULL) {
19978  // If we hit an embedded expression and the current node is NULL,
19979  // then this is the start of a new string. We'll set the current
19980  // node to a new interpolated string.
19981  pm_token_t opening = not_provided(parser);
19982  pm_token_t closing = not_provided(parser);
19983  current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19984  } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19985  // If we hit an embedded expression and the current node is a
19986  // string node, then we'll convert the current into an
19987  // interpolated string and add the string node to the list of
19988  // parts.
19989  pm_token_t opening = not_provided(parser);
19990  pm_token_t closing = not_provided(parser);
19991  pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19992 
19993  current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
19994  pm_interpolated_symbol_node_append(interpolated, current);
19995  interpolated->base.location.start = current->location.start;
19996  start_location_set = true;
19997  current = (pm_node_t *) interpolated;
19998  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19999  // If we hit an embedded expression and the current node is an
20000  // interpolated string, then we'll just continue on.
20001  } else {
20002  assert(false && "unreachable");
20003  }
20004 
20005  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20006  pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20007  if (!start_location_set) {
20008  current->location.start = part->location.start;
20009  }
20010  break;
20011  }
20012  default:
20013  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
20014  parser_lex(parser);
20015  break;
20016  }
20017  }
20018 
20019  // If we have a current node, then we need to append it to the list.
20020  if (current) {
20021  pm_array_node_elements_append(array, current);
20022  }
20023 
20024  pm_token_t closing = parser->current;
20025  if (match1(parser, PM_TOKEN_EOF)) {
20026  pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
20027  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20028  } else {
20029  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
20030  }
20031  pm_array_node_close_set(array, &closing);
20032 
20033  return (pm_node_t *) array;
20034  }
20035  case PM_TOKEN_PERCENT_LOWER_W: {
20036  parser_lex(parser);
20037  pm_token_t opening = parser->previous;
20038  pm_array_node_t *array = pm_array_node_create(parser, &opening);
20039 
20040  // skip all leading whitespaces
20041  accept1(parser, PM_TOKEN_WORDS_SEP);
20042 
20043  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20044  accept1(parser, PM_TOKEN_WORDS_SEP);
20045  if (match1(parser, PM_TOKEN_STRING_END)) break;
20046 
20047  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20048  pm_token_t opening = not_provided(parser);
20049  pm_token_t closing = not_provided(parser);
20050 
20051  pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20052  pm_array_node_elements_append(array, string);
20053  }
20054 
20055  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20056  }
20057 
20058  pm_token_t closing = parser->current;
20059  if (match1(parser, PM_TOKEN_EOF)) {
20060  pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20061  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20062  } else {
20063  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20064  }
20065 
20066  pm_array_node_close_set(array, &closing);
20067  return (pm_node_t *) array;
20068  }
20069  case PM_TOKEN_PERCENT_UPPER_W: {
20070  parser_lex(parser);
20071  pm_token_t opening = parser->previous;
20072  pm_array_node_t *array = pm_array_node_create(parser, &opening);
20073 
20074  // This is the current node that we are parsing that will be added
20075  // to the list of elements.
20076  pm_node_t *current = NULL;
20077 
20078  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20079  switch (parser->current.type) {
20080  case PM_TOKEN_WORDS_SEP: {
20081  // Reset the explicit encoding if we hit a separator
20082  // since each element can have its own encoding.
20083  parser->explicit_encoding = NULL;
20084 
20085  if (current == NULL) {
20086  // If we hit a separator before we have any content,
20087  // then we don't need to do anything.
20088  } else {
20089  // If we hit a separator after we've hit content,
20090  // then we need to append that content to the list
20091  // and reset the current node.
20092  pm_array_node_elements_append(array, current);
20093  current = NULL;
20094  }
20095 
20096  parser_lex(parser);
20097  break;
20098  }
20099  case PM_TOKEN_STRING_CONTENT: {
20100  pm_token_t opening = not_provided(parser);
20101  pm_token_t closing = not_provided(parser);
20102 
20103  pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20104  pm_node_flag_set(string, parse_unescaped_encoding(parser));
20105  parser_lex(parser);
20106 
20107  if (current == NULL) {
20108  // If we hit content and the current node is NULL,
20109  // then this is the first string content we've seen.
20110  // In that case we're going to create a new string
20111  // node and set that to the current.
20112  current = string;
20113  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20114  // If we hit string content and the current node is
20115  // an interpolated string, then we need to append
20116  // the string content to the list of child nodes.
20117  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
20118  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20119  // If we hit string content and the current node is
20120  // a string node, then we need to convert the
20121  // current node into an interpolated string and add
20122  // the string content to the list of child nodes.
20123  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20124  pm_interpolated_string_node_append(interpolated, current);
20125  pm_interpolated_string_node_append(interpolated, string);
20126  current = (pm_node_t *) interpolated;
20127  } else {
20128  assert(false && "unreachable");
20129  }
20130 
20131  break;
20132  }
20133  case PM_TOKEN_EMBVAR: {
20134  if (current == NULL) {
20135  // If we hit an embedded variable and the current
20136  // node is NULL, then this is the start of a new
20137  // string. We'll set the current node to a new
20138  // interpolated string.
20139  pm_token_t opening = not_provided(parser);
20140  pm_token_t closing = not_provided(parser);
20141  current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20142  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20143  // If we hit an embedded variable and the current
20144  // node is a string node, then we'll convert the
20145  // current into an interpolated string and add the
20146  // string node to the list of parts.
20147  pm_token_t opening = not_provided(parser);
20148  pm_token_t closing = not_provided(parser);
20149  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20150  pm_interpolated_string_node_append(interpolated, current);
20151  current = (pm_node_t *) interpolated;
20152  } else {
20153  // If we hit an embedded variable and the current
20154  // node is an interpolated string, then we'll just
20155  // add the embedded variable.
20156  }
20157 
20158  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20159  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20160  break;
20161  }
20162  case PM_TOKEN_EMBEXPR_BEGIN: {
20163  if (current == NULL) {
20164  // If we hit an embedded expression and the current
20165  // node is NULL, then this is the start of a new
20166  // string. We'll set the current node to a new
20167  // interpolated string.
20168  pm_token_t opening = not_provided(parser);
20169  pm_token_t closing = not_provided(parser);
20170  current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20171  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20172  // If we hit an embedded expression and the current
20173  // node is a string node, then we'll convert the
20174  // current into an interpolated string and add the
20175  // string node to the list of parts.
20176  pm_token_t opening = not_provided(parser);
20177  pm_token_t closing = not_provided(parser);
20178  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20179  pm_interpolated_string_node_append(interpolated, current);
20180  current = (pm_node_t *) interpolated;
20181  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20182  // If we hit an embedded expression and the current
20183  // node is an interpolated string, then we'll just
20184  // continue on.
20185  } else {
20186  assert(false && "unreachable");
20187  }
20188 
20189  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20190  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20191  break;
20192  }
20193  default:
20194  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
20195  parser_lex(parser);
20196  break;
20197  }
20198  }
20199 
20200  // If we have a current node, then we need to append it to the list.
20201  if (current) {
20202  pm_array_node_elements_append(array, current);
20203  }
20204 
20205  pm_token_t closing = parser->current;
20206  if (match1(parser, PM_TOKEN_EOF)) {
20207  pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
20208  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20209  } else {
20210  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
20211  }
20212 
20213  pm_array_node_close_set(array, &closing);
20214  return (pm_node_t *) array;
20215  }
20216  case PM_TOKEN_REGEXP_BEGIN: {
20217  pm_token_t opening = parser->current;
20218  parser_lex(parser);
20219 
20220  if (match1(parser, PM_TOKEN_REGEXP_END)) {
20221  // If we get here, then we have an end immediately after a start. In
20222  // that case we'll create an empty content token and return an
20223  // uninterpolated regular expression.
20224  pm_token_t content = (pm_token_t) {
20226  .start = parser->previous.end,
20227  .end = parser->previous.end
20228  };
20229 
20230  parser_lex(parser);
20231 
20232  pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20234 
20235  return node;
20236  }
20237 
20239 
20240  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20241  // In this case we've hit string content so we know the regular
20242  // expression at least has something in it. We'll need to check if the
20243  // following token is the end (in which case we can return a plain
20244  // regular expression) or if it's not then it has interpolation.
20245  pm_string_t unescaped = parser->current_string;
20246  pm_token_t content = parser->current;
20247  bool ascii_only = parser->current_regular_expression_ascii_only;
20248  parser_lex(parser);
20249 
20250  // If we hit an end, then we can create a regular expression
20251  // node without interpolation, which can be represented more
20252  // succinctly and more easily compiled.
20253  if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20254  pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20255 
20256  // If we're not immediately followed by a =~, then we want
20257  // to parse all of the errors at this point. If it is
20258  // followed by a =~, then it will get parsed higher up while
20259  // parsing the named captures as well.
20260  if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20261  parse_regular_expression_errors(parser, node);
20262  }
20263 
20264  pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
20265  return (pm_node_t *) node;
20266  }
20267 
20268  // If we get here, then we have interpolation so we'll need to create
20269  // a regular expression node with interpolation.
20270  interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20271 
20272  pm_token_t opening = not_provided(parser);
20273  pm_token_t closing = not_provided(parser);
20274  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20275 
20276  if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20277  // This is extremely strange, but the first string part of a
20278  // regular expression will always be tagged as binary if we
20279  // are in a US-ASCII file, no matter its contents.
20280  pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20281  }
20282 
20283  pm_interpolated_regular_expression_node_append(interpolated, part);
20284  } else {
20285  // If the first part of the body of the regular expression is not a
20286  // string content, then we have interpolation and we need to create an
20287  // interpolated regular expression node.
20288  interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20289  }
20290 
20291  // Now that we're here and we have interpolation, we'll parse all of the
20292  // parts into the list.
20293  pm_node_t *part;
20294  while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20295  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20296  pm_interpolated_regular_expression_node_append(interpolated, part);
20297  }
20298  }
20299 
20300  pm_token_t closing = parser->current;
20301  if (match1(parser, PM_TOKEN_EOF)) {
20302  pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20303  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20304  } else {
20305  expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20306  }
20307 
20308  pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20309  return (pm_node_t *) interpolated;
20310  }
20311  case PM_TOKEN_BACKTICK:
20312  case PM_TOKEN_PERCENT_LOWER_X: {
20313  parser_lex(parser);
20314  pm_token_t opening = parser->previous;
20315 
20316  // When we get here, we don't know if this string is going to have
20317  // interpolation or not, even though it is allowed. Still, we want to be
20318  // able to return a string node without interpolation if we can since
20319  // it'll be faster.
20320  if (match1(parser, PM_TOKEN_STRING_END)) {
20321  // If we get here, then we have an end immediately after a start. In
20322  // that case we'll create an empty content token and return an
20323  // uninterpolated string.
20324  pm_token_t content = (pm_token_t) {
20326  .start = parser->previous.end,
20327  .end = parser->previous.end
20328  };
20329 
20330  parser_lex(parser);
20331  return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
20332  }
20333 
20335 
20336  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20337  // In this case we've hit string content so we know the string
20338  // at least has something in it. We'll need to check if the
20339  // following token is the end (in which case we can return a
20340  // plain string) or if it's not then it has interpolation.
20341  pm_string_t unescaped = parser->current_string;
20342  pm_token_t content = parser->current;
20343  parser_lex(parser);
20344 
20345  if (match1(parser, PM_TOKEN_STRING_END)) {
20346  pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
20347  pm_node_flag_set(node, parse_unescaped_encoding(parser));
20348  parser_lex(parser);
20349  return node;
20350  }
20351 
20352  // If we get here, then we have interpolation so we'll need to
20353  // create a string node with interpolation.
20354  node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20355 
20356  pm_token_t opening = not_provided(parser);
20357  pm_token_t closing = not_provided(parser);
20358 
20359  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20360  pm_node_flag_set(part, parse_unescaped_encoding(parser));
20361 
20362  pm_interpolated_xstring_node_append(node, part);
20363  } else {
20364  // If the first part of the body of the string is not a string
20365  // content, then we have interpolation and we need to create an
20366  // interpolated string node.
20367  node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20368  }
20369 
20370  pm_node_t *part;
20371  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20372  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20373  pm_interpolated_xstring_node_append(node, part);
20374  }
20375  }
20376 
20377  pm_token_t closing = parser->current;
20378  if (match1(parser, PM_TOKEN_EOF)) {
20379  pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20380  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20381  } else {
20382  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20383  }
20384  pm_interpolated_xstring_node_closing_set(node, &closing);
20385 
20386  return (pm_node_t *) node;
20387  }
20388  case PM_TOKEN_USTAR: {
20389  parser_lex(parser);
20390 
20391  // * operators at the beginning of expressions are only valid in the
20392  // context of a multiple assignment. We enforce that here. We'll
20393  // still lex past it though and create a missing node place.
20394  if (binding_power != PM_BINDING_POWER_STATEMENT) {
20395  pm_parser_err_prefix(parser, diag_id);
20396  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20397  }
20398 
20399  pm_token_t operator = parser->previous;
20400  pm_node_t *name = NULL;
20401 
20402  if (token_begins_expression_p(parser->current.type)) {
20403  name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20404  }
20405 
20406  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
20407 
20408  if (match1(parser, PM_TOKEN_COMMA)) {
20409  return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20410  } else {
20411  return parse_target_validate(parser, splat, true);
20412  }
20413  }
20414  case PM_TOKEN_BANG: {
20415  if (binding_power > PM_BINDING_POWER_UNARY) {
20416  pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20417  }
20418 
20419  parser_lex(parser);
20420 
20421  pm_token_t operator = parser->previous;
20422  pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20423  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20424 
20425  pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20426  return (pm_node_t *) node;
20427  }
20428  case PM_TOKEN_TILDE: {
20429  if (binding_power > PM_BINDING_POWER_UNARY) {
20430  pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20431  }
20432  parser_lex(parser);
20433 
20434  pm_token_t operator = parser->previous;
20435  pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20436  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20437 
20438  return (pm_node_t *) node;
20439  }
20440  case PM_TOKEN_UMINUS: {
20441  if (binding_power > PM_BINDING_POWER_UNARY) {
20442  pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20443  }
20444  parser_lex(parser);
20445 
20446  pm_token_t operator = parser->previous;
20447  pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20448  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20449 
20450  return (pm_node_t *) node;
20451  }
20452  case PM_TOKEN_UMINUS_NUM: {
20453  parser_lex(parser);
20454 
20455  pm_token_t operator = parser->previous;
20456  pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20457 
20458  if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20459  pm_token_t exponent_operator = parser->previous;
20460  pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20461  node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
20462  node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20463  } else {
20464  switch (PM_NODE_TYPE(node)) {
20465  case PM_INTEGER_NODE:
20466  case PM_FLOAT_NODE:
20467  case PM_RATIONAL_NODE:
20468  case PM_IMAGINARY_NODE:
20469  parse_negative_numeric(node);
20470  break;
20471  default:
20472  node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20473  break;
20474  }
20475  }
20476 
20477  return node;
20478  }
20479  case PM_TOKEN_MINUS_GREATER: {
20480  int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20481  parser->lambda_enclosure_nesting = parser->enclosure_nesting;
20482 
20483  size_t opening_newline_index = token_newline_index(parser);
20484  pm_accepts_block_stack_push(parser, true);
20485  parser_lex(parser);
20486 
20487  pm_token_t operator = parser->previous;
20488  pm_parser_scope_push(parser, false);
20489 
20490  pm_block_parameters_node_t *block_parameters;
20491 
20492  switch (parser->current.type) {
20494  pm_token_t opening = parser->current;
20495  parser_lex(parser);
20496 
20497  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20498  block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20499  } else {
20500  block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20501  }
20502 
20503  accept1(parser, PM_TOKEN_NEWLINE);
20504  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20505 
20506  pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20507  break;
20508  }
20509  case PM_CASE_PARAMETER: {
20510  pm_accepts_block_stack_push(parser, false);
20511  pm_token_t opening = not_provided(parser);
20512  block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20513  pm_accepts_block_stack_pop(parser);
20514  break;
20515  }
20516  default: {
20517  block_parameters = NULL;
20518  break;
20519  }
20520  }
20521 
20522  pm_token_t opening;
20523  pm_node_t *body = NULL;
20524  parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20525 
20526  if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20527  opening = parser->previous;
20528 
20529  if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20530  body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
20531  }
20532 
20533  parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20534  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20535  } else {
20536  expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20537  opening = parser->previous;
20538 
20540  pm_accepts_block_stack_push(parser, true);
20541  body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
20542  pm_accepts_block_stack_pop(parser);
20543  }
20544 
20545  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20546  assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20547  body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
20548  } else {
20549  parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20550  }
20551 
20552  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20553  }
20554 
20555  pm_constant_id_list_t locals;
20556  pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20557  pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
20558 
20559  pm_parser_scope_pop(parser);
20560  pm_accepts_block_stack_pop(parser);
20561 
20562  return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
20563  }
20564  case PM_TOKEN_UPLUS: {
20565  if (binding_power > PM_BINDING_POWER_UNARY) {
20566  pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20567  }
20568  parser_lex(parser);
20569 
20570  pm_token_t operator = parser->previous;
20571  pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20572  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20573 
20574  return (pm_node_t *) node;
20575  }
20576  case PM_TOKEN_STRING_BEGIN:
20577  return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20578  case PM_TOKEN_SYMBOL_BEGIN: {
20579  pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20580  parser_lex(parser);
20581 
20582  return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20583  }
20584  default: {
20585  pm_context_t recoverable = context_recoverable(parser, &parser->current);
20586 
20587  if (recoverable != PM_CONTEXT_NONE) {
20588  parser->recovering = true;
20589 
20590  // If the given error is not the generic one, then we'll add it
20591  // here because it will provide more context in addition to the
20592  // recoverable error that we will also add.
20593  if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20594  pm_parser_err_prefix(parser, diag_id);
20595  }
20596 
20597  // If we get here, then we are assuming this token is closing a
20598  // parent context, so we'll indicate that to the user so that
20599  // they know how we behaved.
20600  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20601  } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20602  // We're going to make a special case here, because "cannot
20603  // parse expression" is pretty generic, and we know here that we
20604  // have an unexpected token.
20605  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20606  } else {
20607  pm_parser_err_prefix(parser, diag_id);
20608  }
20609 
20610  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20611  }
20612  }
20613 }
20614 
20624 static pm_node_t *
20625 parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20626  pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20627 
20628  // Contradicting binding powers, the right-hand-side value of the assignment
20629  // allows the `rescue` modifier.
20630  if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20631  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20632 
20633  pm_token_t rescue = parser->current;
20634  parser_lex(parser);
20635 
20636  pm_node_t *right = parse_expression(parser, binding_power, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20637  context_pop(parser);
20638 
20639  return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20640  }
20641 
20642  return value;
20643 }
20644 
/**
 * Walk the value of an assignment and call pm_locals_read for the local
 * variable node handled below. Begin, parentheses and statements nodes are
 * traversed recursively; every other node type is ignored.
 *
 * NOTE(review): presumably pm_locals_read marks the local as "read" so that an
 * inner write used as the value of an outer write is not reported as unused --
 * confirm against pm_locals_read.
 *
 * @param parser The parser whose scopes are searched.
 * @param node The value node to inspect.
 */
20649 static void
20650 parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20651  switch (PM_NODE_TYPE(node)) {
20652  case PM_BEGIN_NODE: {
20653  const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
 // An empty begin has no statements to descend into.
20654  if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20655  break;
20656  }
 // NOTE(review): the `case` label and the declaration of `cast` for this
 // branch are absent from this listing (the embedded numbering jumps from
 // 20656 to 20659). The node type handled here exposes `depth` and `name`
 // fields; restore the missing lines from the upstream file.
20659  pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20660  break;
20661  }
20662  case PM_PARENTHESES_NODE: {
20663  const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
 // Empty parentheses have no body to descend into.
20664  if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20665  break;
20666  }
20667  case PM_STATEMENTS_NODE: {
20668  const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20669  const pm_node_t *statement;
20670 
 // Every statement in the list is inspected, not just the last one.
20671  PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20672  parse_assignment_value_local(parser, statement);
20673  }
20674  break;
20675  }
20676  default:
20677  break;
20678  }
20679 }
20680 
20693 static pm_node_t *
20694 parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20695  bool permitted = true;
20696  if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20697 
20698  pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1));
20699  if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20700 
20701  parse_assignment_value_local(parser, value);
20702  bool single_value = true;
20703 
20704  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20705  single_value = false;
20706 
20707  pm_token_t opening = not_provided(parser);
20708  pm_array_node_t *array = pm_array_node_create(parser, &opening);
20709 
20710  pm_array_node_elements_append(array, value);
20711  value = (pm_node_t *) array;
20712 
20713  while (accept1(parser, PM_TOKEN_COMMA)) {
20714  pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20715 
20716  pm_array_node_elements_append(array, element);
20717  if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20718 
20719  parse_assignment_value_local(parser, element);
20720  }
20721  }
20722 
20723  // Contradicting binding powers, the right-hand-side value of the assignment
20724  // allows the `rescue` modifier.
20725  if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20726  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20727 
20728  pm_token_t rescue = parser->current;
20729  parser_lex(parser);
20730 
20731  bool accepts_command_call_inner = false;
20732 
20733  // RHS can accept command call iff the value is a call with arguments
20734  // but without parenthesis.
20735  if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20736  pm_call_node_t *call_node = (pm_call_node_t *) value;
20737  if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20738  accepts_command_call_inner = true;
20739  }
20740  }
20741 
20742  pm_node_t *right = parse_expression(parser, binding_power, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20743  context_pop(parser);
20744 
20745  return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20746  }
20747 
20748  return value;
20749 }
20750 
20758 static void
20759 parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20760  if (call_node->arguments != NULL) {
20761  pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20762  pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
20763  call_node->arguments = NULL;
20764  }
20765 
20766  if (call_node->block != NULL) {
20767  pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20768  pm_node_destroy(parser, (pm_node_t *) call_node->block);
20769  call_node->block = NULL;
20770  }
20771 }
20772 
/**
 * Data passed to a regular expression parsing callback.
 *
 * NOTE(review): most of this definition is absent from this listing; only the
 * opening line and the `shared` member survive, and the closing line carrying
 * the typedef name is missing as well. Code later in the file reads `parser`,
 * `call`, `names`, `match` and `shared` through a `callback_data` pointer,
 * which presumably has this type -- confirm against the upstream file and
 * restore the missing members.
 */
20777 typedef struct {
20780 
20783 
20786 
20789 
 // NOTE(review): presumably true when the regular expression content is a
 // shared slice of the source (elsewhere a `shared` member is initialised from
 // `content->type == PM_STRING_SHARED`) -- confirm.
20795  bool shared;
20797 
/**
 * Callback invoked with the name of one named capture group found in a
 * regular expression. If the name is a valid local variable name that has not
 * been seen yet, a local variable target node is appended to the (lazily
 * created) match write node stored in the callback data, and the local is
 * added to the local table when it is not already a local.
 *
 * @param capture The name of the capture group.
 * @param data Opaque callback data; the fields read below are `parser`,
 *     `call`, `names`, `shared` and `match`.
 */
20802 static void
20803 parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
 // NOTE(review): the declaration of `callback_data` is absent from this
 // listing (embedded line 20804 is missing); presumably it is `data` cast to
 // the callback data struct type -- restore it from the upstream file.
20805 
20806  pm_parser_t *parser = callback_data->parser;
20807  pm_call_node_t *call = callback_data->call;
20808  pm_constant_id_list_t *names = &callback_data->names;
20809 
20810  const uint8_t *source = pm_string_source(capture);
20811  size_t length = pm_string_length(capture);
20812 
20813  pm_location_t location;
20814  pm_constant_id_t name;
20815 
20816  // If the name of the capture group isn't a valid identifier, we do
20817  // not add it to the local table.
20818  if (!pm_slice_is_valid_local(parser, source, source + length)) return;
20819 
20820  if (callback_data->shared) {
20821  // If the unescaped string is a slice of the source, then we can
20822  // copy the names directly. The pointers will line up.
20823  location = (pm_location_t) { .start = source, .end = source + length };
20824  name = pm_parser_constant_id_location(parser, location.start, location.end);
20825  } else {
20826  // Otherwise, the name is a slice of the malloc-ed owned string,
20827  // in which case we need to copy it out into a new string.
 // The location recorded for the target is the whole receiver of the call,
 // since `source` does not point into the receiver's source range here.
20828  location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
20829 
20830  void *memory = xmalloc(length);
 // Allocation failure is treated as fatal.
20831  if (memory == NULL) abort();
20832 
20833  memcpy(memory, source, length);
 // NOTE(review): presumably the constant pool takes ownership of `memory`
 // here (it is not freed in this function) -- confirm.
20834  name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20835  }
20836 
20837  // Add this name to the list of constants if it is valid, not duplicated,
20838  // and not a keyword.
20839  if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20840  pm_constant_id_list_append(names, name);
20841 
20842  int depth;
20843  if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20844  // If the local is not already a local but it is a keyword, then we
20845  // do not want to add a capture for this.
 // Note that the name has already been appended to `names` above, so a
 // repeated keyword-named capture is skipped by the duplicate check.
20846  if (pm_local_is_keyword((const char *) source, length)) return;
20847 
20848  // If the identifier is not already a local, then we will add it to
20849  // the local table.
20850  pm_parser_local_add(parser, name, location.start, location.end, 0);
20851  }
20852 
20853  // Here we lazily create the MatchWriteNode since we know we're
20854  // about to add a target.
20855  if (callback_data->match == NULL) {
20856  callback_data->match = pm_match_write_node_create(parser, call);
20857  }
20858 
20859  // Next, create the local variable target and add it to the list of
20860  // targets for the match.
 // A depth of -1 means the local was not found and was just added above, so
 // the target is created with depth 0.
20861  pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
20862  pm_node_list_append(&callback_data->match->targets, target);
20863  }
20864 }
20865 
/**
 * Run the regular expression parser over `content`, passing
 * parse_regular_expression_named_capture as the named-capture callback and
 * parse_regular_expression_error as the error callback.
 *
 * @param parser The parser driving the parse.
 * @param content The content of the regular expression.
 * @param call The call node whose receiver supplies the location range given
 *     to the error callback data.
 * @param extended_mode Forwarded unchanged to pm_regexp_parse.
 * @return The match write node if the capture callback created one, otherwise
 *     `call` itself.
 */
20870 static pm_node_t *
20871 parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
 // NOTE(review): the opening line of this initializer (the declaration of
 // `callback_data`) is absent from this listing (embedded line 20872 is
 // missing) -- restore it from the upstream file.
20873  .parser = parser,
20874  .call = call,
20875  .names = { 0 },
20876  .shared = content->type == PM_STRING_SHARED
20877  };
20878 
 // NOTE(review): likewise, the opening line of this second initializer (the
 // declaration of `error_data`) is absent (embedded line 20879 is missing).
20880  .parser = parser,
20881  .start = call->receiver->location.start,
20882  .end = call->receiver->location.end,
20883  .shared = content->type == PM_STRING_SHARED
20884  };
20885 
20886  pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
 // The list of seen names is only needed while the callbacks run.
20887  pm_constant_id_list_free(&callback_data.names);
20888 
 // `match` is only set by the capture callback when it adds a target.
20889  if (callback_data.match != NULL) {
20890  return (pm_node_t *) callback_data.match;
20891  } else {
20892  return (pm_node_t *) call;
20893  }
20894 }
20895 
20896 static inline pm_node_t *
20897 parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
20898  pm_token_t token = parser->current;
20899 
20900  switch (token.type) {
20901  case PM_TOKEN_EQUAL: {
20902  switch (PM_NODE_TYPE(node)) {
20903  case PM_CALL_NODE: {
20904  // If we have no arguments to the call node and we need this
20905  // to be a target then this is either a method call or a
20906  // local variable write. This _must_ happen before the value
20907  // is parsed because it could be referenced in the value.
20908  pm_call_node_t *call_node = (pm_call_node_t *) node;
20910  pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20911  }
20912  }
20913  /* fallthrough */
20914  case PM_CASE_WRITABLE: {
20915  parser_lex(parser);
20916  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20917 
20918  if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
20919  pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
20920  }
20921 
20922  return parse_write(parser, node, &token, value);
20923  }
20924  case PM_SPLAT_NODE: {
20925  pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
20926  pm_multi_target_node_targets_append(parser, multi_target, node);
20927 
20928  parser_lex(parser);
20929  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20930  return parse_write(parser, (pm_node_t *) multi_target, &token, value);
20931  }
20933  case PM_FALSE_NODE:
20934  case PM_SOURCE_FILE_NODE:
20935  case PM_SOURCE_LINE_NODE:
20936  case PM_NIL_NODE:
20937  case PM_SELF_NODE:
20938  case PM_TRUE_NODE: {
20939  // In these special cases, we have specific error messages
20940  // and we will replace them with local variable writes.
20941  parser_lex(parser);
20942  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20943  return parse_unwriteable_write(parser, node, &token, value);
20944  }
20945  default:
20946  // In this case we have an = sign, but we don't know what
20947  // it's for. We need to treat it as an error. We'll mark it
20948  // as an error and skip past it.
20949  parser_lex(parser);
20950  pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
20951  return node;
20952  }
20953  }
20955  switch (PM_NODE_TYPE(node)) {
20958  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20959  /* fallthrough */
20961  parser_lex(parser);
20962 
20963  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20964  pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
20965 
20966  pm_node_destroy(parser, node);
20967  return result;
20968  }
20970  parser_lex(parser);
20971 
20972  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20973  pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
20974 
20975  pm_node_destroy(parser, node);
20976  return result;
20977  }
20978  case PM_CONSTANT_PATH_NODE: {
20979  parser_lex(parser);
20980 
20981  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20982  pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
20983 
20984  return parse_shareable_constant_write(parser, write);
20985  }
20986  case PM_CONSTANT_READ_NODE: {
20987  parser_lex(parser);
20988 
20989  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20990  pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
20991 
20992  pm_node_destroy(parser, node);
20993  return parse_shareable_constant_write(parser, write);
20994  }
20996  parser_lex(parser);
20997 
20998  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20999  pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21000 
21001  pm_node_destroy(parser, node);
21002  return result;
21003  }
21006  parser_lex(parser);
21007 
21008  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21009  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21010 
21011  pm_node_destroy(parser, node);
21012  return result;
21013  }
21014  case PM_CALL_NODE: {
21015  pm_call_node_t *cast = (pm_call_node_t *) node;
21016 
21017  // If we have a vcall (a method with no arguments and no
21018  // receiver that could have been a local variable) then we
21019  // will transform it into a local variable write.
21021  pm_location_t *message_loc = &cast->message_loc;
21022  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21023 
21024  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21025  parser_lex(parser);
21026 
21027  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21028  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21029 
21030  pm_node_destroy(parser, (pm_node_t *) cast);
21031  return result;
21032  }
21033 
21034  // Move past the token here so that we have already added
21035  // the local variable by this point.
21036  parser_lex(parser);
21037 
21038  // If there is no call operator and the message is "[]" then
21039  // this is an aref expression, and we can transform it into
21040  // an aset expression.
21041  if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21042  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21043  return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
21044  }
21045 
21046  // If this node cannot be writable, then we have an error.
21047  if (pm_call_node_writable_p(parser, cast)) {
21048  parse_write_name(parser, &cast->name);
21049  } else {
21050  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21051  }
21052 
21053  parse_call_operator_write(parser, cast, &token);
21054  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21055  return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
21056  }
21057  case PM_MULTI_WRITE_NODE: {
21058  parser_lex(parser);
21059  pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21060  return node;
21061  }
21062  default:
21063  parser_lex(parser);
21064 
21065  // In this case we have an &&= sign, but we don't know what it's for.
21066  // We need to treat it as an error. For now, we'll mark it as an error
21067  // and just skip right past it.
21068  pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21069  return node;
21070  }
21071  }
21072  case PM_TOKEN_PIPE_PIPE_EQUAL: {
21073  switch (PM_NODE_TYPE(node)) {
21076  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21077  /* fallthrough */
21079  parser_lex(parser);
21080 
21081  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21082  pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
21083 
21084  pm_node_destroy(parser, node);
21085  return result;
21086  }
21088  parser_lex(parser);
21089 
21090  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21091  pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21092 
21093  pm_node_destroy(parser, node);
21094  return result;
21095  }
21096  case PM_CONSTANT_PATH_NODE: {
21097  parser_lex(parser);
21098 
21099  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21100  pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21101 
21102  return parse_shareable_constant_write(parser, write);
21103  }
21104  case PM_CONSTANT_READ_NODE: {
21105  parser_lex(parser);
21106 
21107  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21108  pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21109 
21110  pm_node_destroy(parser, node);
21111  return parse_shareable_constant_write(parser, write);
21112  }
21114  parser_lex(parser);
21115 
21116  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21117  pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21118 
21119  pm_node_destroy(parser, node);
21120  return result;
21121  }
21124  parser_lex(parser);
21125 
21126  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21127  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21128 
21129  pm_node_destroy(parser, node);
21130  return result;
21131  }
21132  case PM_CALL_NODE: {
21133  pm_call_node_t *cast = (pm_call_node_t *) node;
21134 
21135  // If we have a vcall (a method with no arguments and no
21136  // receiver that could have been a local variable) then we
21137  // will transform it into a local variable write.
21139  pm_location_t *message_loc = &cast->message_loc;
21140  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21141 
21142  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21143  parser_lex(parser);
21144 
21145  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21146  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21147 
21148  pm_node_destroy(parser, (pm_node_t *) cast);
21149  return result;
21150  }
21151 
21152  // Move past the token here so that we have already added
21153  // the local variable by this point.
21154  parser_lex(parser);
21155 
21156  // If there is no call operator and the message is "[]" then
21157  // this is an aref expression, and we can transform it into
21158  // an aset expression.
21159  if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21160  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21161  return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
21162  }
21163 
21164  // If this node cannot be writable, then we have an error.
21165  if (pm_call_node_writable_p(parser, cast)) {
21166  parse_write_name(parser, &cast->name);
21167  } else {
21168  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21169  }
21170 
21171  parse_call_operator_write(parser, cast, &token);
21172  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21173  return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
21174  }
21175  case PM_MULTI_WRITE_NODE: {
21176  parser_lex(parser);
21177  pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21178  return node;
21179  }
21180  default:
21181  parser_lex(parser);
21182 
21183  // In this case we have an ||= sign, but we don't know what it's for.
21184  // We need to treat it as an error. For now, we'll mark it as an error
21185  // and just skip right past it.
21186  pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21187  return node;
21188  }
21189  }
21191  case PM_TOKEN_CARET_EQUAL:
21194  case PM_TOKEN_MINUS_EQUAL:
21196  case PM_TOKEN_PIPE_EQUAL:
21197  case PM_TOKEN_PLUS_EQUAL:
21198  case PM_TOKEN_SLASH_EQUAL:
21199  case PM_TOKEN_STAR_EQUAL:
21200  case PM_TOKEN_STAR_STAR_EQUAL: {
21201  switch (PM_NODE_TYPE(node)) {
21204  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21205  /* fallthrough */
21207  parser_lex(parser);
21208 
21209  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21210  pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
21211 
21212  pm_node_destroy(parser, node);
21213  return result;
21214  }
21216  parser_lex(parser);
21217 
21218  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21219  pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21220 
21221  pm_node_destroy(parser, node);
21222  return result;
21223  }
21224  case PM_CONSTANT_PATH_NODE: {
21225  parser_lex(parser);
21226 
21227  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21228  pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21229 
21230  return parse_shareable_constant_write(parser, write);
21231  }
21232  case PM_CONSTANT_READ_NODE: {
21233  parser_lex(parser);
21234 
21235  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21236  pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21237 
21238  pm_node_destroy(parser, node);
21239  return parse_shareable_constant_write(parser, write);
21240  }
21242  parser_lex(parser);
21243 
21244  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21245  pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21246 
21247  pm_node_destroy(parser, node);
21248  return result;
21249  }
21252  parser_lex(parser);
21253 
21254  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21255  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21256 
21257  pm_node_destroy(parser, node);
21258  return result;
21259  }
21260  case PM_CALL_NODE: {
21261  parser_lex(parser);
21262  pm_call_node_t *cast = (pm_call_node_t *) node;
21263 
21264  // If we have a vcall (a method with no arguments and no
21265  // receiver that could have been a local variable) then we
21266  // will transform it into a local variable write.
21268  pm_location_t *message_loc = &cast->message_loc;
21269  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21270 
21271  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21272  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21273  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21274 
21275  pm_node_destroy(parser, (pm_node_t *) cast);
21276  return result;
21277  }
21278 
21279  // If there is no call operator and the message is "[]" then
21280  // this is an aref expression, and we can transform it into
21281  // an aset expression.
21282  if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21283  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21284  return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
21285  }
21286 
21287  // If this node cannot be writable, then we have an error.
21288  if (pm_call_node_writable_p(parser, cast)) {
21289  parse_write_name(parser, &cast->name);
21290  } else {
21291  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21292  }
21293 
21294  parse_call_operator_write(parser, cast, &token);
21295  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21296  return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
21297  }
21298  case PM_MULTI_WRITE_NODE: {
21299  parser_lex(parser);
21300  pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21301  return node;
21302  }
21303  default:
21304  parser_lex(parser);
21305 
21306  // In this case we have an operator but we don't know what it's for.
21307  // We need to treat it as an error. For now, we'll mark it as an error
21308  // and just skip right past it.
21309  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21310  return node;
21311  }
21312  }
21314  case PM_TOKEN_KEYWORD_AND: {
21315  parser_lex(parser);
21316 
21317  pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21318  return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
21319  }
21320  case PM_TOKEN_KEYWORD_OR:
21321  case PM_TOKEN_PIPE_PIPE: {
21322  parser_lex(parser);
21323 
21324  pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21325  return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
21326  }
21327  case PM_TOKEN_EQUAL_TILDE: {
21328  // Note that we _must_ parse the value before adding the local
21329  // variables in order to properly mirror the behavior of Ruby. For
21330  // example,
21331  //
21332  // /(?<foo>bar)/ =~ foo
21333  //
21334  // In this case, `foo` should be a method call and not a local yet.
21335  parser_lex(parser);
21336  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21337 
21338  // By default, we're going to create a call node and then return it.
21339  pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21340  pm_node_t *result = (pm_node_t *) call;
21341 
21342  // If the receiver of this =~ is a regular expression node, then we
21343  // need to introduce local variables for it based on its named
21344  // capture groups.
21346  // It's possible to have an interpolated regular expression node
21347  // that only contains strings. This is because it can be split
21348  // up by a heredoc. In this case we need to concat the unescaped
21349  // strings together and then parse them as a regular expression.
21350  pm_node_list_t *parts = &((pm_interpolated_regular_expression_node_t *) node)->parts;
21351 
21352  bool interpolated = false;
21353  size_t total_length = 0;
21354 
21355  pm_node_t *part;
21356  PM_NODE_LIST_FOREACH(parts, index, part) {
21357  if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21358  total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21359  } else {
21360  interpolated = true;
21361  break;
21362  }
21363  }
21364 
21365  if (!interpolated && total_length > 0) {
21366  void *memory = xmalloc(total_length);
21367  if (!memory) abort();
21368 
21369  uint8_t *cursor = memory;
21370  PM_NODE_LIST_FOREACH(parts, index, part) {
21371  pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21372  size_t length = pm_string_length(unescaped);
21373 
21374  memcpy(cursor, pm_string_source(unescaped), length);
21375  cursor += length;
21376  }
21377 
21378  pm_string_t owned;
21379  pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21380 
21381  result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21382  pm_string_free(&owned);
21383  }
21384  } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21385  // If we have a regular expression node, then we can just parse
21386  // the named captures directly off the unescaped string.
21387  const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21388  result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21389  }
21390 
21391  return result;
21392  }
21393  case PM_TOKEN_UAMPERSAND:
21394  case PM_TOKEN_USTAR:
21395  case PM_TOKEN_USTAR_STAR:
21396  // The only times this will occur are when we are in an error state,
21397  // but we'll put them in here so that errors can propagate.
21398  case PM_TOKEN_BANG_EQUAL:
21399  case PM_TOKEN_BANG_TILDE:
21400  case PM_TOKEN_EQUAL_EQUAL:
21403  case PM_TOKEN_CARET:
21404  case PM_TOKEN_PIPE:
21405  case PM_TOKEN_AMPERSAND:
21407  case PM_TOKEN_LESS_LESS:
21408  case PM_TOKEN_MINUS:
21409  case PM_TOKEN_PLUS:
21410  case PM_TOKEN_PERCENT:
21411  case PM_TOKEN_SLASH:
21412  case PM_TOKEN_STAR:
21413  case PM_TOKEN_STAR_STAR: {
21414  parser_lex(parser);
21415  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21416  return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21417  }
21418  case PM_TOKEN_GREATER:
21420  case PM_TOKEN_LESS:
21421  case PM_TOKEN_LESS_EQUAL: {
21422  if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21423  PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21424  }
21425 
21426  parser_lex(parser);
21427  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21428  return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
21429  }
21431  case PM_TOKEN_DOT: {
21432  parser_lex(parser);
21433  pm_token_t operator = parser->previous;
21434  pm_arguments_t arguments = { 0 };
21435 
21436  // This if statement handles the foo.() syntax.
21437  if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21438  parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21439  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21440  }
21441 
21442  pm_token_t message;
21443 
21444  switch (parser->current.type) {
21445  case PM_CASE_OPERATOR:
21446  case PM_CASE_KEYWORD:
21447  case PM_TOKEN_CONSTANT:
21448  case PM_TOKEN_IDENTIFIER:
21449  case PM_TOKEN_METHOD_NAME: {
21450  parser_lex(parser);
21451  message = parser->previous;
21452  break;
21453  }
21454  default: {
21455  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21456  message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21457  }
21458  }
21459 
21460  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21461  pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21462 
21463  if (
21464  (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21465  arguments.arguments == NULL &&
21466  arguments.opening_loc.start == NULL &&
21467  match1(parser, PM_TOKEN_COMMA)
21468  ) {
21469  return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21470  } else {
21471  return (pm_node_t *) call;
21472  }
21473  }
21474  case PM_TOKEN_DOT_DOT:
21475  case PM_TOKEN_DOT_DOT_DOT: {
21476  parser_lex(parser);
21477 
21478  pm_node_t *right = NULL;
21479  if (token_begins_expression_p(parser->current.type)) {
21480  right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21481  }
21482 
21483  return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
21484  }
21486  pm_token_t keyword = parser->current;
21487  parser_lex(parser);
21488 
21489  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21490  return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
21491  }
21493  pm_token_t keyword = parser->current;
21494  parser_lex(parser);
21495 
21496  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21497  return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
21498  }
21500  parser_lex(parser);
21501  pm_statements_node_t *statements = pm_statements_node_create(parser);
21502  pm_statements_node_body_append(parser, statements, node, true);
21503 
21504  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21505  return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21506  }
21508  parser_lex(parser);
21509  pm_statements_node_t *statements = pm_statements_node_create(parser);
21510  pm_statements_node_body_append(parser, statements, node, true);
21511 
21512  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21513  return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21514  }
21515  case PM_TOKEN_QUESTION_MARK: {
21516  context_push(parser, PM_CONTEXT_TERNARY);
21517  pm_node_list_t current_block_exits = { 0 };
21518  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21519 
21520  pm_token_t qmark = parser->current;
21521  parser_lex(parser);
21522 
21523  pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21524 
21525  if (parser->recovering) {
21526  // If parsing the true expression of this ternary resulted in a syntax
21527  // error that we can recover from, then we're going to put missing nodes
21528  // and tokens into the remaining places. We want to be sure to do this
21529  // before the `expect` function call to make sure it doesn't
21530  // accidentally move past a ':' token that occurs after the syntax
21531  // error.
21532  pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21533  pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
21534 
21535  context_pop(parser);
21536  pop_block_exits(parser, previous_block_exits);
21537  pm_node_list_free(&current_block_exits);
21538 
21539  return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21540  }
21541 
21542  accept1(parser, PM_TOKEN_NEWLINE);
21543  expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21544 
21545  pm_token_t colon = parser->previous;
21546  pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21547 
21548  context_pop(parser);
21549  pop_block_exits(parser, previous_block_exits);
21550  pm_node_list_free(&current_block_exits);
21551 
21552  return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21553  }
21554  case PM_TOKEN_COLON_COLON: {
21555  parser_lex(parser);
21556  pm_token_t delimiter = parser->previous;
21557 
21558  switch (parser->current.type) {
21559  case PM_TOKEN_CONSTANT: {
21560  parser_lex(parser);
21561  pm_node_t *path;
21562 
21563  if (
21564  (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21565  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21566  ) {
21567  // If we have a constant immediately following a '::' operator, then
21568  // this can either be a constant path or a method call, depending on
21569  // what follows the constant.
21570  //
21571  // If we have parentheses, then this is a method call. That would
21572  // look like Foo::Bar().
21573  pm_token_t message = parser->previous;
21574  pm_arguments_t arguments = { 0 };
21575 
21576  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21577  path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21578  } else {
21579  // Otherwise, this is a constant path. That would look like Foo::Bar.
21580  path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21581  }
21582 
21583  // If this is followed by a comma then it is a multiple assignment.
21584  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21585  return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21586  }
21587 
21588  return path;
21589  }
21590  case PM_CASE_OPERATOR:
21591  case PM_CASE_KEYWORD:
21592  case PM_TOKEN_IDENTIFIER:
21593  case PM_TOKEN_METHOD_NAME: {
21594  parser_lex(parser);
21595  pm_token_t message = parser->previous;
21596 
21597  // If we have an identifier following a '::' operator, then it is for
21598  // sure a method call.
21599  pm_arguments_t arguments = { 0 };
21600  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21601  pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21602 
21603  // If this is followed by a comma then it is a multiple assignment.
21604  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21605  return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21606  }
21607 
21608  return (pm_node_t *) call;
21609  }
21611  // If we have a parenthesis following a '::' operator, then it is the
21612  // method call shorthand. That would look like Foo::(bar).
21613  pm_arguments_t arguments = { 0 };
21614  parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21615 
21616  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
21617  }
21618  default: {
21619  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21620  return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21621  }
21622  }
21623  }
21625  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21626  parser_lex(parser);
21627  accept1(parser, PM_TOKEN_NEWLINE);
21628 
21629  pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21630  context_pop(parser);
21631 
21632  return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
21633  }
21634  case PM_TOKEN_BRACKET_LEFT: {
21635  parser_lex(parser);
21636 
21637  pm_arguments_t arguments = { 0 };
21638  arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21639 
21640  if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21641  pm_accepts_block_stack_push(parser, true);
21642  parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
21643  pm_accepts_block_stack_pop(parser);
21644  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21645  }
21646 
21647  arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21648 
21649  // If we have a comma after the closing bracket then this is a multiple
21650  // assignment and we should parse the targets.
21651  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21652  pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21653  return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21654  }
21655 
21656  // If we're at the end of the arguments, we can now check if there is a
21657  // block node that starts with a {. If there is, then we can parse it and
21658  // add it to the arguments.
21659  pm_block_node_t *block = NULL;
21660  if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21661  block = parse_block(parser, (uint16_t) (depth + 1));
21662  pm_arguments_validate_block(parser, &arguments, block);
21663  } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21664  block = parse_block(parser, (uint16_t) (depth + 1));
21665  }
21666 
21667  if (block != NULL) {
21668  if (arguments.block != NULL) {
21669  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
21670  if (arguments.arguments == NULL) {
21671  arguments.arguments = pm_arguments_node_create(parser);
21672  }
21673  pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
21674  }
21675 
21676  arguments.block = (pm_node_t *) block;
21677  }
21678 
21679  return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
21680  }
21681  case PM_TOKEN_KEYWORD_IN: {
21682  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21683  parser->pattern_matching_newlines = true;
21684 
21685  pm_token_t operator = parser->current;
21686  parser->command_start = false;
21687  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21688  parser_lex(parser);
21689 
21690  pm_constant_id_list_t captures = { 0 };
21691  pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21692 
21693  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21694  pm_constant_id_list_free(&captures);
21695 
21696  return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
21697  }
21698  case PM_TOKEN_EQUAL_GREATER: {
21699  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21700  parser->pattern_matching_newlines = true;
21701 
21702  pm_token_t operator = parser->current;
21703  parser->command_start = false;
21704  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21705  parser_lex(parser);
21706 
21707  pm_constant_id_list_t captures = { 0 };
21708  pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21709 
21710  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21711  pm_constant_id_list_free(&captures);
21712 
21713  return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
21714  }
21715  default:
21716  assert(false && "unreachable");
21717  return NULL;
21718  }
21719 }
21720 
21721 #undef PM_PARSE_PATTERN_SINGLE
21722 #undef PM_PARSE_PATTERN_TOP
21723 #undef PM_PARSE_PATTERN_MULTI
21724 
21729 static inline bool
21730 pm_call_node_command_p(const pm_call_node_t *node) {
21731  return (
21732  (node->opening_loc.start == NULL) &&
21733  (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
21734  (node->arguments != NULL || node->block != NULL)
21735  );
21736 }
21737 
// Parse an expression: first a prefix expression (parse_expression_prefix),
// then, in a loop, every following infix operator whose left binding power is
// at least `binding_power` (parse_expression_infix).
//
// - binding_power: minimum left binding power an infix operator must have to
//   be consumed by this call.
// - accepts_command_call / accepts_label: forwarded to the prefix parser;
//   accepts_command_call is also forwarded to the infix parser and may be
//   cleared inside the loop (see the end of the loop body).
// - diag_id: forwarded to parse_expression_prefix.
// - depth: current nesting depth; once it reaches PRISM_DEPTH_MAXIMUM an error
//   is recorded and a missing node is returned instead of parsing further.
//
// NOTE(review): the numbering embedded in this listing skips 21761-21762,
// 21814-21819 and 21856, and the braces below do not balance as shown. Lines
// appear to have been dropped from the listing - confirm against upstream
// before relying on the exact control flow.
21746 static pm_node_t *
21747 parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
 // Guard against unbounded nesting: report the error on the current token
 // and hand back a missing node spanning it.
21748  if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21749  pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21750  return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
21751  }
21752 
21753  pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
21754 
 // Some prefix results end the expression early, before any infix operator
 // is considered.
21755  switch (PM_NODE_TYPE(node)) {
21756  case PM_MISSING_NODE:
21757  // If we found a syntax error, then the type of node returned by
21758  // parse_expression_prefix is going to be a missing node.
21759  return node;
21760  case PM_PRE_EXECUTION_NODE:
 // NOTE(review): listing lines 21761-21762 are absent here; further case
 // labels may belong to this group - confirm.
21763  case PM_ALIAS_METHOD_NODE:
21764  case PM_MULTI_WRITE_NODE:
21765  case PM_UNDEF_NODE:
21766  // These expressions are statements, and cannot be followed by
21767  // operators (except modifiers).
21768  if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21769  return node;
21770  }
21771  break;
21772  case PM_CALL_NODE:
21773  // If we have a call node, then we need to check if it looks like a
21774  // method call without parentheses that contains arguments. If it
21775  // does, then it has different rules for parsing infix operators,
21776  // namely that it only accepts composition (and/or) and modifiers
21777  // (if/unless/etc.).
21778  if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
21779  return node;
21780  }
21781  break;
21782  case PM_SYMBOL_NODE:
21783  // If we have a symbol node that is being parsed as a label, then we
21784  // need to immediately return, because there should never be an
21785  // infix operator following this node.
21786  if (pm_symbol_node_label_p(node)) {
21787  return node;
21788  }
 // A non-label symbol falls through to the default case.
21789  default:
21790  break;
21791  }
21792 
21793  // Otherwise we'll look and see if the next token can be parsed as an infix
21794  // operator. If it can, then we'll parse it using parse_expression_infix.
21795  pm_binding_powers_t current_binding_powers;
21796  pm_token_type_t current_token_type;
21797 
 // The loop condition uses the comma operator: it first snapshots the
 // current token type and its binding powers, then tests whether that token
 // is a binary operator that binds at least as tightly as `binding_power`.
21798  while (
21799  current_token_type = parser->current.type,
21800  current_binding_powers = pm_binding_powers[current_token_type],
21801  binding_power <= current_binding_powers.left &&
21802  current_binding_powers.binary
21803  ) {
21804  node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
21805 
21806  switch (PM_NODE_TYPE(node)) {
21807  case PM_MULTI_WRITE_NODE:
21808  // Multi-write nodes are statements, and cannot be followed by
21809  // operators except modifiers.
21810  if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21811  return node;
21812  }
21813  break;
 // NOTE(review): listing lines 21814-21819 are absent here. As shown, the
 // statements below follow a `break` with no case label and so cannot be
 // reached; the labels for the write-node cases are presumably what is
 // missing - confirm.
21820  // These expressions are statements, by virtue of the right-hand
21821  // side of their write being an implicit array.
21822  if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21823  return node;
21824  }
21825  break;
21826  case PM_CALL_NODE:
21827  // These expressions are also statements, by virtue of the
21828  // right-hand side of the expression (i.e., the last argument to
21829  // the call node) being an implicit array.
21830  if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21831  return node;
21832  }
21833  break;
21834  default:
21835  break;
21836  }
21837 
21838  // If the operator is nonassoc and we should not be able to parse the
21839  // upcoming infix operator, break.
21840  if (current_binding_powers.nonassoc) {
21841  // If this is a non-assoc operator and we are about to parse the
21842  // exact same operator, then we need to add an error.
21843  if (match1(parser, current_token_type)) {
21844  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21845  break;
21846  }
21847 
21848  // If this is an endless range, then we need to reject a couple of
21849  // additional operators because it violates the normal operator
21850  // precedence rules. Those patterns are:
21851  //
21852  // 1.. & 2
21853  // 1.. * 2
21854  //
21855  if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
 // NOTE(review): listing line 21856 is absent here. It presumably opens a
 // nested condition restricting this error to specific following tokens,
 // which would also account for the unbalanced braces below - confirm.
21857  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21858  break;
21859  }
21860 
21861  if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
21862  break;
21863  }
21864  } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
21865  break;
21866  }
21867  }
21868 
 // Once the parsed node can no longer continue a method chain, stop
 // accepting command calls for the remaining infix operators.
21869  if (accepts_command_call) {
21870  // A command-style method call is only accepted on method chains.
21871  // Thus, we check whether the parsed node can continue method chains.
21872  // The method chain can continue if the parsed node is one of the following five kinds:
21873  // (1) index access: foo[1]
21874  // (2) attribute access: foo.bar
21875  // (3) method call with parenthesis: foo.bar(1)
21876  // (4) method call with a block: foo.bar do end
21877  // (5) constant path: foo::Bar
21878  switch (node->type) {
21879  case PM_CALL_NODE: {
21880  pm_call_node_t *cast = (pm_call_node_t *)node;
 // The flag is cleared only when the call matches none of shapes (1)-(4).
21881  if (
21882  // (1) foo[1]
21883  !(
21884  cast->call_operator_loc.start == NULL &&
21885  cast->message_loc.start != NULL &&
21886  cast->message_loc.start[0] == '[' &&
21887  cast->message_loc.end[-1] == ']'
21888  ) &&
21889  // (2) foo.bar
21890  !(
21891  cast->call_operator_loc.start != NULL &&
21892  cast->arguments == NULL &&
21893  cast->block == NULL &&
21894  cast->opening_loc.start == NULL
21895  ) &&
21896  // (3) foo.bar(1)
21897  !(
21898  cast->call_operator_loc.start != NULL &&
21899  cast->opening_loc.start != NULL
21900  ) &&
21901  // (4) foo.bar do end
21902  !(
21903  cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
21904  )
21905  ) {
21906  accepts_command_call = false;
21907  }
21908  break;
21909  }
21910  // (5) foo::Bar
21911  case PM_CONSTANT_PATH_NODE:
21912  break;
21913  default:
21914  accepts_command_call = false;
21915  break;
21916  }
21917  }
21918  }
21919 
21920  return node;
21921 }
21922 
21927 static pm_statements_node_t *
21928 wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
21929  if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
21930  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21931  pm_arguments_node_arguments_append(
21932  arguments,
21933  (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
21934  );
21935 
21936  pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
21937  parser,
21938  arguments,
21939  pm_parser_constant_id_constant(parser, "print", 5)
21940  ), true);
21941  }
21942 
21943  if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
21944  if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
21945  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21946  pm_arguments_node_arguments_append(
21947  arguments,
21948  (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
21949  );
21950 
21951  pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
21952  pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
21953 
21954  pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
21955  parser,
21956  pm_parser_constant_id_constant(parser, "$F", 2),
21957  (pm_node_t *) call
21958  );
21959 
21960  pm_statements_node_body_prepend(statements, (pm_node_t *) write);
21961  }
21962 
21963  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21964  pm_arguments_node_arguments_append(
21965  arguments,
21966  (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
21967  );
21968 
21969  if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
21970  pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
21971  pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
21972  parser,
21973  (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
21974  &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
21975  (pm_node_t *) pm_true_node_synthesized_create(parser)
21976  ));
21977 
21978  pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
21979  pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
21980  }
21981 
21982  pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
21983  pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
21984  parser,
21985  (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
21986  statements
21987  ), true);
21988 
21989  statements = wrapped_statements;
21990  }
21991 
21992  return statements;
21993 }
21994 
// Parse the whole program and return it as a program node.
//
// Pushes a top-level scope if none exists, lexes the first token, parses the
// statements in the PM_CONTEXT_MAIN context, orders the locals of the current
// scope, pops that scope, and builds the program node from the locals and
// the (possibly wrapped) statements.
//
// NOTE(review): the numbering embedded in this listing skips 22036; the
// `} else {` near the end has no visible opening `if`. Confirm against
// upstream.
21998 static pm_node_t *
21999 parse_program(pm_parser_t *parser) {
22000  // If the current scope is NULL, then we want to push a new top level scope.
22001  // The current scope could exist in the event that we are parsing an eval
22002  // and the user has passed into scopes that already exist.
22003  if (parser->current_scope == NULL) {
22004  pm_parser_scope_push(parser, true);
22005  }
22006 
 // Install a fresh block-exit list for the duration of the parse, keeping
 // the previous one so it can be handed to flush_block_exits below.
22007  pm_node_list_t current_block_exits = { 0 };
22008  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22009 
22010  parser_lex(parser);
22011  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22012 
 // A NULL result is replaced with an empty statements node so that the
 // program node always has a body.
22013  if (statements == NULL) {
22014  statements = pm_statements_node_create(parser);
22015  } else if (!parser->parsing_eval) {
22016  // If we have statements, then the top-level statement should be
22017  // explicitly checked as well. We have to do this here because
22018  // everywhere else we check all but the last statement.
22019  assert(statements->body.size > 0);
22020  pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22021  }
22022 
 // Compute the ordered locals before the scope that owns them is popped.
22023  pm_constant_id_list_t locals;
22024  pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22025  pm_parser_scope_pop(parser);
22026 
22027  // If this is an empty file, then we're still going to parse all of the
22028  // statements in order to gather up all of the comments and such. Here we'll
22029  // correct the location information.
22030  if (pm_statements_node_body_length(statements) == 0) {
22031  pm_statements_node_location_set(statements, parser->start, parser->start);
22032  }
22033 
22034  // At the top level, see if we need to wrap the statements in a program
22035  // node with a while loop based on the options.
 // NOTE(review): the condition that selects between wrapping and flushing
 // block exits (listing line 22036) is missing here - confirm.
22037  statements = wrap_statements(parser, statements);
22038  } else {
22039  flush_block_exits(parser, previous_block_exits);
22040  pm_node_list_free(&current_block_exits);
22041  }
22042 
22043  return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
22044 }
22045 
22046 /******************************************************************************/
22047 /* External functions */
22048 /******************************************************************************/
22049 
/**
 * Find the first occurrence of the NUL-terminated string `little` within the
 * first `big_length` bytes of `big`. `big` does not need to be NUL-terminated.
 *
 * A match is only reported when it lies entirely within the first
 * `big_length` bytes, and no byte at or beyond `big + big_length` is ever
 * read.
 *
 * @param big The buffer to search.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the start of the first match, `big` itself when
 *     `little` is empty (the strstr convention), or NULL when there is no
 *     match.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    // An empty needle trivially matches at the start of the haystack.
    if (little_length == 0) return big;

    // A needle longer than the haystack can never match. This check also
    // keeps the subtraction below from wrapping around.
    if (little_length > big_length) return NULL;

    // `last` is the final position at which a full-length match still fits,
    // so memcmp never reads past the end of the searchable region.
    const char *last = big + (big_length - little_length);

    for (; big <= last; big++) {
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
22069 
22070 #ifdef _WIN32
22071 #define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22072 #else
22078 static void
22079 pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22080  if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22081  pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
22082  }
22083 }
22084 #endif
22085 
22090 static void
22091 pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22092  const char *switches = pm_strnstr(engine, " -", length);
22093  if (switches == NULL) return;
22094 
22095  pm_options_t next_options = *options;
22096  options->shebang_callback(
22097  &next_options,
22098  (const uint8_t *) (switches + 1),
22099  length - ((size_t) (switches - engine)) - 1,
22100  options->shebang_callback_data
22101  );
22102 
22103  size_t encoding_length;
22104  if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22105  const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22106  parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22107  }
22108 
22109  parser->command_line = next_options.command_line;
22110  parser->frozen_string_literal = next_options.frozen_string_literal;
22111 }
22112 
// Initialize a parser over `size` bytes starting at `source`, optionally
// configured by `options` (which may be NULL).
//
// The steps visible below are: reset every parser field to its default, size
// the constant pool and newline list from the input size, apply the options
// (including any externally supplied scopes and their locals), skip a UTF-8
// BOM, and process a leading shebang line - or, when requested, search the
// file for the first shebang line that mentions "ruby".
//
// NOTE(review): this listing omits the line carrying the return type and
// skips listing lines 22261, 22357 and 22364; statements may be missing at
// those points. Confirm against upstream.
22117 pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22118  assert(source != NULL);
22119 
22120  *parser = (pm_parser_t) {
22121  .node_id = 0,
22122  .lex_state = PM_LEX_STATE_BEG,
22123  .enclosure_nesting = 0,
22124  .lambda_enclosure_nesting = -1,
22125  .brace_nesting = 0,
22126  .do_loop_stack = 0,
22127  .accepts_block_stack = 0,
22128  .lex_modes = {
22129  .index = 0,
22130  .stack = {{ .mode = PM_LEX_DEFAULT }},
22131  .current = &parser->lex_modes.stack[0],
22132  },
22133  .start = source,
22134  .end = source + size,
22135  .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22136  .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22137  .next_start = NULL,
22138  .heredoc_end = NULL,
22139  .data_loc = { .start = NULL, .end = NULL },
22140  .comment_list = { 0 },
22141  .magic_comment_list = { 0 },
22142  .warning_list = { 0 },
22143  .error_list = { 0 },
22144  .current_scope = NULL,
22145  .current_context = NULL,
22146  .encoding = PM_ENCODING_UTF_8_ENTRY,
22147  .encoding_changed_callback = NULL,
22148  .encoding_comment_start = source,
22149  .lex_callback = NULL,
22150  .filepath = { 0 },
22151  .constant_pool = { 0 },
22152  .newline_list = { 0 },
22153  .integer_base = 0,
22154  .current_string = PM_STRING_EMPTY,
22155  .start_line = 1,
22156  .explicit_encoding = NULL,
22157  .command_line = 0,
22158  .parsing_eval = false,
22159  .partial_script = false,
22160  .command_start = true,
22161  .recovering = false,
22162  .encoding_locked = false,
22163  .encoding_changed = false,
22164  .pattern_matching_newlines = false,
22165  .in_keyword_arg = false,
22166  .current_block_exits = NULL,
22167  .semantic_token_seen = false,
22168  .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22169  .current_regular_expression_ascii_only = false,
22170  .warn_mismatched_indentation = true
22171  };
22172 
22173  // Initialize the constant pool. We're going to completely guess as to the
22174  // number of constants that we'll need based on the size of the input. The
22175  // ratio we chose here is actually less arbitrary than you might think.
22176  //
22177  // We took ~50K Ruby files and measured the size of the file versus the
22178  // number of constants that were found in those files. Then we found the
22179  // average and standard deviation of the ratios of constants/bytesize. Then
22180  // we added 1.34 standard deviations to the average to get a ratio that
22181  // would fit 75% of the files (for a two-tailed distribution). This works
22182  // because there was about a 0.77 correlation and the distribution was
22183  // roughly normal.
22184  //
22185  // This ratio will need to change if we add more constants to the constant
22186  // pool for another node type.
22187  uint32_t constant_size = ((uint32_t) size) / 95;
22188  pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22189 
22190  // Initialize the newline list. Similar to the constant pool, we're going to
22191  // guess at the number of newlines that we'll need based on the size of the
22192  // input.
22193  size_t newline_size = size / 22;
22194  pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22195 
22196  // If options were provided to this parse, establish them here.
22197  if (options != NULL) {
22198  // filepath option
22199  parser->filepath = options->filepath;
22200 
22201  // line option
22202  parser->start_line = options->line;
22203 
22204  // encoding option
22205  size_t encoding_length = pm_string_length(&options->encoding);
22206  if (encoding_length > 0) {
22207  const uint8_t *encoding_source = pm_string_source(&options->encoding);
22208  parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22209  }
22210 
22211  // encoding_locked option
22212  parser->encoding_locked = options->encoding_locked;
22213 
22214  // frozen_string_literal option
22215  parser->frozen_string_literal = options->frozen_string_literal;
22216 
22217  // command_line option
22218  parser->command_line = options->command_line;
22219 
22220  // version option
22221  parser->version = options->version;
22222 
22223  // partial_script
22224  parser->partial_script = options->partial_script;
22225 
22226  // scopes option
22227  parser->parsing_eval = options->scopes_count > 0;
22228  if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22229 
22230  for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22231  const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22232  pm_parser_scope_push(parser, scope_index == 0);
22233 
22234  // Scopes given from the outside are not allowed to have numbered
22235  // parameters.
22236  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22237 
22238  for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22239  const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22240 
22241  const uint8_t *source = pm_string_source(local);
22242  size_t length = pm_string_length(local);
22243 
 // Each local name is copied into its own allocation and handed to the
 // parser as an owned constant; a failed allocation skips that local.
22244  void *allocated = xmalloc(length);
22245  if (allocated == NULL) continue;
22246 
22247  memcpy(allocated, source, length);
22248  pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22249  }
22250  }
22251  }
22252 
22253  pm_accepts_block_stack_push(parser, true);
22254 
22255  // Skip past the UTF-8 BOM if it exists.
22256  if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22257  parser->current.end += 3;
22258  parser->encoding_comment_start += 3;
22259 
22260  if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
 // NOTE(review): listing line 22261 is absent here; presumably the encoding
 // is reset before the callback is notified - confirm.
22262  if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22263  }
22264  }
22265 
22266  // If the -x command line flag is set, or the first shebang of the file does
22267  // not include "ruby", then we'll search for a shebang that does include
22268  // "ruby" and start parsing from there.
22269  bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22270 
22271  // If the first two bytes of the source are a shebang, then we will do a bit
22272  // of extra processing.
22273  //
22274  // First, we'll indicate that the encoding comment is at the end of the
22275  // shebang. This means that when a shebang is present the encoding comment
22276  // can begin on the second line.
22277  //
22278  // Second, we will check if the shebang includes "ruby". If it does, then
22279  // we will start parsing from there. We will also potentially warn the
22280  // user if there is a carriage return at the end of the shebang. We will
22281  // also potentially call the shebang callback if this is the main script to
22282  // allow the caller to parse the shebang and find any command-line options.
22283  // If the shebang does not include "ruby" and this is the main script being
22284  // parsed, then we will start searching the file for a shebang that does
22285  // contain "ruby" as if -x were passed on the command line.
22286  const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
22287  size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
22288 
 // current.end may already sit past a BOM, so make sure two bytes are
 // actually available there before looking at them.
22289  if (length > 2 && (parser->end - parser->current.end) >= 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22290  const char *engine;
22291 
22292  if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22293  if (newline != NULL) {
22294  parser->encoding_comment_start = newline + 1;
22295 
22296  if (options == NULL || options->main_script) {
22297  pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22298  }
22299  }
22300 
22301  if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22302  pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22303  }
22304 
22305  search_shebang = false;
 // options may be NULL (see the checks above), so it must be tested before
 // main_script is read.
22306  } else if (options != NULL && options->main_script && !parser->parsing_eval) {
22307  search_shebang = true;
22308  }
22309  }
22310 
22311  // Here we're going to find the first shebang that includes "ruby" and start
22312  // parsing from there.
22313  if (search_shebang) {
22314  // If a shebang that includes "ruby" is not found, then we're going to add
22315  // a load error to the list of errors on the parser.
22316  bool found_shebang = false;
22317 
22318  // This is going to point to the start of each line as we check it.
22319  // We'll maintain a moving window looking at each line as they come.
22320  const uint8_t *cursor = parser->start;
22321 
22322  // The newline pointer points to the end of the current line that we're
22323  // considering. If it is NULL, then we're at the end of the file.
22324  const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22325 
22326  while (newline != NULL) {
22327  pm_newline_list_append(&parser->newline_list, newline);
22328 
22329  cursor = newline + 1;
22330  newline = next_newline(cursor, parser->end - cursor);
22331 
22332  size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22333  if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22334  const char *engine;
22335  if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22336  found_shebang = true;
22337 
22338  if (newline != NULL) {
22339  pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22340  parser->encoding_comment_start = newline + 1;
22341  }
22342 
22343  if (options != NULL && options->shebang_callback != NULL) {
22344  pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22345  }
22346 
22347  break;
22348  }
22349  }
22350  }
22351 
 // When a shebang was found, parsing restarts at the beginning of that line.
22352  if (found_shebang) {
22353  parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22354  parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22355  } else {
22356  pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
 // NOTE(review): listing line 22357 is absent here; a further statement may
 // belong to this branch - confirm.
22358  }
22359  }
22360 
22361  // The encoding comment can start after any amount of inline whitespace, so
22362  // here we'll advance it to the first non-inline-whitespace character so
22363  // that it is ready for future comparisons.
 // NOTE(review): listing line 22364, which presumably performs the advance
 // described above, is absent here - confirm.
22365 }
22366 
22373  parser->encoding_changed_callback = callback;
22374 }
22375 
22379 static inline void
22380 pm_comment_list_free(pm_list_t *list) {
22381  pm_list_node_t *node, *next;
22382 
22383  for (node = list->head; node != NULL; node = next) {
22384  next = node->next;
22385 
22386  pm_comment_t *comment = (pm_comment_t *) node;
22387  xfree(comment);
22388  }
22389 }
22390 
// Walk every entry of the given magic comment list, reading each entry's
// successor before the entry itself is handled.
//
// NOTE(review): listing lines 22401-22402 are absent from the loop body; as
// shown the loop releases nothing. Presumably each entry is freed there, as
// in pm_comment_list_free - confirm against upstream.
22394 static inline void
22395 pm_magic_comment_list_free(pm_list_t *list) {
22396  pm_list_node_t *node, *next;
22397 
22398  for (node = list->head; node != NULL; node = next) {
22399  next = node->next;
22400 
22403  }
22404 }
22405 
22411  pm_string_free(&parser->filepath);
22414  pm_comment_list_free(&parser->comment_list);
22415  pm_magic_comment_list_free(&parser->magic_comment_list);
22418 
22419  while (parser->current_scope != NULL) {
22420  // Normally, popping the scope doesn't free the locals since it is
22421  // assumed that ownership has transferred to the AST. However if we have
22422  // scopes while we're freeing the parser, it's likely they came from
22423  // eval scopes and we need to free them explicitly here.
22424  pm_parser_scope_pop(parser);
22425  }
22426 
22427  while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22428  lex_mode_pop(parser);
22429  }
22430 }
22431 
22437  return parse_program(parser);
22438 }
22439 
22445 static bool
22446 pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets) {
22447 #define LINE_SIZE 4096
22448  char line[LINE_SIZE];
22449 
22450  while (memset(line, '\n', LINE_SIZE), fgets(line, LINE_SIZE, stream) != NULL) {
22451  size_t length = LINE_SIZE;
22452  while (length > 0 && line[length - 1] == '\n') length--;
22453 
22454  if (length == LINE_SIZE) {
22455  // If we read a line that is the maximum size and it doesn't end
22456  // with a newline, then we'll just append it to the buffer and
22457  // continue reading.
22458  length--;
22459  pm_buffer_append_string(buffer, line, length);
22460  continue;
22461  }
22462 
22463  // Append the line to the buffer.
22464  length--;
22465  pm_buffer_append_string(buffer, line, length);
22466 
22467  // Check if the line matches the __END__ marker. If it does, then stop
22468  // reading and return false. In most circumstances, this means we should
22469  // stop reading from the stream so that the DATA constant can pick it
22470  // up.
22471  switch (length) {
22472  case 7:
22473  if (strncmp(line, "__END__", 7) == 0) return false;
22474  break;
22475  case 8:
22476  if (strncmp(line, "__END__\n", 8) == 0) return false;
22477  break;
22478  case 9:
22479  if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22480  break;
22481  }
22482  }
22483 
22484  return true;
22485 #undef LINE_SIZE
22486 }
22487 
22497 static bool
22498 pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22499  pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22500 
22501  for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22502  if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22503  return true;
22504  }
22505  }
22506 
22507  return false;
22508 }
22509 
// Parse source that is read from `stream` through the `fgets` callback.
//
// `buffer` is initialized here and accumulates everything read; `parser` is
// initialized over the buffer's contents with `options`. The node produced
// by the final parse is returned.
//
// NOTE(review): the line carrying the return type is absent from this
// listing.
22517 pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options) {
22518  pm_buffer_init(buffer);
22519 
22520  bool eof = pm_parse_stream_read(buffer, stream, fgets);
22521  pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22522  pm_node_t *node = pm_parse(parser);
22523 
 // While the last read did not report end of input and the parse ended with
 // errors inside a pushed lex mode or with an unterminated heredoc, throw
 // the result away, read more input, and parse the whole buffer again with
 // a freshly initialized parser.
22524  while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22525  pm_node_destroy(parser, node);
22526  eof = pm_parse_stream_read(buffer, stream, fgets);
22527 
22528  pm_parser_free(parser);
22529  pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22530  node = pm_parse(parser);
22531  }
22532 
22533  return node;
22534 }
22535 
// Parse `size` bytes at `source` using options read from `data`, and report
// whether the parse produced no errors. The node, parser and options created
// here are all released before returning.
//
// NOTE(review): the line carrying the return type is absent from this
// listing; the value returned is a bool.
22540 pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22541  pm_options_t options = { 0 };
22542  pm_options_read(&options, data);
22543 
22544  pm_parser_t parser;
22545  pm_parser_init(&parser, source, size, &options);
22546 
22547  pm_node_t *node = pm_parse(&parser);
22548  pm_node_destroy(&parser, node);
22549 
 // Capture the result before the parser (and its error list) is freed.
22550  bool result = parser.error_list.size == 0;
22551  pm_parser_free(&parser);
22552  pm_options_free(&options);
22553 
22554  return result;
22555 }
22556 
22557 #undef PM_CASE_KEYWORD
22558 #undef PM_CASE_OPERATOR
22559 #undef PM_CASE_WRITABLE
22560 #undef PM_STRING_EMPTY
22561 #undef PM_LOCATION_NODE_BASE_VALUE
22562 #undef PM_LOCATION_NODE_VALUE
22563 #undef PM_LOCATION_NULL_VALUE
22564 #undef PM_LOCATION_TOKEN_VALUE
22565 
22566 // We optionally support serializing to a binary string. For systems that don't
22567 // want or need this functionality, it can be turned off with the
22568 // PRISM_EXCLUDE_SERIALIZATION define.
22569 #ifndef PRISM_EXCLUDE_SERIALIZATION
22570 
// Append the serialization header to the buffer, starting with the five
// bytes "PRISM".
//
// NOTE(review): listing lines 22574-22577 are absent here; further header
// fields are presumably appended after the magic bytes - confirm.
22571 static inline void
22572 pm_serialize_header(pm_buffer_t *buffer) {
22573  pm_buffer_append_string(buffer, "PRISM", 5);
22578 }
22579 
// Serialize an already-parsed tree into `buffer`: the header, then the
// content for `node`, then a single trailing NUL byte.
//
// NOTE(review): the line carrying the return type is absent from this
// listing.
22584 pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22585  pm_serialize_header(buffer);
22586  pm_serialize_content(parser, node, buffer);
22587  pm_buffer_append_byte(buffer, '\0');
22588 }
22589 
// Parse `size` bytes at `source` with options read from `data`, and append
// the serialized result (header, content, trailing NUL byte) to `buffer`.
// The node, parser and options created here are released before returning.
//
// NOTE(review): the line carrying the return type is absent from this
// listing.
22595 pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22596  pm_options_t options = { 0 };
22597  pm_options_read(&options, data);
22598 
22599  pm_parser_t parser;
22600  pm_parser_init(&parser, source, size, &options);
22601 
22602  pm_node_t *node = pm_parse(&parser);
22603 
22604  pm_serialize_header(buffer);
22605  pm_serialize_content(&parser, node, buffer);
22606  pm_buffer_append_byte(buffer, '\0');
22607 
22608  pm_node_destroy(&parser, node);
22609  pm_parser_free(&parser);
22610  pm_options_free(&options);
22611 }
22612 
// Parse source read from `stream` through the `fgets` callback, with options
// read from `data`, and append the serialized result (header, content,
// trailing NUL byte) to `buffer`. The source text is accumulated in a
// separate local buffer, which is freed along with the node, parser and
// options before returning.
//
// NOTE(review): the line carrying the return type is absent from this
// listing.
22618 pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data) {
22619  pm_parser_t parser;
22620  pm_options_t options = { 0 };
22621  pm_options_read(&options, data);
22622 
22623  pm_buffer_t parser_buffer;
22624  pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, fgets, &options);
22625  pm_serialize_header(buffer);
22626  pm_serialize_content(&parser, node, buffer);
22627  pm_buffer_append_byte(buffer, '\0');
22628 
22629  pm_node_destroy(&parser, node);
22630  pm_buffer_free(&parser_buffer);
22631  pm_parser_free(&parser);
22632  pm_options_free(&options);
22633 }
22634 
// Parse `size` bytes at `source` with options read from `data`, and append
// to `buffer` the header, the parser's encoding, its start line, and its
// comment list. The syntax tree itself is not serialized; it is destroyed
// together with the parser and options before returning.
//
// NOTE(review): the line carrying the return type is absent from this
// listing.
22639 pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22640  pm_options_t options = { 0 };
22641  pm_options_read(&options, data);
22642 
22643  pm_parser_t parser;
22644  pm_parser_init(&parser, source, size, &options);
22645 
22646  pm_node_t *node = pm_parse(&parser);
22647  pm_serialize_header(buffer);
22648  pm_serialize_encoding(parser.encoding, buffer);
22649  pm_buffer_append_varsint(buffer, parser.start_line);
22650  pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
22651 
22652  pm_node_destroy(&parser, node);
22653  pm_parser_free(&parser);
22654  pm_options_free(&options);
22655 }
22656 
22657 #endif
22658 
22659 /******************************************************************************/
22660 /* Slice queries for the Ruby API */
22661 /******************************************************************************/
22662 
// Classification of a byte slice, as produced by pm_slice_type.
22664 typedef enum {
 // The named encoding could not be found.
22666  PM_SLICE_TYPE_ERROR = -1,
22667 
 // The slice is empty or is not a valid identifier.
22669  PM_SLICE_TYPE_NONE,
22670 
 // A valid identifier whose first character is not uppercase.
22672  PM_SLICE_TYPE_LOCAL,
22673 
 // A valid identifier whose first character is uppercase.
22675  PM_SLICE_TYPE_CONSTANT,
22676 
 // A valid identifier followed by a single trailing '!', '?' or '='.
22678  PM_SLICE_TYPE_METHOD_NAME
22679 } pm_slice_type_t;
22680 
22684 pm_slice_type_t
22685 pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22686  // first, get the right encoding object
22687  const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22688  if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22689 
22690  // check that there is at least one character
22691  if (length == 0) return PM_SLICE_TYPE_NONE;
22692 
22693  size_t width;
22694  if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22695  // valid because alphabetical
22696  } else if (*source == '_') {
22697  // valid because underscore
22698  width = 1;
22699  } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22700  // valid because multibyte
22701  } else {
22702  // invalid because no match
22703  return PM_SLICE_TYPE_NONE;
22704  }
22705 
22706  // determine the type of the slice based on the first character
22707  const uint8_t *end = source + length;
22708  pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22709 
22710  // next, iterate through all of the bytes of the string to ensure that they
22711  // are all valid identifier characters
22712  source += width;
22713 
22714  while (source < end) {
22715  if ((width = encoding->alnum_char(source, end - source)) != 0) {
22716  // valid because alphanumeric
22717  source += width;
22718  } else if (*source == '_') {
22719  // valid because underscore
22720  source++;
22721  } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22722  // valid because multibyte
22723  source += width;
22724  } else {
22725  // invalid because no match
22726  break;
22727  }
22728  }
22729 
22730  // accept a ! or ? at the end of the slice as a method name
22731  if (*source == '!' || *source == '?' || *source == '=') {
22732  source++;
22733  result = PM_SLICE_TYPE_METHOD_NAME;
22734  }
22735 
22736  // valid if we are at the end of the slice
22737  return source == end ? result : PM_SLICE_TYPE_NONE;
22738 }
22739 
// Report whether the given slice is a valid local variable name under the
// named encoding: PM_STRING_QUERY_ERROR when pm_slice_type reports an error,
// PM_STRING_QUERY_TRUE when it classifies the slice as a local, and
// PM_STRING_QUERY_FALSE for every other classification.
//
// NOTE(review): the line carrying the return type is absent from this
// listing.
22744 pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22745  switch (pm_slice_type(source, length, encoding_name)) {
22746  case PM_SLICE_TYPE_ERROR:
22747  return PM_STRING_QUERY_ERROR;
22748  case PM_SLICE_TYPE_NONE:
22749  case PM_SLICE_TYPE_CONSTANT:
22750  case PM_SLICE_TYPE_METHOD_NAME:
22751  return PM_STRING_QUERY_FALSE;
22752  case PM_SLICE_TYPE_LOCAL:
22753  return PM_STRING_QUERY_TRUE;
22754  }
22755 
 // Every enumerator returns above; this is only a fallback.
22756  assert(false && "unreachable");
22757  return PM_STRING_QUERY_FALSE;
22758 }
22759 
// Report whether the given slice is a valid constant name under the named
// encoding: PM_STRING_QUERY_ERROR when pm_slice_type reports an error,
// PM_STRING_QUERY_TRUE when it classifies the slice as a constant, and
// PM_STRING_QUERY_FALSE for every other classification.
//
// NOTE(review): the line carrying the return type is absent from this
// listing.
22764 pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22765  switch (pm_slice_type(source, length, encoding_name)) {
22766  case PM_SLICE_TYPE_ERROR:
22767  return PM_STRING_QUERY_ERROR;
22768  case PM_SLICE_TYPE_NONE:
22769  case PM_SLICE_TYPE_LOCAL:
22770  case PM_SLICE_TYPE_METHOD_NAME:
22771  return PM_STRING_QUERY_FALSE;
22772  case PM_SLICE_TYPE_CONSTANT:
22773  return PM_STRING_QUERY_TRUE;
22774  }
22775 
 // Every enumerator returns above; this is only a fallback.
22776  assert(false && "unreachable");
22777  return PM_STRING_QUERY_FALSE;
22778 }
22779 
// Report whether the given slice is a valid method name under the named
// encoding. Identifier-shaped slices are decided by their pm_slice_type
// classification; slices that are not identifiers are then compared against
// a fixed set of one-, two- and three-byte operator names.
//
// NOTE(review): the line carrying the return type is absent from this
// listing.
22784 pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
 // B maps a boolean onto the query result; C1/C2/C3 compare the first one,
 // two or three bytes of the slice against a literal. C2 and C3 are only
 // used below after `length` has been matched to 2 or 3 respectively.
22785 #define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22786 #define C1(c) (*source == c)
22787 #define C2(s) (memcmp(source, s, 2) == 0)
22788 #define C3(s) (memcmp(source, s, 3) == 0)
22789 
22790  switch (pm_slice_type(source, length, encoding_name)) {
22791  case PM_SLICE_TYPE_ERROR:
22792  return PM_STRING_QUERY_ERROR;
22793  case PM_SLICE_TYPE_NONE:
 // Not an identifier: fall out of the switch to the operator checks.
22794  break;
22795  case PM_SLICE_TYPE_LOCAL:
22796  // numbered parameters are not valid method names
 // i.e. false only for a two-byte slice of '_' followed by a digit 1-9.
22797  return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22798  case PM_SLICE_TYPE_CONSTANT:
22799  // all constants are valid method names
22800  case PM_SLICE_TYPE_METHOD_NAME:
22801  // all method names are valid method names
22802  return PM_STRING_QUERY_TRUE;
22803  }
22804 
 // Operator method names, grouped by byte length.
22805  switch (length) {
22806  case 1:
22807  return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22808  case 2:
22809  return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22810  case 3:
22811  return B(C3("===") || C3("<=>") || C3("[]="));
22812  default:
22813  return PM_STRING_QUERY_FALSE;
22814  }
22815 
22816 #undef B
22817 #undef C1
22818 #undef C2
22819 #undef C3
22820 }
@ PM_RANGE_FLAGS_EXCLUDE_END
... operator
Definition: ast.h:7430
@ PM_DEFINED_NODE
DefinedNode.
Definition: ast.h:709
@ PM_PRE_EXECUTION_NODE
PreExecutionNode.
Definition: ast.h:931
@ PM_RETRY_NODE
RetryNode.
Definition: ast.h:964
@ PM_REDO_NODE
RedoNode.
Definition: ast.h:943
@ PM_CONSTANT_PATH_WRITE_NODE
ConstantPathWriteNode.
Definition: ast.h:694
@ PM_SOURCE_LINE_NODE
SourceLineNode.
Definition: ast.h:985
@ PM_UNLESS_NODE
UnlessNode.
Definition: ast.h:1009
@ PM_CALL_NODE
CallNode.
Definition: ast.h:628
@ PM_NIL_NODE
NilNode.
Definition: ast.h:895
@ PM_GLOBAL_VARIABLE_READ_NODE
GlobalVariableReadNode.
Definition: ast.h:757
@ PM_RATIONAL_NODE
RationalNode.
Definition: ast.h:940
@ PM_FIND_PATTERN_NODE
FindPatternNode.
Definition: ast.h:727
@ PM_ARRAY_NODE
ArrayNode.
Definition: ast.h:589
@ PM_CONSTANT_PATH_TARGET_NODE
ConstantPathTargetNode.
Definition: ast.h:691
@ PM_OR_NODE
OrNode.
Definition: ast.h:913
@ PM_MULTI_WRITE_NODE
MultiWriteNode.
Definition: ast.h:889
@ PM_IF_NODE
IfNode.
Definition: ast.h:772
@ PM_INTERPOLATED_STRING_NODE
InterpolatedStringNode.
Definition: ast.h:826
@ PM_FALSE_NODE
FalseNode.
Definition: ast.h:724
@ PM_HASH_NODE
HashNode.
Definition: ast.h:766
@ PM_MATCH_PREDICATE_NODE
MatchPredicateNode.
Definition: ast.h:871
@ PM_X_STRING_NODE
XStringNode.
Definition: ast.h:1021
@ PM_GLOBAL_VARIABLE_TARGET_NODE
GlobalVariableTargetNode.
Definition: ast.h:760
@ PM_AND_NODE
AndNode.
Definition: ast.h:583
@ PM_CONSTANT_TARGET_NODE
ConstantTargetNode.
Definition: ast.h:700
@ PM_IT_LOCAL_VARIABLE_READ_NODE
ItLocalVariableReadNode.
Definition: ast.h:835
@ PM_SOURCE_FILE_NODE
SourceFileNode.
Definition: ast.h:982
@ PM_NO_KEYWORDS_PARAMETER_NODE
NoKeywordsParameterNode.
Definition: ast.h:898
@ PM_MULTI_TARGET_NODE
MultiTargetNode.
Definition: ast.h:886
@ PM_SPLAT_NODE
SplatNode.
Definition: ast.h:988
@ PM_CLASS_VARIABLE_READ_NODE
ClassVariableReadNode.
Definition: ast.h:661
@ PM_ELSE_NODE
ElseNode.
Definition: ast.h:712
@ PM_INTERPOLATED_MATCH_LAST_LINE_NODE
InterpolatedMatchLastLineNode.
Definition: ast.h:820
@ PM_SYMBOL_NODE
SymbolNode.
Definition: ast.h:1000
@ PM_ALIAS_METHOD_NODE
AliasMethodNode.
Definition: ast.h:577
@ PM_MATCH_REQUIRED_NODE
MatchRequiredNode.
Definition: ast.h:874
@ PM_BACK_REFERENCE_READ_NODE
BackReferenceReadNode.
Definition: ast.h:601
@ PM_BLOCK_ARGUMENT_NODE
BlockArgumentNode.
Definition: ast.h:607
@ PM_MISSING_NODE
MissingNode.
Definition: ast.h:880
@ PM_SELF_NODE
SelfNode.
Definition: ast.h:970
@ PM_TRUE_NODE
TrueNode.
Definition: ast.h:1003
@ PM_ASSOC_SPLAT_NODE
AssocSplatNode.
Definition: ast.h:598
@ PM_RANGE_NODE
RangeNode.
Definition: ast.h:937
@ PM_LOCAL_VARIABLE_READ_NODE
LocalVariableReadNode.
Definition: ast.h:859
@ PM_NEXT_NODE
NextNode.
Definition: ast.h:892
@ PM_REGULAR_EXPRESSION_NODE
RegularExpressionNode.
Definition: ast.h:946
@ PM_CONSTANT_WRITE_NODE
ConstantWriteNode.
Definition: ast.h:703
@ PM_HASH_PATTERN_NODE
HashPatternNode.
Definition: ast.h:769
@ PM_UNDEF_NODE
UndefNode.
Definition: ast.h:1006
@ PM_ENSURE_NODE
EnsureNode.
Definition: ast.h:721
@ PM_LOCAL_VARIABLE_WRITE_NODE
LocalVariableWriteNode.
Definition: ast.h:865
@ PM_KEYWORD_HASH_NODE
KeywordHashNode.
Definition: ast.h:841
@ PM_PARENTHESES_NODE
ParenthesesNode.
Definition: ast.h:919
@ PM_CLASS_VARIABLE_WRITE_NODE
ClassVariableWriteNode.
Definition: ast.h:667
@ PM_POST_EXECUTION_NODE
PostExecutionNode.
Definition: ast.h:928
@ PM_RETURN_NODE
ReturnNode.
Definition: ast.h:967
@ PM_ARRAY_PATTERN_NODE
ArrayPatternNode.
Definition: ast.h:592
@ PM_MATCH_LAST_LINE_NODE
MatchLastLineNode.
Definition: ast.h:868
@ PM_CONSTANT_PATH_NODE
ConstantPathNode.
Definition: ast.h:682
@ PM_INTERPOLATED_SYMBOL_NODE
InterpolatedSymbolNode.
Definition: ast.h:829
@ PM_CLASS_VARIABLE_TARGET_NODE
ClassVariableTargetNode.
Definition: ast.h:664
@ PM_BREAK_NODE
BreakNode.
Definition: ast.h:622
@ PM_IMAGINARY_NODE
ImaginaryNode.
Definition: ast.h:775
@ PM_CONSTANT_READ_NODE
ConstantReadNode.
Definition: ast.h:697
@ PM_GLOBAL_VARIABLE_WRITE_NODE
GlobalVariableWriteNode.
Definition: ast.h:763
@ PM_SOURCE_ENCODING_NODE
SourceEncodingNode.
Definition: ast.h:979
@ PM_BEGIN_NODE
BeginNode.
Definition: ast.h:604
@ PM_INSTANCE_VARIABLE_READ_NODE
InstanceVariableReadNode.
Definition: ast.h:808
@ PM_FLIP_FLOP_NODE
FlipFlopNode.
Definition: ast.h:730
@ PM_INSTANCE_VARIABLE_WRITE_NODE
InstanceVariableWriteNode.
Definition: ast.h:814
@ PM_INSTANCE_VARIABLE_TARGET_NODE
InstanceVariableTargetNode.
Definition: ast.h:811
@ PM_CASE_NODE
CaseNode.
Definition: ast.h:646
@ PM_FLOAT_NODE
FloatNode.
Definition: ast.h:733
@ PM_ASSOC_NODE
AssocNode.
Definition: ast.h:595
@ PM_INTEGER_NODE
IntegerNode.
Definition: ast.h:817
@ PM_LOCAL_VARIABLE_TARGET_NODE
LocalVariableTargetNode.
Definition: ast.h:862
@ PM_STRING_NODE
StringNode.
Definition: ast.h:994
@ PM_ALIAS_GLOBAL_VARIABLE_NODE
AliasGlobalVariableNode.
Definition: ast.h:574
@ PM_NUMBERED_REFERENCE_READ_NODE
NumberedReferenceReadNode.
Definition: ast.h:904
@ PM_STATEMENTS_NODE
StatementsNode.
Definition: ast.h:991
@ PM_BLOCK_NODE
BlockNode.
Definition: ast.h:613
@ PM_INTERPOLATED_REGULAR_EXPRESSION_NODE
InterpolatedRegularExpressionNode.
Definition: ast.h:823
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE
We store the flags enum in every node in the tree.
Definition: ast.h:1046
@ PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition: ast.h:7513
@ PM_STRING_FLAGS_FROZEN
frozen by virtue of a frozen_string_literal: true comment or --enable-frozen-string-literal
Definition: ast.h:7496
@ PM_STRING_FLAGS_FORCED_BINARY_ENCODING
internal bytes forced the encoding to binary
Definition: ast.h:7493
@ PM_STRING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition: ast.h:7490
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING
if the arguments contain forwarding
Definition: ast.h:7322
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS
if the arguments contain keywords
Definition: ast.h:7325
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT
if the arguments contain a keyword splat
Definition: ast.h:7328
#define PM_NODE_FLAG_P(node, flag)
Return true if the given flag is set on the given node.
Definition: ast.h:1063
#define PM_NODE_TYPE_P(node, type)
Return true if the type of the given node matches the given type.
Definition: ast.h:1058
#define PM_NODE_TYPE(node)
Cast the type to an enum to allow the compiler to provide exhaustiveness checking.
Definition: ast.h:1053
@ PM_INTEGER_BASE_FLAGS_HEXADECIMAL
0x prefix
Definition: ast.h:7387
@ PM_INTEGER_BASE_FLAGS_OCTAL
0o or 0 prefix
Definition: ast.h:7384
@ PM_INTEGER_BASE_FLAGS_DECIMAL
0d or no prefix
Definition: ast.h:7381
@ PM_INTEGER_BASE_FLAGS_BINARY
0b prefix
Definition: ast.h:7378
enum pm_token_type pm_token_type_t
This enum represents every type of token in the Ruby source.
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
When we're serializing to Java, we want to skip serializing the location fields as they won't be used...
Definition: ast.h:7522
@ PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
&.
Definition: ast.h:7350
@ PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE
a call that is an attribute write, so the value being written should be returned
Definition: ast.h:7356
@ PM_CALL_NODE_FLAGS_VARIABLE_CALL
a call that could have been a local variable
Definition: ast.h:7353
@ PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition: ast.h:7468
@ PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
x - ignores whitespace and allows comments in regular expressions
Definition: ast.h:7441
uint16_t pm_node_flags_t
These are the flags embedded in the node struct.
Definition: ast.h:1040
@ PM_TOKEN_STAR_STAR
**
Definition: ast.h:469
@ PM_TOKEN_DOT_DOT_DOT
the ... range operator or forwarding parameter
Definition: ast.h:124
@ PM_TOKEN_MINUS_EQUAL
-=
Definition: ast.h:385
@ PM_TOKEN_IGNORED_NEWLINE
an ignored newline
Definition: ast.h:196
@ PM_TOKEN_BANG_EQUAL
!=
Definition: ast.h:64
@ PM_TOKEN_KEYWORD___FILE__
__FILE__
Definition: ast.h:349
@ PM_TOKEN_KEYWORD_WHEN
when
Definition: ast.h:334
@ PM_TOKEN_FLOAT
a floating point number
Definition: ast.h:160
@ PM_TOKEN_PLUS_EQUAL
+=
Definition: ast.h:442
@ PM_TOKEN_DOT_DOT
the .. range operator
Definition: ast.h:121
@ PM_TOKEN_UDOT_DOT
unary .. operator
Definition: ast.h:496
@ PM_TOKEN_AMPERSAND_DOT
&.
Definition: ast.h:49
@ PM_TOKEN_NEWLINE
a newline character outside of other tokens
Definition: ast.h:391
@ PM_TOKEN_NUMBERED_REFERENCE
a numbered reference to a capture group in the previous regular expression match
Definition: ast.h:394
@ PM_TOKEN_AMPERSAND
&
Definition: ast.h:40
@ PM_TOKEN_KEYWORD_YIELD
yield
Definition: ast.h:343
@ PM_TOKEN_KEYWORD_END
end
Definition: ast.h:253
@ PM_TOKEN_LAMBDA_BEGIN
{
Definition: ast.h:361
@ PM_TOKEN_KEYWORD_UNTIL_MODIFIER
until in the modifier form
Definition: ast.h:331
@ PM_TOKEN_EQUAL_EQUAL_EQUAL
===
Definition: ast.h:151
@ PM_TOKEN_INTEGER_RATIONAL
an integer with a rational suffix
Definition: ast.h:208
@ PM_TOKEN_USTAR
unary *
Definition: ast.h:511
@ PM_TOKEN_TILDE
~ or ~@
Definition: ast.h:487
@ PM_TOKEN_KEYWORD___ENCODING__
__ENCODING__
Definition: ast.h:346
@ PM_TOKEN_REGEXP_END
the end of a regular expression
Definition: ast.h:451
@ PM_TOKEN_KEYWORD_UNTIL
until
Definition: ast.h:328
@ PM_TOKEN_COMMA
,
Definition: ast.h:109
@ PM_TOKEN_MAXIMUM
The maximum token value.
Definition: ast.h:523
@ PM_TOKEN_GREATER
>
Definition: ast.h:175
@ PM_TOKEN_INTEGER
an integer (any base)
Definition: ast.h:202
@ PM_TOKEN_SLASH_EQUAL
/=
Definition: ast.h:460
@ PM_TOKEN_UMINUS_NUM
-@ for a number
Definition: ast.h:505
@ PM_TOKEN_EMBVAR
#
Definition: ast.h:142
@ PM_TOKEN_KEYWORD_UNLESS_MODIFIER
unless in the modifier form
Definition: ast.h:325
@ PM_TOKEN_INTEGER_RATIONAL_IMAGINARY
an integer with a rational and imaginary suffix
Definition: ast.h:211
@ PM_TOKEN_FLOAT_RATIONAL_IMAGINARY
a floating point number with a rational and imaginary suffix
Definition: ast.h:169
@ PM_TOKEN_BRACKET_LEFT_RIGHT
[]
Definition: ast.h:82
@ PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL
&&=
Definition: ast.h:46
@ PM_TOKEN_KEYWORD_CLASS
class
Definition: ast.h:232
@ PM_TOKEN_KEYWORD_BEGIN
begin
Definition: ast.h:220
@ PM_TOKEN_NOT_PROVIDED
a token that was not present but it is okay
Definition: ast.h:37
@ PM_TOKEN_USTAR_STAR
unary **
Definition: ast.h:514
@ PM_TOKEN_GREATER_GREATER_EQUAL
>>=
Definition: ast.h:184
@ PM_TOKEN_PERCENT_EQUAL
%=
Definition: ast.h:409
@ PM_TOKEN_PERCENT
%
Definition: ast.h:406
@ PM_TOKEN_KEYWORD_IN
in
Definition: ast.h:274
@ PM_TOKEN_BANG
! or !@
Definition: ast.h:61
@ PM_TOKEN_KEYWORD_NOT
not
Definition: ast.h:286
@ PM_TOKEN_BRACKET_LEFT_ARRAY
[ for the beginning of an array
Definition: ast.h:79
@ PM_TOKEN_HEREDOC_END
the end of a heredoc
Definition: ast.h:187
@ PM_TOKEN_HEREDOC_START
the start of a heredoc
Definition: ast.h:190
@ PM_TOKEN_KEYWORD_DEFINED
defined?
Definition: ast.h:238
@ PM_TOKEN_UCOLON_COLON
unary ::
Definition: ast.h:493
@ PM_TOKEN_LABEL_END
the end of a label
Definition: ast.h:358
@ PM_TOKEN_EQUAL_GREATER
=>
Definition: ast.h:154
@ PM_TOKEN_KEYWORD_UNLESS
unless
Definition: ast.h:322
@ PM_TOKEN_KEYWORD_ENSURE
ensure
Definition: ast.h:259
@ PM_TOKEN_AMPERSAND_EQUAL
&=
Definition: ast.h:52
@ PM_TOKEN_EQUAL_EQUAL
==
Definition: ast.h:148
@ PM_TOKEN_UPLUS
+@
Definition: ast.h:508
@ PM_TOKEN_FLOAT_IMAGINARY
a floating point number with an imaginary suffix
Definition: ast.h:163
@ PM_TOKEN_KEYWORD_BEGIN_UPCASE
BEGIN.
Definition: ast.h:223
@ PM_TOKEN_LESS_EQUAL_GREATER
<=>
Definition: ast.h:370
@ PM_TOKEN_KEYWORD_RESCUE_MODIFIER
rescue in the modifier form
Definition: ast.h:298
@ PM_TOKEN_MISSING
a token that was expected but not found
Definition: ast.h:34
@ PM_TOKEN_MINUS_GREATER
->
Definition: ast.h:388
@ PM_TOKEN_KEYWORD_FALSE
false
Definition: ast.h:262
@ PM_TOKEN_PIPE_PIPE_EQUAL
||=
Definition: ast.h:436
@ PM_TOKEN_KEYWORD_IF
if
Definition: ast.h:268
@ PM_TOKEN_EMBEXPR_BEGIN
#{
Definition: ast.h:136
@ PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES
( for a parentheses node
Definition: ast.h:400
@ PM_TOKEN_EMBDOC_END
=end
Definition: ast.h:130
@ PM_TOKEN_KEYWORD_ELSE
else
Definition: ast.h:247
@ PM_TOKEN_BACK_REFERENCE
a back reference
Definition: ast.h:58
@ PM_TOKEN_BRACKET_LEFT
[
Definition: ast.h:76
@ PM_TOKEN_EOF
final token in the file
Definition: ast.h:31
@ PM_TOKEN_PIPE_PIPE
||
Definition: ast.h:433
@ PM_TOKEN_KEYWORD_NIL
nil
Definition: ast.h:283
@ PM_TOKEN_PERCENT_UPPER_W
%W
Definition: ast.h:424
@ PM_TOKEN_KEYWORD_RETURN
return
Definition: ast.h:304
@ PM_TOKEN_CLASS_VARIABLE
a class variable
Definition: ast.h:100
@ PM_TOKEN_PIPE
|
Definition: ast.h:427
@ PM_TOKEN_PARENTHESIS_LEFT
(
Definition: ast.h:397
@ PM_TOKEN_BANG_TILDE
!~
Definition: ast.h:67
@ PM_TOKEN_DOT
the . call operator
Definition: ast.h:118
@ PM_TOKEN_PARENTHESIS_RIGHT
)
Definition: ast.h:403
@ PM_TOKEN_KEYWORD_RESCUE
rescue
Definition: ast.h:295
@ PM_TOKEN_INSTANCE_VARIABLE
an instance variable
Definition: ast.h:199
@ PM_TOKEN_PIPE_EQUAL
|=
Definition: ast.h:430
@ PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL
[]=
Definition: ast.h:85
@ PM_TOKEN_UAMPERSAND
unary &
Definition: ast.h:490
@ PM_TOKEN_MINUS
-
Definition: ast.h:382
@ PM_TOKEN_CONSTANT
a constant
Definition: ast.h:115
@ PM_TOKEN_IDENTIFIER
an identifier
Definition: ast.h:193
@ PM_TOKEN_EMBDOC_BEGIN
=begin
Definition: ast.h:127
@ PM_TOKEN_STAR_EQUAL
*=
Definition: ast.h:466
@ PM_TOKEN_KEYWORD_OR
or
Definition: ast.h:289
@ PM_TOKEN_KEYWORD_AND
and
Definition: ast.h:217
@ PM_TOKEN_LESS
<
Definition: ast.h:364
@ PM_TOKEN_KEYWORD_BREAK
break
Definition: ast.h:226
@ PM_TOKEN_PERCENT_LOWER_W
%w
Definition: ast.h:415
@ PM_TOKEN_SYMBOL_BEGIN
the beginning of a symbol
Definition: ast.h:484
@ PM_TOKEN_METHOD_NAME
a method name
Definition: ast.h:379
@ PM_TOKEN_KEYWORD_CASE
case
Definition: ast.h:229
@ PM_TOKEN_WORDS_SEP
a separator between words in a list
Definition: ast.h:517
@ PM_TOKEN_FLOAT_RATIONAL
a floating point number with a rational suffix
Definition: ast.h:166
@ PM_TOKEN_LESS_LESS_EQUAL
<<=
Definition: ast.h:376
@ PM_TOKEN_EMBDOC_LINE
a line inside of embedded documentation
Definition: ast.h:133
@ PM_TOKEN_KEYWORD_SUPER
super
Definition: ast.h:310
@ PM_TOKEN_KEYWORD_DO
do
Definition: ast.h:241
@ PM_TOKEN_KEYWORD_REDO
redo
Definition: ast.h:292
@ PM_TOKEN_EQUAL_TILDE
=~
Definition: ast.h:157
@ PM_TOKEN_EMBEXPR_END
}
Definition: ast.h:139
@ PM_TOKEN_KEYWORD_END_UPCASE
END.
Definition: ast.h:256
@ PM_TOKEN_KEYWORD___LINE__
__LINE__
Definition: ast.h:352
@ PM_TOKEN_STRING_END
the end of a string
Definition: ast.h:481
@ PM_TOKEN_STRING_CONTENT
the contents of a string
Definition: ast.h:478
@ PM_TOKEN_BRACE_LEFT
{
Definition: ast.h:70
@ PM_TOKEN_COLON_COLON
::
Definition: ast.h:106
@ PM_TOKEN_GREATER_GREATER
>>
Definition: ast.h:181
@ PM_TOKEN_PERCENT_LOWER_X
%x
Definition: ast.h:418
@ PM_TOKEN_KEYWORD_SELF
self
Definition: ast.h:307
@ PM_TOKEN_PERCENT_LOWER_I
%i
Definition: ast.h:412
@ PM_TOKEN_KEYWORD_ALIAS
alias
Definition: ast.h:214
@ PM_TOKEN_GLOBAL_VARIABLE
a global variable
Definition: ast.h:172
@ PM_TOKEN_KEYWORD_IF_MODIFIER
if in the modifier form
Definition: ast.h:271
@ PM_TOKEN_SLASH
/
Definition: ast.h:457
@ PM_TOKEN_KEYWORD_RETRY
retry
Definition: ast.h:301
@ PM_TOKEN_COLON
:
Definition: ast.h:103
@ PM_TOKEN_KEYWORD_UNDEF
undef
Definition: ast.h:319
@ PM_TOKEN_BRACKET_RIGHT
]
Definition: ast.h:88
@ PM_TOKEN_KEYWORD_FOR
for
Definition: ast.h:265
@ PM_TOKEN_KEYWORD_THEN
then
Definition: ast.h:313
@ PM_TOKEN_QUESTION_MARK
?
Definition: ast.h:445
@ PM_TOKEN___END__
marker for the point in the file at which the parser should stop
Definition: ast.h:520
@ PM_TOKEN_KEYWORD_WHILE
while
Definition: ast.h:337
@ PM_TOKEN_EQUAL
=
Definition: ast.h:145
@ PM_TOKEN_KEYWORD_DEF
def
Definition: ast.h:235
@ PM_TOKEN_UDOT_DOT_DOT
unary ... operator
Definition: ast.h:499
@ PM_TOKEN_STAR
*
Definition: ast.h:463
@ PM_TOKEN_KEYWORD_WHILE_MODIFIER
while in the modifier form
Definition: ast.h:340
@ PM_TOKEN_KEYWORD_TRUE
true
Definition: ast.h:316
@ PM_TOKEN_BRACE_RIGHT
}
Definition: ast.h:73
@ PM_TOKEN_SEMICOLON
;
Definition: ast.h:454
@ PM_TOKEN_REGEXP_BEGIN
the beginning of a regular expression
Definition: ast.h:448
@ PM_TOKEN_CARET
^
Definition: ast.h:91
@ PM_TOKEN_PERCENT_UPPER_I
%I
Definition: ast.h:421
@ PM_TOKEN_KEYWORD_DO_LOOP
do keyword for a predicate in a while, until, or for loop
Definition: ast.h:244
@ PM_TOKEN_KEYWORD_MODULE
module
Definition: ast.h:277
@ PM_TOKEN_PLUS
+
Definition: ast.h:439
@ PM_TOKEN_KEYWORD_NEXT
next
Definition: ast.h:280
@ PM_TOKEN_BACKTICK
`
Definition: ast.h:55
@ PM_TOKEN_INTEGER_IMAGINARY
an integer with an imaginary suffix
Definition: ast.h:205
@ PM_TOKEN_LABEL
a label
Definition: ast.h:355
@ PM_TOKEN_STAR_STAR_EQUAL
**=
Definition: ast.h:472
@ PM_TOKEN_CHARACTER_LITERAL
a character literal
Definition: ast.h:97
@ PM_TOKEN_AMPERSAND_AMPERSAND
&&
Definition: ast.h:43
@ PM_TOKEN_UMINUS
-@
Definition: ast.h:502
@ PM_TOKEN_LESS_LESS
<<
Definition: ast.h:373
@ PM_TOKEN_GREATER_EQUAL
>=
Definition: ast.h:178
@ PM_TOKEN_COMMENT
a comment
Definition: ast.h:112
@ PM_TOKEN_CARET_EQUAL
^=
Definition: ast.h:94
@ PM_TOKEN_KEYWORD_ELSIF
elsif
Definition: ast.h:250
@ PM_TOKEN_STRING_BEGIN
the beginning of a string
Definition: ast.h:475
@ PM_TOKEN_LESS_EQUAL
<=
Definition: ast.h:367
@ PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition: ast.h:7367
@ PM_LOOP_FLAGS_BEGIN_MODIFIER
a loop after a begin statement, so the body is executed first before the condition
Definition: ast.h:7414
void pm_diagnostic_list_free(pm_list_t *list)
Deallocate the internal state of the given diagnostic list.
Definition: diagnostic.c:831
bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id,...)
Append a diagnostic to the given list of diagnostics that is using a format string for its message.
Definition: diagnostic.c:787
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition: diagnostic.h:29
bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id)
Append a diagnostic to the given list of diagnostics that is using shared memory for its message.
Definition: diagnostic.c:766
#define xfree
Old name of ruby_xfree.
Definition: xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition: xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition: xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options)
Free the internal memory associated with the options.
Definition: options.c:181
void pm_options_read(pm_options_t *options, const char *data)
Deserialize an options struct from the given binary string.
Definition: options.c:238
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition: options.h:185
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition: options.h:20
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index)
Return a pointer to the scope at the given index within the given options.
Definition: options.c:154
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition: options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition: options.h:26
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index)
Return a pointer to the local at the given index within the given scope.
Definition: options.c:173
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition: options.h:191
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition: options.h:71
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition: parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition: parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition: parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition: parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition: parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition: parser.h:496
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition: parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition: parser.h:321
@ PM_CONTEXT_ELSIF
an elsif clause
Definition: parser.h:348
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition: parser.h:333
@ PM_CONTEXT_ELSE
an else clause
Definition: parser.h:345
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition: parser.h:357
@ PM_CONTEXT_CASE_WHEN
a case when statements
Definition: parser.h:306
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition: parser.h:303
@ PM_CONTEXT_MODULE
a module declaration
Definition: parser.h:384
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition: parser.h:336
@ PM_CONTEXT_CASE_IN
a case in statements
Definition: parser.h:309
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition: parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition: parser.h:378
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition: parser.h:414
@ PM_CONTEXT_UNLESS
an unless statement
Definition: parser.h:429
@ PM_CONTEXT_POSTEXE
an END block
Definition: parser.h:402
@ PM_CONTEXT_IF
an if statement
Definition: parser.h:360
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition: parser.h:396
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition: parser.h:375
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition: parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition: parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition: parser.h:318
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition: parser.h:369
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition: parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition: parser.h:315
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition: parser.h:363
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition: parser.h:390
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition: parser.h:399
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition: parser.h:291
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition: parser.h:327
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition: parser.h:423
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition: parser.h:408
@ PM_CONTEXT_DEFINED
a defined? expression
Definition: parser.h:339
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition: parser.h:387
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition: parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition: parser.h:432
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition: parser.h:330
@ PM_CONTEXT_FOR
a for loop
Definition: parser.h:354
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition: parser.h:405
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition: parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition: parser.h:417
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition: parser.h:342
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition: parser.h:372
@ PM_CONTEXT_CLASS
a class declaration
Definition: parser.h:312
@ PM_CONTEXT_MAIN
the top level context
Definition: parser.h:381
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition: parser.h:366
@ PM_CONTEXT_BEGIN
a begin statement
Definition: parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition: parser.h:411
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition: parser.h:351
@ PM_CONTEXT_TERNARY
a ternary expression
Definition: parser.h:426
@ PM_CONTEXT_DEF
a method definition
Definition: parser.h:324
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition: parser.h:420
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition: parser.h:393
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition: parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition: parser.h:435
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition: parser.h:522
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition: parser.h:448
bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity)
Initialize a pm_buffer_t with the given capacity.
Definition: pm_buffer.c:15
void pm_buffer_append_format(pm_buffer_t *buffer, const char *format,...) PRISM_ATTRIBUTE_FORMAT(2, 3)
Append a formatted string to the buffer.
void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length)
Append a string to the buffer.
Definition: pm_buffer.c:119
PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer)
Return the length of the buffer.
Definition: pm_buffer.c:43
void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value)
Append a single byte to the buffer.
Definition: pm_buffer.c:135
PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer)
Initialize a pm_buffer_t with its default values.
Definition: pm_buffer.c:27
void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value)
Append a 32-bit signed integer to the buffer as a variable-length integer.
Definition: pm_buffer.c:161
PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer)
Return the value of the buffer.
Definition: pm_buffer.c:35
PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer)
Free the memory associated with the buffer.
Definition: pm_buffer.c:315
void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length)
Append a list of bytes to the buffer.
Definition: pm_buffer.c:127
size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are hexadecimal digits.
Definition: pm_char.c:249
bool pm_char_is_decimal_digit(const uint8_t b)
Returns true if the given character is a decimal digit.
Definition: pm_char.c:295
size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are whitespace.
Definition: pm_char.c:76
size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are hexadecimal digits or underscore...
Definition: pm_char.c:263
size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are decimal digits or underscores.
Definition: pm_char.c:239
size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are decimal digits.
Definition: pm_char.c:225
size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are binary digits or underscores.
Definition: pm_char.c:202
size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are octal digits or underscores.
Definition: pm_char.c:216
size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list)
Returns the number of characters at the start of the string that are whitespace while also tracking t...
Definition: pm_char.c:86
bool pm_char_is_hexadecimal_digit(const uint8_t b)
Returns true if the given character is a hexadecimal digit.
Definition: pm_char.c:303
bool pm_char_is_octal_digit(const uint8_t b)
Returns true if the given character is an octal digit.
Definition: pm_char.c:287
bool pm_char_is_binary_digit(const uint8_t b)
Returns true if the given character is a binary digit.
Definition: pm_char.c:279
bool pm_char_is_inline_whitespace(const uint8_t b)
Returns true if the given character is an inline whitespace character.
Definition: pm_char.c:141
size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are inline whitespace.
Definition: pm_char.c:108
bool pm_char_is_whitespace(const uint8_t b)
Returns true if the given character is a whitespace character.
Definition: pm_char.c:133
size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are regexp options.
Definition: pm_char.c:117
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity)
Initialize a new constant pool with a given capacity.
pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length)
Insert a constant into a constant pool that is a slice of a source string.
void pm_constant_id_list_free(pm_constant_id_list_t *list)
Free the memory associated with a list of constant ids.
pm_constant_id_t pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length)
Insert a constant into a constant pool from memory that is constant.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id)
Insert a constant id into a list of constant ids at the specified index.
bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id)
Append a constant id to a list of constant ids.
pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length)
Insert a constant into a constant pool from memory that is now owned by the constant pool.
void pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity)
Initialize a list of constant ids with a given capacity.
pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id)
Return a pointer to the constant indicated by the given constant id.
bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id)
Checks if the current constant id list includes the given constant id.
void pm_constant_pool_free(pm_constant_pool_t *pool)
Free the memory associated with a constant pool.
void pm_list_append(pm_list_t *list, pm_list_node_t *node)
Append a node to the given list.
Definition: pm_list.c:23
void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding)
We need to roll our own memchr to handle cases where the encoding changes and we need to search for a...
Definition: pm_memchr.c:11
pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line)
Returns the line and column of the given offset.
void pm_newline_list_free(pm_newline_list_t *list)
Free the internal memory allocated for the newline list.
int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line)
Returns the line of the given offset.
bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity)
Initialize a new newline list with the given capacity.
bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor)
Append a new offset to the newline list.
void pm_newline_list_clear(pm_newline_list_t *list)
Clear out the newlines that have been appended to the list.
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string)
Returns the length associated with the string.
Definition: pm_string.c:352
void pm_string_ensure_owned(pm_string_t *string)
Ensure the string is owned.
Definition: pm_string.c:315
void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length)
Initialize an owned string that is responsible for freeing allocated memory.
Definition: pm_string.c:30
void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end)
Initialize a shared string that is based on initial input.
Definition: pm_string.c:16
#define PM_STRING_EMPTY
Defines an empty string.
Definition: pm_string.h:70
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
Definition: pm_string.c:368
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string)
Returns the start pointer associated with the string.
Definition: pm_string.c:360
int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length)
Compare two strings, ignoring case, up to the given length.
const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate)
Here we have rolled our own version of strpbrk.
Definition: pm_strpbrk.c:194
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition: defines.h:233
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition: defines.h:78
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition: defines.h:34
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition: defines.h:113
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition: defines.h:50
bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n)
Return true if the next character in the UTF-8 encoding is an uppercase character.
Definition: encoding.c:2346
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition: encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition: encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition: encoding.h:68
const uint8_t pm_encoding_unicode_table[256]
This lookup table is referenced in both the UTF-8 encoding file and the parser directly in order to s...
Definition: encoding.c:2164
const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end)
Parse the given name of an encoding and return a pointer to the corresponding encoding struct if one ...
Definition: encoding.c:5026
size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding.
Definition: encoding.c:2287
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition: encoding.h:74
PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node)
Deallocate a node and all of its children.
Definition: node.c:114
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition: node.h:17
void pm_node_list_free(pm_node_list_t *list)
Free the internal memory associated with the given node list.
Definition: node.c:88
void pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other)
Concatenate the given node list onto the end of the other node list.
Definition: node.c:77
void pm_node_list_append(pm_node_list_t *list, pm_node_t *node)
Append a new node onto the end of the node list.
Definition: node.c:55
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition: version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition: version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition: version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition: version.h:12
The main header file for the prism parser.
PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name)
Check that the slice is a valid method name.
Definition: prism.c:22784
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
Definition: prism.c:22372
PRISM_EXPORTED_FUNCTION const char * pm_version(void)
The prism version and the serialization format.
Definition: prism.c:7
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
Definition: prism.c:22410
pm_string_query_t
Represents the results of a slice query.
Definition: prism.h:240
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition: prism.h:248
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition: prism.h:242
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition: prism.h:245
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition: serialize.c:2121
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream to retrieve a line of input from a stream.
Definition: prism.h:88
PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the AST represented by the given node to the given buffer.
Definition: prism.c:22584
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data)
Parse and serialize the comments in the given source to the given buffer.
Definition: prism.c:22639
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data)
Parse and serialize the AST represented by the source that is read out of the given stream into th...
Definition: prism.c:22618
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
Definition: prism.c:22517
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Initiate the parse with the given parser.
Definition: prism.c:22436
PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data)
Parse the given source to the AST and dump the AST to the given buffer.
Definition: prism.c:22595
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition: serialize.c:2098
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given start and end pointers.
Definition: prism.c:22117
PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t size, const char *data)
Parse the source and return true if it parses without errors or warnings.
Definition: prism.c:22540
PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name)
Check that the slice is a valid constant name.
Definition: prism.c:22764
PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name)
Check that the slice is a valid local variable name.
Definition: prism.c:22744
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition: serialize.c:2028
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition: token_type.c:362
PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data)
Parse a regular expression.
Definition: regexp.c:777
void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node)
Create a string-based representation of the given static literal.
pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace)
Add a node to the set of static literals.
void pm_static_literals_free(pm_static_literals_t *literals)
Free the internal memory associated with the given static literals set.
This struct is used to pass information between the regular expression parser and the error callback.
Definition: prism.c:17931
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition: prism.c:17933
const uint8_t * start
The start of the regular expression.
Definition: prism.c:17936
bool shared
Whether or not the source of the regular expression is shared.
Definition: prism.c:17947
const uint8_t * end
The end of the regular expression.
Definition: prism.c:17939
This struct is used to pass information between the regular expression parser and the named capture c...
Definition: prism.c:20777
pm_constant_id_list_t names
The list of names that have been parsed.
Definition: prism.c:20788
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition: prism.c:20779
pm_match_write_node_t * match
The match write node that is being created.
Definition: prism.c:20785
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition: prism.c:20782
bool shared
Whether the content of the regular expression is shared.
Definition: prism.c:20795
AndNode.
Definition: ast.h:1258
struct pm_node * left
AndNode::left.
Definition: ast.h:1274
struct pm_node * right
AndNode::right.
Definition: ast.h:1287
ArgumentsNode.
Definition: ast.h:1319
pm_node_t base
The embedded base node.
Definition: ast.h:1321
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition: ast.h:1327
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition: prism.c:1575
pm_node_t * block
The optional block attached to the call.
Definition: prism.c:1586
bool has_forwarding
The flag indicating whether this arguments list has a forwarding argument.
Definition: prism.c:1589
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition: prism.c:1577
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition: prism.c:1580
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition: prism.c:1583
ArrayNode.
Definition: ast.h:1345
struct pm_node_list elements
ArrayNode::elements.
Definition: ast.h:1355
ArrayPatternNode.
Definition: ast.h:1406
struct pm_node * constant
ArrayPatternNode::constant.
Definition: ast.h:1414
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition: ast.h:1434
pm_node_t base
The embedded base node.
Definition: ast.h:1408
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition: ast.h:1439
AssocNode.
Definition: ast.h:1454
struct pm_node * value
AssocNode::value.
Definition: ast.h:1486
struct pm_node * key
AssocNode::key.
Definition: ast.h:1473
BeginNode.
Definition: ast.h:1580
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition: ast.h:1608
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition: ast.h:1598
struct pm_statements_node * statements
BeginNode::statements.
Definition: ast.h:1593
pm_node_t base
The embedded base node.
Definition: ast.h:1582
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition: ast.h:1603
This struct represents a set of binding powers used for a given token.
Definition: prism.c:12881
bool binary
Whether or not this token can be used as a binary operator.
Definition: prism.c:12889
pm_binding_power_t left
The left binding power.
Definition: prism.c:12883
bool nonassoc
Whether or not this token can be used as a non-associative binary operator.
Definition: prism.c:12895
pm_binding_power_t right
The right binding power.
Definition: prism.c:12886
BlockLocalVariableNode.
Definition: ast.h:1659
BlockNode.
Definition: ast.h:1682
BlockParameterNode.
Definition: ast.h:1729
BlockParametersNode.
Definition: ast.h:1766
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition: pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition: pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition: pm_buffer.h:30
CallNode.
Definition: ast.h:1922
pm_location_t opening_loc
CallNode::opening_loc.
Definition: ast.h:1961
pm_location_t closing_loc
CallNode::closing_loc.
Definition: ast.h:1971
struct pm_node * receiver
CallNode::receiver.
Definition: ast.h:1941
pm_constant_id_t name
CallNode::name.
Definition: ast.h:1951
pm_node_t base
The embedded base node.
Definition: ast.h:1924
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition: ast.h:1946
pm_location_t message_loc
CallNode::message_loc.
Definition: ast.h:1956
struct pm_arguments_node * arguments
CallNode::arguments.
Definition: ast.h:1966
struct pm_node * block
CallNode::block.
Definition: ast.h:1976
CaseMatchNode.
Definition: ast.h:2201
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition: ast.h:2214
CaseNode.
Definition: ast.h:2246
struct pm_node_list conditions
CaseNode::conditions.
Definition: ast.h:2259
ClassVariableReadNode.
Definition: ast.h:2466
ClassVariableTargetNode.
Definition: ast.h:2495
ClassVariableWriteNode.
Definition: ast.h:2518
This is a node in the linked list of comments that we've found while parsing.
Definition: parser.h:458
pm_comment_type_t type
The type of comment that we've found.
Definition: parser.h:466
pm_location_t location
The location of the comment in the source.
Definition: parser.h:463
A list of constant IDs.
ConstantPathNode.
Definition: ast.h:2732
ConstantPathTargetNode.
Definition: ast.h:2870
ConstantReadNode.
Definition: ast.h:2965
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition: ast.h:2994
ConstantWriteNode.
Definition: ast.h:3017
This is a node in a linked list of contexts.
Definition: parser.h:439
pm_context_t context
The context that this node represents.
Definition: parser.h:441
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition: parser.h:444
This struct represents a diagnostic generated during parsing.
Definition: diagnostic.h:359
pm_list_node_t node
The embedded base node.
Definition: diagnostic.h:361
pm_diagnostic_id_t diag_id
The ID of the diagnostic.
Definition: diagnostic.h:367
ElseNode.
Definition: ast.h:3196
struct pm_statements_node * statements
ElseNode::statements.
Definition: ast.h:3209
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition: encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition: encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition: encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition: encoding.h:50
const char * name
The name of the encoding.
Definition: encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition: encoding.h:43
EnsureNode.
Definition: ast.h:3294
struct pm_statements_node * statements
EnsureNode::statements.
Definition: ast.h:3307
FindPatternNode.
Definition: ast.h:3351
struct pm_node * constant
FindPatternNode::constant.
Definition: ast.h:3359
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition: ast.h:3379
pm_node_t base
The embedded base node.
Definition: ast.h:3353
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition: ast.h:3384
FlipFlopNode.
Definition: ast.h:3402
FloatNode.
Definition: ast.h:3435
double value
FloatNode::value.
Definition: ast.h:3445
pm_node_t base
The embedded base node.
Definition: ast.h:3437
ForwardingParameterNode.
Definition: ast.h:3571
GlobalVariableReadNode.
Definition: ast.h:3731
GlobalVariableTargetNode.
Definition: ast.h:3760
GlobalVariableWriteNode.
Definition: ast.h:3783
HashNode.
Definition: ast.h:3845
struct pm_node_list elements
HashNode::elements.
Definition: ast.h:3871
HashPatternNode.
Definition: ast.h:3899
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition: ast.h:3922
pm_node_t base
The embedded base node.
Definition: ast.h:3901
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition: ast.h:3927
struct pm_node * constant
HashPatternNode::constant.
Definition: ast.h:3907
All of the information that must be stored in order to lex a heredoc.
Definition: parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition: parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition: parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition: parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition: parser.h:90
IfNode.
Definition: ast.h:3948
struct pm_statements_node * statements
IfNode::statements.
Definition: ast.h:4008
struct pm_node * subsequent
IfNode::subsequent.
Definition: ast.h:4027
ImaginaryNode.
Definition: ast.h:4054
InstanceVariableReadNode.
Definition: ast.h:4544
InstanceVariableTargetNode.
Definition: ast.h:4573
InstanceVariableWriteNode.
Definition: ast.h:4596
IntegerNode.
Definition: ast.h:4664
pm_integer_t value
IntegerNode::value.
Definition: ast.h:4674
pm_node_t base
The embedded base node.
Definition: ast.h:4666
bool negative
Whether or not the integer is negative.
Definition: pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition: ast.h:4702
InterpolatedRegularExpressionNode.
Definition: ast.h:4748
InterpolatedStringNode.
Definition: ast.h:4785
pm_node_t base
The embedded base node.
Definition: ast.h:4787
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition: ast.h:4793
InterpolatedSymbolNode.
Definition: ast.h:4818
pm_node_t base
The embedded base node.
Definition: ast.h:4820
InterpolatedXStringNode.
Definition: ast.h:4851
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition: ast.h:4859
pm_node_t base
The embedded base node.
Definition: ast.h:4853
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition: ast.h:4864
KeywordHashNode.
Definition: ast.h:4923
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition: parser.h:518
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition: parser.h:512
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition: parser.h:109
enum pm_lex_mode::@91 mode
The type of this lex mode.
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition: parser.h:254
union pm_lex_mode::@92 as
The data associated with this type of lex mode.
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition: parser.h:246
int32_t line
The line number.
This struct represents an abstract linked list that provides common functionality.
Definition: pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition: pm_list.h:48
This represents the overall linked list.
Definition: pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition: pm_list.h:60
size_t size
The size of the list.
Definition: pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition: parser.h:532
pm_constant_id_t name
The name of the local variable.
Definition: parser.h:534
pm_location_t location
The location of the local variable in the source.
Definition: parser.h:537
uint32_t hash
The hash of the local variable.
Definition: parser.h:546
uint32_t index
The index of the local variable in the local table.
Definition: parser.h:540
uint32_t reads
The number of times the local variable is read.
Definition: parser.h:543
LocalVariableReadNode.
Definition: ast.h:5165
uint32_t depth
LocalVariableReadNode::depth.
Definition: ast.h:5196
pm_constant_id_t name
LocalVariableReadNode::name.
Definition: ast.h:5183
LocalVariableTargetNode.
Definition: ast.h:5211
LocalVariableWriteNode.
Definition: ast.h:5239
uint32_t depth
LocalVariableWriteNode::depth.
Definition: ast.h:5266
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition: ast.h:5253
This is a set of local variables in a certain lexical context (method, class, module,...
Definition: parser.h:554
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition: parser.h:562
uint32_t capacity
The capacity of the local variables set.
Definition: parser.h:559
uint32_t size
The number of local variables in the set.
Definition: parser.h:556
This represents a range of bytes in the source string to which a node or token corresponds.
Definition: ast.h:545
const uint8_t * start
A pointer to the start location of the range in the source.
Definition: ast.h:547
const uint8_t * end
A pointer to the end location of the range in the source.
Definition: ast.h:550
This is a node in the linked list of magic comments that we've found while parsing.
Definition: parser.h:475
MatchLastLineNode.
Definition: ast.h:5331
MatchWriteNode.
Definition: ast.h:5435
struct pm_node_list targets
MatchWriteNode::targets.
Definition: ast.h:5448
MultiTargetNode.
Definition: ast.h:5531
pm_node_t base
The embedded base node.
Definition: ast.h:5533
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition: ast.h:5589
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition: ast.h:5549
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition: ast.h:5599
MultiWriteNode.
Definition: ast.h:5614
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
A list of nodes in the source, most often used for lists of children.
Definition: ast.h:558
size_t size
The number of nodes in the list.
Definition: ast.h:560
struct pm_node ** nodes
The nodes in the list.
Definition: ast.h:566
This is the base structure that represents a node in the syntax tree.
Definition: ast.h:1069
pm_node_type_t type
This represents the type of the node.
Definition: ast.h:1074
pm_node_flags_t flags
This represents any flags on the node.
Definition: ast.h:1080
pm_location_t location
This is the location of the node in the source.
Definition: ast.h:1092
OptionalParameterNode.
Definition: ast.h:5887
A scope of locals surrounding the code that is being parsed.
Definition: options.h:36
size_t locals_count
The number of locals in the scope.
Definition: options.h:38
The options that can be passed to the parser.
Definition: options.h:77
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition: options.h:126
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition: options.h:88
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition: options.h:142
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition: options.h:149
pm_string_t encoding
The name of the encoding that the source file is in.
Definition: options.h:103
int32_t line
The line within the file that the parse starts on.
Definition: options.h:97
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition: options.h:82
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition: options.h:135
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition: options.h:159
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition: options.h:108
pm_string_t filepath
The name of the file that is currently being parsed.
Definition: options.h:91
pm_options_version_t version
The version of prism that we should be parsing with.
Definition: options.h:123
OrNode.
Definition: ast.h:5925
struct pm_node * left
OrNode::left.
Definition: ast.h:5941
struct pm_node * right
OrNode::right.
Definition: ast.h:5954
ParametersNode.
Definition: ast.h:5980
struct pm_node * rest
ParametersNode::rest.
Definition: ast.h:5998
struct pm_block_parameter_node * block
ParametersNode::block.
Definition: ast.h:6018
pm_node_t base
The embedded base node.
Definition: ast.h:5982
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition: ast.h:6013
ParenthesesNode.
Definition: ast.h:6033
struct pm_node * body
ParenthesesNode::body.
Definition: ast.h:6041
This struct represents the overall parser.
Definition: parser.h:640
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition: parser.h:840
pm_lex_state_t lex_state
The current state of the lexer.
Definition: parser.h:649
uint8_t command_line
The command line flags given from the options.
Definition: parser.h:859
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition: parser.h:755
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition: parser.h:882
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts that calc...
Definition: parser.h:909
const uint8_t * end
The pointer to the end of the source.
Definition: parser.h:694
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition: parser.h:888
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition: parser.h:797
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition: parser.h:930
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition: parser.h:786
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition: parser.h:912
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition: parser.h:707
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition: parser.h:749
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition: parser.h:721
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition: parser.h:658
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition: parser.h:774
pm_options_version_t version
The version of prism that we should use to parse.
Definition: parser.h:856
pm_token_t previous
The previous token we were considering.
Definition: parser.h:697
struct pm_parser::@97 lex_modes
A stack of lex modes.
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition: parser.h:803
bool parsing_eval
Whether or not we are parsing an eval string.
Definition: parser.h:875
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition: parser.h:924
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition: parser.h:903
pm_location_t data_loc
An optional location that represents the location of the END marker and the rest of the content of th...
Definition: parser.h:728
pm_context_node_t * current_context
The current parsing context.
Definition: parser.h:740
const uint8_t * start
The pointer to the start of the source.
Definition: parser.h:691
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition: parser.h:652
pm_list_t error_list
The list of errors that have been found while parsing.
Definition: parser.h:734
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition: parser.h:869
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition: parser.h:853
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition: parser.h:768
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition: parser.h:684
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition: parser.h:731
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition: parser.h:715
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition: parser.h:664
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition: parser.h:762
int32_t start_line
The line number at the start of the parse.
Definition: parser.h:809
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
Definition: parser.h:896
pm_lex_mode_t * current
The current mode of the lexer.
Definition: parser.h:681
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition: parser.h:718
size_t index
The current index into the lexer mode stack.
Definition: parser.h:687
pm_string_t filepath
This is the path of the file being parsed.
Definition: parser.h:780
pm_scope_t * current_scope
The current local scope.
Definition: parser.h:737
bool command_start
Whether or not we're at the beginning of a command.
Definition: parser.h:885
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition: parser.h:789
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a comment or whitespace).
Definition: parser.h:918
uint32_t node_id
The next node identifier that will be assigned.
Definition: parser.h:646
RangeNode.
Definition: ast.h:6239
struct pm_node * right
RangeNode::right.
Definition: ast.h:6269
struct pm_node * left
RangeNode::left.
Definition: ast.h:6255
RationalNode.
Definition: ast.h:6297
pm_node_t base
The embedded base node.
Definition: ast.h:6299
pm_integer_t numerator
RationalNode::numerator.
Definition: ast.h:6309
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
Definition: prism.c:10364
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition: prism.c:10369
pm_token_buffer_t base
The embedded base buffer.
Definition: prism.c:10366
RegularExpressionNode.
Definition: ast.h:6364
pm_node_t base
The embedded base node.
Definition: ast.h:6366
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition: ast.h:6387
RequiredParameterNode.
Definition: ast.h:6438
RescueNode.
Definition: ast.h:6499
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition: ast.h:6532
pm_node_t base
The embedded base node.
Definition: ast.h:6501
This struct represents a node in a linked list of scopes.
Definition: parser.h:580
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition: parser.h:582
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition: parser.h:593
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition: parser.h:620
pm_locals_t locals
The IDs of the locals in the given scope.
Definition: parser.h:585
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition: parser.h:614
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition: parser.h:626
SplatNode.
Definition: ast.h:6794
struct pm_node * expression
SplatNode::expression.
Definition: ast.h:6807
StatementsNode.
Definition: ast.h:6822
struct pm_node_list body
StatementsNode::body.
Definition: ast.h:6830
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
StringNode.
Definition: ast.h:6857
pm_node_t base
The embedded base node.
Definition: ast.h:6859
pm_string_t unescaped
StringNode::unescaped.
Definition: ast.h:6880
pm_location_t closing_loc
StringNode::closing_loc.
Definition: ast.h:6875
pm_location_t opening_loc
StringNode::opening_loc.
Definition: ast.h:6865
A generic string type that can have various ownership semantics.
Definition: pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition: pm_string.h:35
size_t length
The length of the string in bytes of memory.
Definition: pm_string.h:38
enum pm_string_t::@98 type
The type of the string.
SymbolNode.
Definition: ast.h:6949
pm_location_t value_loc
SymbolNode::value_loc.
Definition: ast.h:6962
pm_string_t unescaped
SymbolNode::unescaped.
Definition: ast.h:6972
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
Definition: prism.c:10338
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition: prism.c:10343
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition: prism.c:10349
This struct represents a token in the Ruby source.
Definition: ast.h:530
const uint8_t * end
A pointer to the end location of the token in the source.
Definition: ast.h:538
const uint8_t * start
A pointer to the start location of the token in the source.
Definition: ast.h:535
pm_token_type_t type
The type of the token.
Definition: ast.h:532
UndefNode.
Definition: ast.h:7005
UnlessNode.
Definition: ast.h:7036
struct pm_statements_node * statements
UnlessNode::statements.
Definition: ast.h:7086
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition: ast.h:7096
WhenNode.
Definition: ast.h:7167
XStringNode.
Definition: ast.h:7253