Ruby  3.4.0dev (2024-11-05 revision e440268d51fe02b303e3817a7a733a0dac1c5091)
prism.c
1 #include "prism.h"
2 
6 const char *
7 pm_version(void) {
8  return PRISM_VERSION;
9 }
10 
// The tab whitespace size constant used by this file.
#define PM_TAB_WHITESPACE_SIZE 8

// Minimum/maximum helpers. Each argument can be evaluated more than once, so
// do not pass expressions that have side effects.
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((b) < (a)) ? (a) : (b))
20 
21 /******************************************************************************/
22 /* Lex mode manipulations */
23 /******************************************************************************/
24 
/**
 * Return the byte that increases the nesting level for the given opening
 * delimiter. Only the four bracket-style openers ( [ { < nest; for any other
 * byte this returns '\0'.
 */
static inline uint8_t
lex_mode_incrementor(const uint8_t start) {
    const bool opens_pair = (start == '(') || (start == '[') || (start == '{') || (start == '<');
    return opens_pair ? start : (uint8_t) '\0';
}
41 
/**
 * Return the byte that closes a construct opened with the given delimiter.
 * Bracket-style openers map to their matching closer; every other byte closes
 * itself.
 */
static inline uint8_t
lex_mode_terminator(const uint8_t start) {
    if (start == '(') return ')';
    if (start == '[') return ']';
    if (start == '{') return '}';
    if (start == '<') return '>';
    return start;
}
61 
67 static bool
68 lex_mode_push(pm_parser_t *parser, pm_lex_mode_t lex_mode) {
69  lex_mode.prev = parser->lex_modes.current;
70  parser->lex_modes.index++;
71 
72  if (parser->lex_modes.index > PM_LEX_STACK_SIZE - 1) {
73  parser->lex_modes.current = (pm_lex_mode_t *) xmalloc(sizeof(pm_lex_mode_t));
74  if (parser->lex_modes.current == NULL) return false;
75 
76  *parser->lex_modes.current = lex_mode;
77  } else {
78  parser->lex_modes.stack[parser->lex_modes.index] = lex_mode;
79  parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
80  }
81 
82  return true;
83 }
84 
88 static inline bool
89 lex_mode_push_list(pm_parser_t *parser, bool interpolation, uint8_t delimiter) {
90  uint8_t incrementor = lex_mode_incrementor(delimiter);
91  uint8_t terminator = lex_mode_terminator(delimiter);
92 
93  pm_lex_mode_t lex_mode = {
94  .mode = PM_LEX_LIST,
95  .as.list = {
96  .nesting = 0,
97  .interpolation = interpolation,
98  .incrementor = incrementor,
99  .terminator = terminator
100  }
101  };
102 
103  // These are the places where we need to split up the content of the list.
104  // We'll use strpbrk to find the first of these characters.
105  uint8_t *breakpoints = lex_mode.as.list.breakpoints;
106  memcpy(breakpoints, "\\ \t\f\r\v\n\0\0\0", sizeof(lex_mode.as.list.breakpoints));
107  size_t index = 7;
108 
109  // Now we'll add the terminator to the list of breakpoints. If the
110  // terminator is not already a NULL byte, add it to the list.
111  if (terminator != '\0') {
112  breakpoints[index++] = terminator;
113  }
114 
115  // If interpolation is allowed, then we're going to check for the #
116  // character. Otherwise we'll only look for escapes and the terminator.
117  if (interpolation) {
118  breakpoints[index++] = '#';
119  }
120 
121  // If there is an incrementor, then we'll check for that as well.
122  if (incrementor != '\0') {
123  breakpoints[index++] = incrementor;
124  }
125 
126  parser->explicit_encoding = NULL;
127  return lex_mode_push(parser, lex_mode);
128 }
129 
135 static inline bool
136 lex_mode_push_list_eof(pm_parser_t *parser) {
137  return lex_mode_push_list(parser, false, '\0');
138 }
139 
143 static inline bool
144 lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
145  pm_lex_mode_t lex_mode = {
146  .mode = PM_LEX_REGEXP,
147  .as.regexp = {
148  .nesting = 0,
149  .incrementor = incrementor,
150  .terminator = terminator
151  }
152  };
153 
154  // These are the places where we need to split up the content of the
155  // regular expression. We'll use strpbrk to find the first of these
156  // characters.
157  uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
158  memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
159  size_t index = 4;
160 
161  // First we'll add the terminator.
162  if (terminator != '\0') {
163  breakpoints[index++] = terminator;
164  }
165 
166  // Next, if there is an incrementor, then we'll check for that as well.
167  if (incrementor != '\0') {
168  breakpoints[index++] = incrementor;
169  }
170 
171  parser->explicit_encoding = NULL;
172  return lex_mode_push(parser, lex_mode);
173 }
174 
178 static inline bool
179 lex_mode_push_string(pm_parser_t *parser, bool interpolation, bool label_allowed, uint8_t incrementor, uint8_t terminator) {
180  pm_lex_mode_t lex_mode = {
181  .mode = PM_LEX_STRING,
182  .as.string = {
183  .nesting = 0,
184  .interpolation = interpolation,
185  .label_allowed = label_allowed,
186  .incrementor = incrementor,
187  .terminator = terminator
188  }
189  };
190 
191  // These are the places where we need to split up the content of the
192  // string. We'll use strpbrk to find the first of these characters.
193  uint8_t *breakpoints = lex_mode.as.string.breakpoints;
194  memcpy(breakpoints, "\r\n\\\0\0\0", sizeof(lex_mode.as.string.breakpoints));
195  size_t index = 3;
196 
197  // Now add in the terminator. If the terminator is not already a NULL byte,
198  // then we'll add it.
199  if (terminator != '\0') {
200  breakpoints[index++] = terminator;
201  }
202 
203  // If interpolation is allowed, then we're going to check for the #
204  // character. Otherwise we'll only look for escapes and the terminator.
205  if (interpolation) {
206  breakpoints[index++] = '#';
207  }
208 
209  // If we have an incrementor, then we'll add that in as a breakpoint as
210  // well.
211  if (incrementor != '\0') {
212  breakpoints[index++] = incrementor;
213  }
214 
215  parser->explicit_encoding = NULL;
216  return lex_mode_push(parser, lex_mode);
217 }
218 
224 static inline bool
225 lex_mode_push_string_eof(pm_parser_t *parser) {
226  return lex_mode_push_string(parser, false, false, '\0', '\0');
227 }
228 
234 static void
235 lex_mode_pop(pm_parser_t *parser) {
236  if (parser->lex_modes.index == 0) {
237  parser->lex_modes.current->mode = PM_LEX_DEFAULT;
238  } else if (parser->lex_modes.index < PM_LEX_STACK_SIZE) {
239  parser->lex_modes.index--;
240  parser->lex_modes.current = &parser->lex_modes.stack[parser->lex_modes.index];
241  } else {
242  parser->lex_modes.index--;
243  pm_lex_mode_t *prev = parser->lex_modes.current->prev;
244  xfree(parser->lex_modes.current);
245  parser->lex_modes.current = prev;
246  }
247 }
248 
252 static inline bool
253 lex_state_p(const pm_parser_t *parser, pm_lex_state_t state) {
254  return parser->lex_state & state;
255 }
256 
/**
 * The result of lex_state_ignored_p.
 */
typedef enum {
    /** Returned when neither of the other two conditions holds. */
    PM_IGNORED_NEWLINE_NONE = 0,

    /**
     * Returned when the lex state includes BEG, CLASS, FNAME or DOT and does
     * not include LABELED.
     */
    PM_IGNORED_NEWLINE_ALL,

    /**
     * Returned when the lex state, ignoring the LABEL bit, is exactly
     * ARG|LABELED.
     */
    PM_IGNORED_NEWLINE_PATTERN
} pm_ignored_newline_type_t;
262 
263 static inline pm_ignored_newline_type_t
264 lex_state_ignored_p(pm_parser_t *parser) {
265  bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
266 
267  if (ignored) {
268  return PM_IGNORED_NEWLINE_ALL;
269  } else if ((parser->lex_state & ~((unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
270  return PM_IGNORED_NEWLINE_PATTERN;
271  } else {
272  return PM_IGNORED_NEWLINE_NONE;
273  }
274 }
275 
276 static inline bool
277 lex_state_beg_p(pm_parser_t *parser) {
278  return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
279 }
280 
281 static inline bool
282 lex_state_arg_p(pm_parser_t *parser) {
283  return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
284 }
285 
286 static inline bool
287 lex_state_spcarg_p(pm_parser_t *parser, bool space_seen) {
288  if (parser->current.end >= parser->end) {
289  return false;
290  }
291  return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->current.end);
292 }
293 
294 static inline bool
295 lex_state_end_p(pm_parser_t *parser) {
296  return lex_state_p(parser, PM_LEX_STATE_END_ANY);
297 }
298 
302 static inline bool
303 lex_state_operator_p(pm_parser_t *parser) {
304  return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
305 }
306 
311 static inline void
312 lex_state_set(pm_parser_t *parser, pm_lex_state_t state) {
313  parser->lex_state = state;
314 }
315 
// Debug logging of lex state changes is off unless PM_DEBUG_LOGGING has
// already been defined to a non-zero value before this point.
#if !defined(PM_DEBUG_LOGGING)
#define PM_DEBUG_LOGGING 0
#endif
323 
324 #if PM_DEBUG_LOGGING
325 PRISM_ATTRIBUTE_UNUSED static void
326 debug_state(pm_parser_t *parser) {
327  fprintf(stderr, "STATE: ");
328  bool first = true;
329 
330  if (parser->lex_state == PM_LEX_STATE_NONE) {
331  fprintf(stderr, "NONE\n");
332  return;
333  }
334 
335 #define CHECK_STATE(state) \
336  if (parser->lex_state & state) { \
337  if (!first) fprintf(stderr, "|"); \
338  fprintf(stderr, "%s", #state); \
339  first = false; \
340  }
341 
342  CHECK_STATE(PM_LEX_STATE_BEG)
343  CHECK_STATE(PM_LEX_STATE_END)
344  CHECK_STATE(PM_LEX_STATE_ENDARG)
345  CHECK_STATE(PM_LEX_STATE_ENDFN)
346  CHECK_STATE(PM_LEX_STATE_ARG)
347  CHECK_STATE(PM_LEX_STATE_CMDARG)
348  CHECK_STATE(PM_LEX_STATE_MID)
349  CHECK_STATE(PM_LEX_STATE_FNAME)
350  CHECK_STATE(PM_LEX_STATE_DOT)
351  CHECK_STATE(PM_LEX_STATE_CLASS)
352  CHECK_STATE(PM_LEX_STATE_LABEL)
353  CHECK_STATE(PM_LEX_STATE_LABELED)
354  CHECK_STATE(PM_LEX_STATE_FITEM)
355 
356 #undef CHECK_STATE
357 
358  fprintf(stderr, "\n");
359 }
360 
361 static void
362 debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * caller_name, int line_number) {
363  fprintf(stderr, "Caller: %s:%d\nPrevious: ", caller_name, line_number);
364  debug_state(parser);
365  lex_state_set(parser, state);
366  fprintf(stderr, "Now: ");
367  debug_state(parser);
368  fprintf(stderr, "\n");
369 }
370 
371 #define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
372 #endif
373 
374 /******************************************************************************/
375 /* Command-line macro helpers */
376 /******************************************************************************/
377 
// Evaluates to a non-zero value when the given option bit is set in the
// parser's command_line field.
#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) ((parser)->command_line & (option))

// Tests the PM_OPTIONS_COMMAND_LINE_A bit.
#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)

// Tests the PM_OPTIONS_COMMAND_LINE_E bit.
#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)

// Tests the PM_OPTIONS_COMMAND_LINE_L bit.
#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)

// Tests the PM_OPTIONS_COMMAND_LINE_N bit.
#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)

// Tests the PM_OPTIONS_COMMAND_LINE_P bit.
#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)

// Tests the PM_OPTIONS_COMMAND_LINE_X bit.
#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
398 
399 /******************************************************************************/
400 /* Diagnostic-related functions */
401 /******************************************************************************/
402 
406 static inline void
407 pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
408  pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
409 }
410 
// Append an error with the given diagnostic id, covering [start, end), to the
// parser's error list, passing the trailing arguments on as format arguments.
// The parser argument is parenthesized so that any pointer expression (for
// example `parsers + 1`) expands correctly.
#define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->error_list, start, end, diag_id, __VA_ARGS__)
416 
421 static inline void
422 pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
423  pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
424 }
425 
// Append a formatted error covering the given location (a pointer to
// something with `start` and `end` members) to the parser's error list.
#define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
432 
437 static inline void
438 pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
439  pm_parser_err(parser, node->location.start, node->location.end, diag_id);
440 }
441 
// Append a formatted error covering the location of the given node to the
// parser's error list.
#define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
448 
// Append a formatted error covering the location of the given node, passing
// the length of the node's source and a pointer to its first byte as the two
// format arguments.
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
    PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
455 
460 static inline void
461 pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
462  pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
463 }
464 
469 static inline void
470 pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
471  pm_parser_err(parser, token->start, token->end, diag_id);
472 }
473 
// Append a formatted error covering the given token (passed by value, not by
// pointer) to the parser's error list.
#define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
480 
// Append a formatted error covering the given token, passing the length of
// the token and a pointer to its first byte as the two format arguments.
#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
487 
491 static inline void
492 pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
493  pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
494 }
495 
500 static inline void
501 pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
502  pm_parser_warn(parser, token->start, token->end, diag_id);
503 }
504 
509 static inline void
510 pm_parser_warn_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
511  pm_parser_warn(parser, node->location.start, node->location.end, diag_id);
512 }
513 
// Append a warning with the given diagnostic id, covering [start, end), to the
// parser's warning list, passing the trailing arguments on as format
// arguments. The parser argument is parenthesized so that any pointer
// expression (for example `parsers + 1`) expands correctly.
#define PM_PARSER_WARN_FORMAT(parser, start, end, diag_id, ...) \
    pm_diagnostic_list_append_format(&(parser)->warning_list, start, end, diag_id, __VA_ARGS__)
519 
// Append a formatted warning covering the given token (passed by value, not
// by pointer) to the parser's warning list.
#define PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
526 
// Append a formatted warning covering the given token, passing the length of
// the token and a pointer to its first byte as the two format arguments.
#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
    PM_PARSER_WARN_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
533 
// Append a formatted warning covering the location of the given node to the
// parser's warning list.
#define PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, ...) \
    PM_PARSER_WARN_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
540 
546 static void
547 pm_parser_err_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
548  PM_PARSER_ERR_FORMAT(
549  parser,
550  ident_start,
551  ident_start + ident_length,
552  PM_ERR_HEREDOC_TERM,
553  (int) ident_length,
554  (const char *) ident_start
555  );
556 }
557 
558 /******************************************************************************/
559 /* Scope-related functions */
560 /******************************************************************************/
561 
565 static bool
566 pm_parser_scope_push(pm_parser_t *parser, bool closed) {
567  pm_scope_t *scope = (pm_scope_t *) xmalloc(sizeof(pm_scope_t));
568  if (scope == NULL) return false;
569 
570  *scope = (pm_scope_t) {
571  .previous = parser->current_scope,
572  .locals = { 0 },
573  .parameters = PM_SCOPE_PARAMETERS_NONE,
574  .implicit_parameters = { 0 },
575  .shareable_constant = parser->current_scope == NULL ? PM_SCOPE_SHAREABLE_CONSTANT_NONE : parser->current_scope->shareable_constant,
576  .closed = closed
577  };
578 
579  parser->current_scope = scope;
580  return true;
581 }
582 
587 static bool
588 pm_parser_scope_toplevel_p(pm_parser_t *parser) {
589  pm_scope_t *scope = parser->current_scope;
590 
591  do {
592  if (scope->previous == NULL) return true;
593  if (scope->closed) return false;
594  } while ((scope = scope->previous) != NULL);
595 
596  assert(false && "unreachable");
597  return true;
598 }
599 
603 static pm_scope_t *
604 pm_parser_scope_find(pm_parser_t *parser, uint32_t depth) {
605  pm_scope_t *scope = parser->current_scope;
606 
607  while (depth-- > 0) {
608  assert(scope != NULL);
609  scope = scope->previous;
610  }
611 
612  return scope;
613 }
614 
/**
 * The result of pm_parser_scope_forwarding_param_check.
 */
typedef enum {
    /**
     * A closed scope declares the parameter and no open scope before it
     * declares it too.
     */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,

    /**
     * A closed scope declares the parameter, but an open scope before it
     * declares it as well.
     */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,

    /** No closed scope declaring the parameter was found. */
    PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
} pm_scope_forwarding_param_check_result_t;
620 
621 static pm_scope_forwarding_param_check_result_t
622 pm_parser_scope_forwarding_param_check(pm_parser_t *parser, const uint8_t mask) {
623  pm_scope_t *scope = parser->current_scope;
624  bool conflict = false;
625 
626  while (scope != NULL) {
627  if (scope->parameters & mask) {
628  if (scope->closed) {
629  if (conflict) {
630  return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
631  } else {
632  return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
633  }
634  }
635 
636  conflict = true;
637  }
638 
639  if (scope->closed) break;
640  scope = scope->previous;
641  }
642 
643  return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
644 }
645 
646 static void
647 pm_parser_scope_forwarding_block_check(pm_parser_t *parser, const pm_token_t * token) {
648  switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
649  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
650  // Pass.
651  break;
652  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
653  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
654  break;
655  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
656  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
657  break;
658  }
659 }
660 
661 static void
662 pm_parser_scope_forwarding_positionals_check(pm_parser_t *parser, const pm_token_t * token) {
663  switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
664  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
665  // Pass.
666  break;
667  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
668  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
669  break;
670  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
671  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
672  break;
673  }
674 }
675 
676 static void
677 pm_parser_scope_forwarding_all_check(pm_parser_t *parser, const pm_token_t *token) {
678  switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
679  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
680  // Pass.
681  break;
682  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
683  // This shouldn't happen, because ... is not allowed in the
684  // declaration of blocks. If we get here, we assume we already have
685  // an error for this.
686  break;
687  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
688  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
689  break;
690  }
691 }
692 
693 static void
694 pm_parser_scope_forwarding_keywords_check(pm_parser_t *parser, const pm_token_t * token) {
695  switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
696  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
697  // Pass.
698  break;
699  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
700  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
701  break;
702  case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
703  pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
704  break;
705  }
706 }
707 
711 static inline pm_shareable_constant_value_t
712 pm_parser_scope_shareable_constant_get(pm_parser_t *parser) {
713  return parser->current_scope->shareable_constant;
714 }
715 
720 static void
721 pm_parser_scope_shareable_constant_set(pm_parser_t *parser, pm_shareable_constant_value_t shareable_constant) {
722  pm_scope_t *scope = parser->current_scope;
723 
724  do {
725  scope->shareable_constant = shareable_constant;
726  } while (!scope->closed && (scope = scope->previous) != NULL);
727 }
728 
729 /******************************************************************************/
730 /* Local variable-related functions */
731 /******************************************************************************/
732 
// Locals tables whose capacity is below this value are searched as a plain
// list; at or above it they are addressed as a hash table.
#define PM_LOCALS_HASH_THRESHOLD 9
737 
738 static void
739 pm_locals_free(pm_locals_t *locals) {
740  if (locals->capacity > 0) {
741  xfree(locals->locals);
742  }
743 }
744 
749 static uint32_t
750 pm_locals_hash(pm_constant_id_t name) {
751  name = ((name >> 16) ^ name) * 0x45d9f3b;
752  name = ((name >> 16) ^ name) * 0x45d9f3b;
753  name = (name >> 16) ^ name;
754  return name;
755 }
756 
761 static void
762 pm_locals_resize(pm_locals_t *locals) {
763  uint32_t next_capacity = locals->capacity == 0 ? 4 : (locals->capacity * 2);
764  assert(next_capacity > locals->capacity);
765 
766  pm_local_t *next_locals = xcalloc(next_capacity, sizeof(pm_local_t));
767  if (next_locals == NULL) abort();
768 
769  if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
770  if (locals->size > 0) {
771  memcpy(next_locals, locals->locals, locals->size * sizeof(pm_local_t));
772  }
773  } else {
774  // If we just switched from a list to a hash, then we need to fill in
775  // the hash values of all of the locals.
776  bool hash_needed = (locals->capacity <= PM_LOCALS_HASH_THRESHOLD);
777  uint32_t mask = next_capacity - 1;
778 
779  for (uint32_t index = 0; index < locals->capacity; index++) {
780  pm_local_t *local = &locals->locals[index];
781 
782  if (local->name != PM_CONSTANT_ID_UNSET) {
783  if (hash_needed) local->hash = pm_locals_hash(local->name);
784 
785  uint32_t hash = local->hash;
786  while (next_locals[hash & mask].name != PM_CONSTANT_ID_UNSET) hash++;
787  next_locals[hash & mask] = *local;
788  }
789  }
790  }
791 
792  pm_locals_free(locals);
793  locals->locals = next_locals;
794  locals->capacity = next_capacity;
795 }
796 
812 static bool
813 pm_locals_write(pm_locals_t *locals, pm_constant_id_t name, const uint8_t *start, const uint8_t *end, uint32_t reads) {
814  if (locals->size >= (locals->capacity / 4 * 3)) {
815  pm_locals_resize(locals);
816  }
817 
818  if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
819  for (uint32_t index = 0; index < locals->capacity; index++) {
820  pm_local_t *local = &locals->locals[index];
821 
822  if (local->name == PM_CONSTANT_ID_UNSET) {
823  *local = (pm_local_t) {
824  .name = name,
825  .location = { .start = start, .end = end },
826  .index = locals->size++,
827  .reads = reads,
828  .hash = 0
829  };
830  return true;
831  } else if (local->name == name) {
832  return false;
833  }
834  }
835  } else {
836  uint32_t mask = locals->capacity - 1;
837  uint32_t hash = pm_locals_hash(name);
838  uint32_t initial_hash = hash;
839 
840  do {
841  pm_local_t *local = &locals->locals[hash & mask];
842 
843  if (local->name == PM_CONSTANT_ID_UNSET) {
844  *local = (pm_local_t) {
845  .name = name,
846  .location = { .start = start, .end = end },
847  .index = locals->size++,
848  .reads = reads,
849  .hash = initial_hash
850  };
851  return true;
852  } else if (local->name == name) {
853  return false;
854  } else {
855  hash++;
856  }
857  } while ((hash & mask) != initial_hash);
858  }
859 
860  assert(false && "unreachable");
861  return true;
862 }
863 
868 static uint32_t
869 pm_locals_find(pm_locals_t *locals, pm_constant_id_t name) {
870  if (locals->capacity < PM_LOCALS_HASH_THRESHOLD) {
871  for (uint32_t index = 0; index < locals->size; index++) {
872  pm_local_t *local = &locals->locals[index];
873  if (local->name == name) return index;
874  }
875  } else {
876  uint32_t mask = locals->capacity - 1;
877  uint32_t hash = pm_locals_hash(name);
878  uint32_t initial_hash = hash & mask;
879 
880  do {
881  pm_local_t *local = &locals->locals[hash & mask];
882 
883  if (local->name == PM_CONSTANT_ID_UNSET) {
884  return UINT32_MAX;
885  } else if (local->name == name) {
886  return hash & mask;
887  } else {
888  hash++;
889  }
890  } while ((hash & mask) != initial_hash);
891  }
892 
893  return UINT32_MAX;
894 }
895 
900 static void
901 pm_locals_read(pm_locals_t *locals, pm_constant_id_t name) {
902  uint32_t index = pm_locals_find(locals, name);
903  assert(index != UINT32_MAX);
904 
905  pm_local_t *local = &locals->locals[index];
906  assert(local->reads < UINT32_MAX);
907 
908  local->reads++;
909 }
910 
915 static void
916 pm_locals_unread(pm_locals_t *locals, pm_constant_id_t name) {
917  uint32_t index = pm_locals_find(locals, name);
918  assert(index != UINT32_MAX);
919 
920  pm_local_t *local = &locals->locals[index];
921  assert(local->reads > 0);
922 
923  local->reads--;
924 }
925 
929 static uint32_t
930 pm_locals_reads(pm_locals_t *locals, pm_constant_id_t name) {
931  uint32_t index = pm_locals_find(locals, name);
932  assert(index != UINT32_MAX);
933 
934  return locals->locals[index].reads;
935 }
936 
// Write the names of the locals in the given table into `list`, placing each
// name at the position given by the local's recorded index, and emit
// PM_WARN_UNUSED_LOCAL_VARIABLE warnings for locals that were never read.
//
// NOTE(review): the embedded listing numbers jump from 946 to 948 directly
// below the signature, so a statement may be missing from this view. Confirm
// against the upstream file before relying on this body as complete.
945 static void
946 pm_locals_order(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, pm_locals_t *locals, pm_constant_id_list_t *list, bool toplevel) {
948 
949  // If we're still below the threshold for switching to a hash, then we only
950  // need to loop over the locals until we hit the size because the locals are
951  // stored in a list.
952  uint32_t capacity = locals->capacity < PM_LOCALS_HASH_THRESHOLD ? locals->size : locals->capacity;
953 
954  // We will only warn for unused variables if we're not at the top level, or
955  // if we're parsing a file outside of eval or -e.
956  bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
957 
958  for (uint32_t index = 0; index < capacity; index++) {
959  pm_local_t *local = &locals->locals[index];
960 
  // Unset slots are skipped.
961  if (local->name != PM_CONSTANT_ID_UNSET) {
962  pm_constant_id_list_insert(list, (size_t) local->index, local->name);
963 
  // Only locals with zero reads are candidates for the warning. The
  // line lookup is only consulted when parser->start_line is negative.
964  if (warn_unused && local->reads == 0 && ((parser->start_line >= 0) || (pm_newline_list_line(&parser->newline_list, local->location.start, parser->start_line) >= 0))) {
965  pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->name);
966 
  // Names starting with an underscore never trigger the warning.
967  if (constant->length >= 1 && *constant->start != '_') {
968  PM_PARSER_WARN_FORMAT(
969  parser,
970  local->location.start,
971  local->location.end,
972  PM_WARN_UNUSED_LOCAL_VARIABLE,
973  (int) constant->length,
974  (const char *) constant->start
975  );
976  }
977  }
978  }
979  }
980 }
981 
982 /******************************************************************************/
983 /* Node-related functions */
984 /******************************************************************************/
985 
989 static inline pm_constant_id_t
990 pm_parser_constant_id_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
991  return pm_constant_pool_insert_shared(&parser->constant_pool, start, (size_t) (end - start));
992 }
993 
997 static inline pm_constant_id_t
998 pm_parser_constant_id_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
999  return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
1000 }
1001 
1005 static inline pm_constant_id_t
1006 pm_parser_constant_id_constant(pm_parser_t *parser, const char *start, size_t length) {
1007  return pm_constant_pool_insert_constant(&parser->constant_pool, (const uint8_t *) start, length);
1008 }
1009 
1013 static inline pm_constant_id_t
1014 pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1015  return pm_parser_constant_id_location(parser, token->start, token->end);
1016 }
1017 
1022 static inline pm_constant_id_t
1023 pm_parser_optional_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
1024  return token->type == PM_TOKEN_NOT_PROVIDED ? 0 : pm_parser_constant_id_token(parser, token);
1025 }
1026 
1032 static pm_node_t *
1033 pm_check_value_expression(pm_parser_t *parser, pm_node_t *node) {
1034  pm_node_t *void_node = NULL;
1035 
1036  while (node != NULL) {
1037  switch (PM_NODE_TYPE(node)) {
1038  case PM_RETURN_NODE:
1039  case PM_BREAK_NODE:
1040  case PM_NEXT_NODE:
1041  case PM_REDO_NODE:
1042  case PM_RETRY_NODE:
1044  return void_node != NULL ? void_node : node;
1046  return NULL;
1047  case PM_BEGIN_NODE: {
1048  pm_begin_node_t *cast = (pm_begin_node_t *) node;
1049 
1050  if (cast->ensure_clause != NULL) {
1051  if (cast->rescue_clause != NULL) {
1052  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->rescue_clause);
1053  if (vn != NULL) return vn;
1054  }
1055 
1056  if (cast->statements != NULL) {
1057  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1058  if (vn != NULL) return vn;
1059  }
1060 
1061  node = (pm_node_t *) cast->ensure_clause;
1062  } else if (cast->rescue_clause != NULL) {
1063  if (cast->statements == NULL) return NULL;
1064 
1065  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1066  if (vn == NULL) return NULL;
1067  if (void_node == NULL) void_node = vn;
1068 
1069  for (pm_rescue_node_t *rescue_clause = cast->rescue_clause; rescue_clause != NULL; rescue_clause = rescue_clause->subsequent) {
1070  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) rescue_clause->statements);
1071  if (vn == NULL) {
1072  void_node = NULL;
1073  break;
1074  }
1075  if (void_node == NULL) {
1076  void_node = vn;
1077  }
1078  }
1079 
1080  if (cast->else_clause != NULL) {
1081  node = (pm_node_t *) cast->else_clause;
1082  } else {
1083  return void_node;
1084  }
1085  } else {
1086  node = (pm_node_t *) cast->statements;
1087  }
1088 
1089  break;
1090  }
1091  case PM_ENSURE_NODE: {
1092  pm_ensure_node_t *cast = (pm_ensure_node_t *) node;
1093  node = (pm_node_t *) cast->statements;
1094  break;
1095  }
1096  case PM_PARENTHESES_NODE: {
1098  node = (pm_node_t *) cast->body;
1099  break;
1100  }
1101  case PM_STATEMENTS_NODE: {
1102  pm_statements_node_t *cast = (pm_statements_node_t *) node;
1103  node = cast->body.nodes[cast->body.size - 1];
1104  break;
1105  }
1106  case PM_IF_NODE: {
1107  pm_if_node_t *cast = (pm_if_node_t *) node;
1108  if (cast->statements == NULL || cast->subsequent == NULL) {
1109  return NULL;
1110  }
1111  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1112  if (vn == NULL) {
1113  return NULL;
1114  }
1115  if (void_node == NULL) {
1116  void_node = vn;
1117  }
1118  node = cast->subsequent;
1119  break;
1120  }
1121  case PM_UNLESS_NODE: {
1122  pm_unless_node_t *cast = (pm_unless_node_t *) node;
1123  if (cast->statements == NULL || cast->else_clause == NULL) {
1124  return NULL;
1125  }
1126  pm_node_t *vn = pm_check_value_expression(parser, (pm_node_t *) cast->statements);
1127  if (vn == NULL) {
1128  return NULL;
1129  }
1130  if (void_node == NULL) {
1131  void_node = vn;
1132  }
1133  node = (pm_node_t *) cast->else_clause;
1134  break;
1135  }
1136  case PM_ELSE_NODE: {
1137  pm_else_node_t *cast = (pm_else_node_t *) node;
1138  node = (pm_node_t *) cast->statements;
1139  break;
1140  }
1141  case PM_AND_NODE: {
1142  pm_and_node_t *cast = (pm_and_node_t *) node;
1143  node = cast->left;
1144  break;
1145  }
1146  case PM_OR_NODE: {
1147  pm_or_node_t *cast = (pm_or_node_t *) node;
1148  node = cast->left;
1149  break;
1150  }
1153 
1154  pm_scope_t *scope = parser->current_scope;
1155  for (uint32_t depth = 0; depth < cast->depth; depth++) scope = scope->previous;
1156 
1157  pm_locals_read(&scope->locals, cast->name);
1158  return NULL;
1159  }
1160  default:
1161  return NULL;
1162  }
1163  }
1164 
1165  return NULL;
1166 }
1167 
1168 static inline void
1169 pm_assert_value_expression(pm_parser_t *parser, pm_node_t *node) {
1170  pm_node_t *void_node = pm_check_value_expression(parser, node);
1171  if (void_node != NULL) {
1172  pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1173  }
1174 }
1175 
// Emit a PM_WARN_VOID_STATEMENT warning for `node` when it is one of the node
// kinds handled by the switch below. `type` is the text describing the node
// and `length` is the number of bytes of that text; both are handed to the
// warning format. When no case assigns `type`, it stays NULL and nothing is
// reported.
1179 static void
1180 pm_void_statement_check(pm_parser_t *parser, const pm_node_t *node) {
1181  const char *type = NULL;
1182  int length = 0;
1183 
1184  switch (PM_NODE_TYPE(node)) {
 // NOTE(review): the case labels that lead into the "a variable" assignment
 // are not visible in this listing (the numbering jumps from 1184 to 1191) —
 // confirm against the upstream file.
1191  type = "a variable";
1192  length = 10;
1193  break;
1194  case PM_CALL_NODE: {
1195  const pm_call_node_t *cast = (const pm_call_node_t *) node;
 // Calls that have a call operator location, or that have no message
 // location, never produce a warning here.
1196  if (cast->call_operator_loc.start != NULL || cast->message_loc.start == NULL) break;
1197 
 // Look the method name up in the constant pool. Only the operator names
 // matched below warn; for one- and two-byte names `type` points directly
 // at the pooled name bytes, so `length` bounds how much of it is used.
1198  const pm_constant_t *message = pm_constant_pool_id_to_constant(&parser->constant_pool, cast->name);
1199  switch (message->length) {
1200  case 1:
1201  switch (message->start[0]) {
1202  case '+':
1203  case '-':
1204  case '*':
1205  case '/':
1206  case '%':
1207  case '|':
1208  case '^':
1209  case '&':
1210  case '>':
1211  case '<':
1212  type = (const char *) message->start;
1213  length = 1;
1214  break;
1215  }
1216  break;
1217  case 2:
 // Two-byte names are dispatched on their second byte: "<=", ">=", "!=",
 // "==", then "+@" and "-@", then "**".
1218  switch (message->start[1]) {
1219  case '=':
1220  if (message->start[0] == '<' || message->start[0] == '>' || message->start[0] == '!' || message->start[0] == '=') {
1221  type = (const char *) message->start;
1222  length = 2;
1223  }
1224  break;
1225  case '@':
1226  if (message->start[0] == '+' || message->start[0] == '-') {
1227  type = (const char *) message->start;
1228  length = 2;
1229  }
1230  break;
1231  case '*':
1232  if (message->start[0] == '*') {
1233  type = (const char *) message->start;
1234  length = 2;
1235  }
1236  break;
1237  }
1238  break;
1239  case 3:
 // The only three-byte name that warns is "<=>".
1240  if (memcmp(message->start, "<=>", 3) == 0) {
1241  type = "<=>";
1242  length = 3;
1243  }
1244  break;
1245  }
1246 
1247  break;
1248  }
1249  case PM_CONSTANT_PATH_NODE:
1250  type = "::";
1251  length = 2;
1252  break;
1253  case PM_CONSTANT_READ_NODE:
1254  type = "a constant";
1255  length = 10;
1256  break;
1257  case PM_DEFINED_NODE:
1258  type = "defined?";
1259  length = 8;
1260  break;
1261  case PM_FALSE_NODE:
1262  type = "false";
1263  length = 5;
1264  break;
 // Every node kind in this group is reported as "a literal".
 // NOTE(review): the numbering gaps inside this group (1267 → 1270 and
 // 1270 → 1273) suggest further case labels that are not visible in this
 // listing — confirm against the upstream file.
1265  case PM_FLOAT_NODE:
1266  case PM_IMAGINARY_NODE:
1267  case PM_INTEGER_NODE:
1270  case PM_RATIONAL_NODE:
1273  case PM_SOURCE_FILE_NODE:
1274  case PM_SOURCE_LINE_NODE:
1275  case PM_STRING_NODE:
1276  case PM_SYMBOL_NODE:
1277  type = "a literal";
1278  length = 9;
1279  break;
1280  case PM_NIL_NODE:
1281  type = "nil";
1282  length = 3;
1283  break;
1284  case PM_RANGE_NODE: {
1285  const pm_range_node_t *cast = (const pm_range_node_t *) node;
1286 
 // NOTE(review): the `if` line that opens this if/else is not visible in
 // this listing (1286 → 1288); presumably it tests a flag on `cast` to
 // choose between "..." and ".." — confirm against the upstream file.
1288  type = "...";
1289  length = 3;
1290  } else {
1291  type = "..";
1292  length = 2;
1293  }
1294 
1295  break;
1296  }
1297  case PM_SELF_NODE:
1298  type = "self";
1299  length = 4;
1300  break;
1301  case PM_TRUE_NODE:
1302  type = "true";
1303  length = 4;
1304  break;
1305  default:
1306  break;
1307  }
1308 
 // A non-NULL `type` means one of the cases above matched: report it.
1309  if (type != NULL) {
1310  PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length, type);
1311  }
1312 }
1313 
1318 static void
1319 pm_void_statements_check(pm_parser_t *parser, const pm_statements_node_t *node, bool last_value) {
1320  assert(node->body.size > 0);
1321  const size_t size = node->body.size - (last_value ? 1 : 0);
1322  for (size_t index = 0; index < size; index++) {
1323  pm_void_statement_check(parser, node->body.nodes[index]);
1324  }
1325 }
1326 
/**
 * The context in which a predicate is being examined. It selects how literal
 * warnings raised by pm_parser_warn_conditional_predicate_literal are worded.
 */
typedef enum {
    /** Literal warnings describe the node as a "condition". */
    PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL = 0,

    /** Literal warnings describe the node as a "flip-flop". */
    PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP = 1,

    /** No literal warning is emitted for this context. */
    PM_CONDITIONAL_PREDICATE_TYPE_NOT = 2
} pm_conditional_predicate_type_t;
1337 
1341 static void
1342 pm_parser_warn_conditional_predicate_literal(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type, pm_diagnostic_id_t diag_id, const char *prefix) {
1343  switch (type) {
1344  case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1345  PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "condition");
1346  break;
1347  case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1348  PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix, "flip-flop");
1349  break;
1350  case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1351  break;
1352  }
1353 }
1354 
1359 static bool
1360 pm_conditional_predicate_warn_write_literal_p(const pm_node_t *node) {
1361  switch (PM_NODE_TYPE(node)) {
1362  case PM_ARRAY_NODE: {
1363  if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1364 
1365  const pm_array_node_t *cast = (const pm_array_node_t *) node;
1366  for (size_t index = 0; index < cast->elements.size; index++) {
1367  if (!pm_conditional_predicate_warn_write_literal_p(cast->elements.nodes[index])) return false;
1368  }
1369 
1370  return true;
1371  }
1372  case PM_HASH_NODE: {
1373  if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) return true;
1374 
1375  const pm_hash_node_t *cast = (const pm_hash_node_t *) node;
1376  for (size_t index = 0; index < cast->elements.size; index++) {
1377  const pm_node_t *element = cast->elements.nodes[index];
1378  if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE)) return false;
1379 
1380  const pm_assoc_node_t *assoc = (const pm_assoc_node_t *) element;
1381  if (!pm_conditional_predicate_warn_write_literal_p(assoc->key) || !pm_conditional_predicate_warn_write_literal_p(assoc->value)) return false;
1382  }
1383 
1384  return true;
1385  }
1386  case PM_FALSE_NODE:
1387  case PM_FLOAT_NODE:
1388  case PM_IMAGINARY_NODE:
1389  case PM_INTEGER_NODE:
1390  case PM_NIL_NODE:
1391  case PM_RATIONAL_NODE:
1394  case PM_SOURCE_FILE_NODE:
1395  case PM_SOURCE_LINE_NODE:
1396  case PM_STRING_NODE:
1397  case PM_SYMBOL_NODE:
1398  case PM_TRUE_NODE:
1399  return true;
1400  default:
1401  return false;
1402  }
1403 }
1404 
1409 static inline void
1410 pm_conditional_predicate_warn_write_literal(pm_parser_t *parser, const pm_node_t *node) {
1411  if (pm_conditional_predicate_warn_write_literal_p(node)) {
1412  pm_parser_warn_node(parser, node, parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_WARN_EQUAL_IN_CONDITIONAL_3_3 : PM_WARN_EQUAL_IN_CONDITIONAL);
1413  }
1414 }
1415 
// Walk a predicate expression and apply per-node-kind handling: recurse into
// both sides of and/or nodes, into single-statement parentheses and begin
// bodies, and into the bounds of ranges; retag range nodes as flip-flop nodes
// and regular expression nodes as match-last-line nodes; and warn about
// literals used as predicates and about literal values being assigned.
// `type` selects the wording of literal warnings and is forwarded unchanged
// when descending into parentheses and begin bodies.
1428 static void
1429 pm_conditional_predicate(pm_parser_t *parser, pm_node_t *node, pm_conditional_predicate_type_t type) {
1430  switch (PM_NODE_TYPE(node)) {
1431  case PM_AND_NODE: {
 // Both operands are themselves treated as plain conditionals.
1432  pm_and_node_t *cast = (pm_and_node_t *) node;
1433  pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1434  pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1435  break;
1436  }
1437  case PM_OR_NODE: {
1438  pm_or_node_t *cast = (pm_or_node_t *) node;
1439  pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1440  pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1441  break;
1442  }
1443  case PM_PARENTHESES_NODE: {
 // NOTE(review): the declaration of `cast` for this case is not visible in
 // this listing (the numbering jumps from 1443 to 1445) — confirm against
 // the upstream file.
1445 
 // Only descend when the body is a statements node holding exactly one
 // statement.
1446  if ((cast->body != NULL) && PM_NODE_TYPE_P(cast->body, PM_STATEMENTS_NODE)) {
1447  pm_statements_node_t *statements = (pm_statements_node_t *) cast->body;
1448  if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1449  }
1450 
1451  break;
1452  }
1453  case PM_BEGIN_NODE: {
 // Same rule as parentheses: descend only into a single-statement body.
1454  pm_begin_node_t *cast = (pm_begin_node_t *) node;
1455  if (cast->statements != NULL) {
1456  pm_statements_node_t *statements = cast->statements;
1457  if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0], type);
1458  }
1459  break;
1460  }
1461  case PM_RANGE_NODE: {
1462  pm_range_node_t *cast = (pm_range_node_t *) node;
1463 
 // Each bound that is present is examined in flip-flop context.
1464  if (cast->left != NULL) pm_conditional_predicate(parser, cast->left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1465  if (cast->right != NULL) pm_conditional_predicate(parser, cast->right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1466 
1467  // Here we change the range node into a flip flop node. We can do
1468  // this since the nodes are exactly the same except for the type.
1469  // We're only asserting against the size when we should probably
1470  // assert against the entire layout, but we'll assume tests will
1471  // catch this.
1472  assert(sizeof(pm_range_node_t) == sizeof(pm_flip_flop_node_t));
1473  node->type = PM_FLIP_FLOP_NODE;
1474 
1475  break;
1476  }
 // NOTE(review): the case label for the code below is not visible in this
 // listing (1476 → 1478); from the comment and the assert it presumably
 // handles regular expression nodes — confirm against the upstream file.
1478  // Here we change the regular expression node into a match last line
1479  // node. We can do this since the nodes are exactly the same except
1480  // for the type.
1481  assert(sizeof(pm_regular_expression_node_t) == sizeof(pm_match_last_line_node_t));
1482  node->type = PM_MATCH_LAST_LINE_NODE;
1483 
 // The literal warning is skipped when PM_PARSER_COMMAND_LINE_OPTION_E
 // holds for this parser.
1484  if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1485  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "regex ");
1486  }
1487 
1488  break;
 // NOTE(review): the case label (1489) and the statements that follow the
 // comment below (1493-1494) are not visible in this listing; presumably
 // they retag the node the way the comment describes — confirm against the
 // upstream file.
1490  // Here we change the interpolated regular expression node into an
1491  // interpolated match last line node. We can do this since the nodes
1492  // are exactly the same except for the type.
1495 
1496  if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1497  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "regex ");
1498  }
1499 
1500  break;
1501  case PM_INTEGER_NODE:
 // In flip-flop context integers get their own warning (unless
 // PM_PARSER_COMMAND_LINE_OPTION_E holds); in any other context they are
 // reported through the generic literal warning with an empty prefix.
1502  if (type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1503  if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1504  pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1505  }
1506  } else {
1507  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1508  }
1509  break;
 // NOTE(review): the numbering gaps in the three literal groups below
 // (1511 → 1513, 1515 → 1517, 1519 → 1521) suggest further case labels that
 // are not visible in this listing — confirm against the upstream file.
1510  case PM_STRING_NODE:
1511  case PM_SOURCE_FILE_NODE:
1513  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT, "string ");
1514  break;
1515  case PM_SYMBOL_NODE:
1517  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "symbol ");
1518  break;
1519  case PM_SOURCE_LINE_NODE:
1521  case PM_FLOAT_NODE:
1522  case PM_RATIONAL_NODE:
1523  case PM_IMAGINARY_NODE:
1524  pm_parser_warn_conditional_predicate_literal(parser, node, type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE, "");
1525  break;
 // Each statement below checks the value being assigned by a write node and
 // warns when that value is a literal.
 // NOTE(review): the case labels for the first five statements are not
 // visible in this listing; judging by the casts they are the matching
 // *_WRITE_NODE kinds — confirm against the upstream file.
1527  pm_conditional_predicate_warn_write_literal(parser, ((pm_class_variable_write_node_t *) node)->value);
1528  break;
1530  pm_conditional_predicate_warn_write_literal(parser, ((pm_constant_write_node_t *) node)->value);
1531  break;
1533  pm_conditional_predicate_warn_write_literal(parser, ((pm_global_variable_write_node_t *) node)->value);
1534  break;
1536  pm_conditional_predicate_warn_write_literal(parser, ((pm_instance_variable_write_node_t *) node)->value);
1537  break;
1539  pm_conditional_predicate_warn_write_literal(parser, ((pm_local_variable_write_node_t *) node)->value);
1540  break;
1541  case PM_MULTI_WRITE_NODE:
1542  pm_conditional_predicate_warn_write_literal(parser, ((pm_multi_write_node_t *) node)->value);
1543  break;
1544  default:
1545  break;
1546  }
1547 }
1548 
1557 static inline pm_token_t
1558 not_provided(pm_parser_t *parser) {
1559  return (pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start };
1560 }
1561 
// Helpers that build pm_location_t values as compound literals.

// A zero-width location: start and end are both the parser's start pointer.
1562 #define PM_LOCATION_NULL_VALUE(parser) ((pm_location_t) { .start = (parser)->start, .end = (parser)->start })
// The location covered by a token; `token` is a pointer to the token.
1563 #define PM_LOCATION_TOKEN_VALUE(token) ((pm_location_t) { .start = (token)->start, .end = (token)->end })
// A copy of the start/end of `node->location`.
1564 #define PM_LOCATION_NODE_VALUE(node) ((pm_location_t) { .start = (node)->location.start, .end = (node)->location.end })
// A copy of the start/end of `node->base.location`, for pointers to node
// structs that reach their location through a `base` member.
1565 #define PM_LOCATION_NODE_BASE_VALUE(node) ((pm_location_t) { .start = (node)->base.location.start, .end = (node)->base.location.end })
// The value used for an optional location that is absent: both pointers NULL.
1566 #define PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE ((pm_location_t) { .start = NULL, .end = NULL })
// The token's location, or the absent value when the token's type is
// PM_TOKEN_NOT_PROVIDED. Note that `token` is evaluated more than once.
1567 #define PM_OPTIONAL_LOCATION_TOKEN_VALUE(token) ((token)->type == PM_TOKEN_NOT_PROVIDED ? PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE : PM_LOCATION_TOKEN_VALUE(token))
1568 
// Bundle describing the arguments parsed for a call.
// NOTE(review): the member declarations of this struct are not visible in
// this listing (only blank numbered lines remain). Code in this file reads
// `arguments`, `closing_loc` and `block` through pm_arguments_t pointers;
// confirm the full member list and types against the upstream file.
1575 typedef struct {
1578 
1581 
1584 
1587 
1590 } pm_arguments_t;
1591 
1595 static inline const uint8_t *
1596 pm_arguments_end(pm_arguments_t *arguments) {
1597  if (arguments->block != NULL) {
1598  const uint8_t *end = arguments->block->location.end;
1599  if (arguments->closing_loc.start != NULL && arguments->closing_loc.end > end) {
1600  end = arguments->closing_loc.end;
1601  }
1602  return end;
1603  }
1604  if (arguments->closing_loc.start != NULL) {
1605  return arguments->closing_loc.end;
1606  }
1607  if (arguments->arguments != NULL) {
1608  return arguments->arguments->base.location.end;
1609  }
1610  return arguments->closing_loc.end;
1611 }
1612 
1617 static void
1618 pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_block_node_t *block) {
1619  // First, check that we have arguments and that we don't have a closing
1620  // location for them.
1621  if (arguments->arguments == NULL || arguments->closing_loc.start != NULL) {
1622  return;
1623  }
1624 
1625  // Next, check that we don't have a single parentheses argument. This would
1626  // look like:
1627  //
1628  // foo (1) {}
1629  //
1630  // In this case, it's actually okay for the block to be attached to the
1631  // call, even though it looks like it's attached to the argument.
1632  if (arguments->arguments->arguments.size == 1 && PM_NODE_TYPE_P(arguments->arguments->arguments.nodes[0], PM_PARENTHESES_NODE)) {
1633  return;
1634  }
1635 
1636  // If we didn't hit a case before this check, then at this point we need to
1637  // add a syntax error.
1638  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1639 }
1640 
1641 /******************************************************************************/
1642 /* Basic character checks */
1643 /******************************************************************************/
1644 
1651 static inline size_t
1652 char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
1653  if (parser->encoding_changed) {
1654  size_t width;
1655  if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
1656  return width;
1657  } else if (*b == '_') {
1658  return 1;
1659  } else if (*b >= 0x80) {
1660  return parser->encoding->char_width(b, parser->end - b);
1661  } else {
1662  return 0;
1663  }
1664  } else if (*b < 0x80) {
1665  return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
1666  } else {
1667  return pm_encoding_utf_8_char_width(b, parser->end - b);
1668  }
1669 }
1670 
1675 static inline size_t
1676 char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
1677  if (*b < 0x80) {
1678  return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
1679  } else {
1680  return pm_encoding_utf_8_char_width(b, end - b);
1681  }
1682 }
1683 
1689 static inline size_t
1690 char_is_identifier(const pm_parser_t *parser, const uint8_t *b) {
1691  if (parser->encoding_changed) {
1692  size_t width;
1693  if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
1694  return width;
1695  } else if (*b == '_') {
1696  return 1;
1697  } else if (*b >= 0x80) {
1698  return parser->encoding->char_width(b, parser->end - b);
1699  } else {
1700  return 0;
1701  }
1702  }
1703  return char_is_identifier_utf8(b, parser->end);
1704 }
1705 
// A bitmap of the punctuation characters that are allowed in global names,
// used to quickly check whether the character after a $ is valid. The ASCII
// range 0x20-0x7e is split into three 32-bit words: BIT(c, idx) yields the bit
// for character c when it belongs to word idx (and 0 otherwise), and
// PUNCT(idx) collects every allowed character for that word.
#define BIT(c, idx) (((c) / 32 - 1 == (idx)) ? (1U << ((c) % 32)) : 0)
#define PUNCT(idx) ( \
    BIT('!', idx) | BIT('\"', idx) | BIT('$', idx) | BIT('&', idx) | \
    BIT('\'', idx) | BIT('*', idx) | BIT('+', idx) | BIT(',', idx) | \
    BIT('.', idx) | BIT('/', idx) | BIT('0', idx) | BIT(':', idx) | \
    BIT(';', idx) | BIT('<', idx) | BIT('=', idx) | BIT('>', idx) | \
    BIT('?', idx) | BIT('@', idx) | BIT('\\', idx) | BIT('`', idx) | \
    BIT('~', idx))

const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = {
    PUNCT(0),
    PUNCT(1),
    PUNCT(2)
};

#undef BIT
#undef PUNCT

/**
 * Return true when the given byte is one of the punctuation characters
 * recorded in pm_global_name_punctuation_hash. Bytes at or below 0x20 and
 * bytes above 0x7e are never accepted.
 */
static inline bool
char_is_global_name_punctuation(const uint8_t b) {
    if (b <= 0x20 || b > 0x7e) return false;

    const unsigned int word = ((unsigned int) b - 0x20) / 32;
    const unsigned int shift = (unsigned int) b % 32;

    return ((pm_global_name_punctuation_hash[word] >> shift) & 1) != 0;
}
1730 
1731 static inline bool
1732 token_is_setter_name(pm_token_t *token) {
1733  return (
1734  (token->type == PM_TOKEN_IDENTIFIER) &&
1735  (token->end - token->start >= 2) &&
1736  (token->end[-1] == '=')
1737  );
1738 }
1739 
/**
 * Return true when the first `length` bytes of `source` spell one of the
 * keywords in the table below. Only keywords whose length is exactly `length`
 * are considered, so `source` does not need to be NUL-terminated.
 */
static bool
pm_local_is_keyword(const char *source, size_t length) {
#define KEYWORD(name) { name, sizeof(name) - 1 }

    static const struct {
        const char *text;
        size_t length;
    } keywords[] = {
        KEYWORD("do"), KEYWORD("if"), KEYWORD("in"), KEYWORD("or"),
        KEYWORD("and"), KEYWORD("def"), KEYWORD("end"), KEYWORD("for"),
        KEYWORD("nil"), KEYWORD("not"),
        KEYWORD("case"), KEYWORD("else"), KEYWORD("next"), KEYWORD("redo"),
        KEYWORD("self"), KEYWORD("then"), KEYWORD("true"), KEYWORD("when"),
        KEYWORD("alias"), KEYWORD("begin"), KEYWORD("break"), KEYWORD("class"),
        KEYWORD("elsif"), KEYWORD("false"), KEYWORD("retry"), KEYWORD("super"),
        KEYWORD("undef"), KEYWORD("until"), KEYWORD("while"), KEYWORD("yield"),
        KEYWORD("ensure"), KEYWORD("module"), KEYWORD("rescue"), KEYWORD("return"),
        KEYWORD("unless"),
        KEYWORD("__LINE__"), KEYWORD("__FILE__"),
        KEYWORD("__ENCODING__")
    };

#undef KEYWORD

    const size_t count = sizeof(keywords) / sizeof(keywords[0]);

    for (size_t index = 0; index < count; index++) {
        // Compare lengths first so that memcmp only ever reads `length` bytes
        // from a keyword that is exactly that long.
        if (keywords[index].length != length) continue;
        if (memcmp(source, keywords[index].text, length) == 0) return true;
    }

    return false;
}
1810 
1811 /******************************************************************************/
1812 /* Node flag handling functions */
1813 /******************************************************************************/
1814 
1818 static inline void
1819 pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1820  node->flags |= flag;
1821 }
1822 
1826 static inline void
1827 pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
1828  node->flags &= (pm_node_flags_t) ~flag;
1829 }
1830 
1834 static inline void
1835 pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1836  assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1837  PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1838  PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1839  PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1840  PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1841  PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1842  PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1843  PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1844 
1845  pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1846 }
1847 
1848 /******************************************************************************/
1849 /* Node creation functions */
1850 /******************************************************************************/
1851 
1857 #define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
1858 
1862 static inline pm_node_flags_t
1863 pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1864  pm_node_flags_t flags = 0;
1865 
1866  if (closing->type == PM_TOKEN_REGEXP_END) {
1867  pm_buffer_t unknown_flags = { 0 };
1868 
1869  for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
1870  switch (*flag) {
1871  case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1872  case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1873  case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1874  case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
1875 
1876  case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1877  case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1878  case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1879  case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
1880 
1881  default: pm_buffer_append_byte(&unknown_flags, *flag);
1882  }
1883  }
1884 
1885  size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1886  if (unknown_flags_length != 0) {
1887  const char *word = unknown_flags_length >= 2 ? "options" : "option";
1888  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1889  }
1890  pm_buffer_free(&unknown_flags);
1891  }
1892 
1893  return flags;
1894 }
1895 
1896 #undef PM_REGULAR_EXPRESSION_ENCODING_MASK
1897 
// Forward declarations for statements-node helpers. Their definitions are not
// part of this section of the file; they are declared here so that code
// appearing before the definitions can call them.

// Returns a pointer to a statements node.
// NOTE(review): presumably a freshly allocated, empty node — confirm at the
// definition.
1898 static pm_statements_node_t *
1899 pm_statements_node_create(pm_parser_t *parser);
1900 
// Takes a statements node, a statement and a `newline` flag.
// NOTE(review): presumably appends `statement` to the node's body — confirm
// at the definition, including what `newline` controls.
1901 static void
1902 pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
1903 
// Returns a size_t for the given statements node.
// NOTE(review): presumably the number of statements in its body — confirm at
// the definition.
1904 static size_t
1905 pm_statements_node_body_length(pm_statements_node_t *node);
1906 
1911 static inline void *
1912 pm_node_alloc(PRISM_ATTRIBUTE_UNUSED pm_parser_t *parser, size_t size) {
1913  void *memory = xcalloc(1, size);
1914  if (memory == NULL) {
1915  fprintf(stderr, "Failed to allocate %d bytes\n", (int) size);
1916  abort();
1917  }
1918  return memory;
1919 }
1920 
/**
 * Allocate storage for one node of the given struct type through
 * pm_node_alloc and yield it as a pointer to that type. The expansion is
 * fully parenthesized so that it can be used inside larger expressions.
 */
#define PM_NODE_ALLOC(parser, type) ((type *) pm_node_alloc((parser), sizeof(type)))

/**
 * Advance the parser's node_id counter and yield the new value, which is used
 * as the identifier of the node being created. The argument is parenthesized
 * so that any pointer expression (for example `&parser_struct`) may be
 * passed; it is evaluated exactly once.
 */
#define PM_NODE_IDENTIFY(parser) (++(parser)->node_id)
1923 
1927 static pm_missing_node_t *
1928 pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
1929  pm_missing_node_t *node = PM_NODE_ALLOC(parser, pm_missing_node_t);
1930 
1931  *node = (pm_missing_node_t) {{
1932  .type = PM_MISSING_NODE,
1933  .node_id = PM_NODE_IDENTIFY(parser),
1934  .location = { .start = start, .end = end }
1935  }};
1936 
1937  return node;
1938 }
1939 
1943 static pm_alias_global_variable_node_t *
1944 pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1945  assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1946  pm_alias_global_variable_node_t *node = PM_NODE_ALLOC(parser, pm_alias_global_variable_node_t);
1947 
1948  *node = (pm_alias_global_variable_node_t) {
1949  {
1950  .type = PM_ALIAS_GLOBAL_VARIABLE_NODE,
1951  .node_id = PM_NODE_IDENTIFY(parser),
1952  .location = {
1953  .start = keyword->start,
1954  .end = old_name->location.end
1955  },
1956  },
1957  .new_name = new_name,
1958  .old_name = old_name,
1959  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1960  };
1961 
1962  return node;
1963 }
1964 
1968 static pm_alias_method_node_t *
1969 pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
1970  assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
1971  pm_alias_method_node_t *node = PM_NODE_ALLOC(parser, pm_alias_method_node_t);
1972 
1973  *node = (pm_alias_method_node_t) {
1974  {
1975  .type = PM_ALIAS_METHOD_NODE,
1976  .node_id = PM_NODE_IDENTIFY(parser),
1977  .location = {
1978  .start = keyword->start,
1979  .end = old_name->location.end
1980  },
1981  },
1982  .new_name = new_name,
1983  .old_name = old_name,
1984  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1985  };
1986 
1987  return node;
1988 }
1989 
1993 static pm_alternation_pattern_node_t *
1994 pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
1995  pm_alternation_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_alternation_pattern_node_t);
1996 
1997  *node = (pm_alternation_pattern_node_t) {
1998  {
1999  .type = PM_ALTERNATION_PATTERN_NODE,
2000  .node_id = PM_NODE_IDENTIFY(parser),
2001  .location = {
2002  .start = left->location.start,
2003  .end = right->location.end
2004  },
2005  },
2006  .left = left,
2007  .right = right,
2008  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2009  };
2010 
2011  return node;
2012 }
2013 
2017 static pm_and_node_t *
2018 pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2019  pm_assert_value_expression(parser, left);
2020 
2021  pm_and_node_t *node = PM_NODE_ALLOC(parser, pm_and_node_t);
2022 
2023  *node = (pm_and_node_t) {
2024  {
2025  .type = PM_AND_NODE,
2026  .node_id = PM_NODE_IDENTIFY(parser),
2027  .location = {
2028  .start = left->location.start,
2029  .end = right->location.end
2030  },
2031  },
2032  .left = left,
2033  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2034  .right = right
2035  };
2036 
2037  return node;
2038 }
2039 
2043 static pm_arguments_node_t *
2044 pm_arguments_node_create(pm_parser_t *parser) {
2045  pm_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_arguments_node_t);
2046 
2047  *node = (pm_arguments_node_t) {
2048  {
2049  .type = PM_ARGUMENTS_NODE,
2050  .node_id = PM_NODE_IDENTIFY(parser),
2051  .location = PM_LOCATION_NULL_VALUE(parser)
2052  },
2053  .arguments = { 0 }
2054  };
2055 
2056  return node;
2057 }
2058 
2062 static size_t
2063 pm_arguments_node_size(pm_arguments_node_t *node) {
2064  return node->arguments.size;
2065 }
2066 
2070 static void
2071 pm_arguments_node_arguments_append(pm_arguments_node_t *node, pm_node_t *argument) {
2072  if (pm_arguments_node_size(node) == 0) {
2073  node->base.location.start = argument->location.start;
2074  }
2075 
2076  node->base.location.end = argument->location.end;
2077  pm_node_list_append(&node->arguments, argument);
2078 
2079  if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2080  if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2081  pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2082  } else {
2083  pm_node_flag_set((pm_node_t *) node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
2084  }
2085  }
2086 }
2087 
2091 static pm_array_node_t *
2092 pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2093  pm_array_node_t *node = PM_NODE_ALLOC(parser, pm_array_node_t);
2094 
2095  *node = (pm_array_node_t) {
2096  {
2097  .type = PM_ARRAY_NODE,
2098  .flags = PM_NODE_FLAG_STATIC_LITERAL,
2099  .node_id = PM_NODE_IDENTIFY(parser),
2100  .location = PM_LOCATION_TOKEN_VALUE(opening)
2101  },
2102  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2103  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2104  .elements = { 0 }
2105  };
2106 
2107  return node;
2108 }
2109 
2113 static inline void
2114 pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
2115  if (!node->elements.size && !node->opening_loc.start) {
2116  node->base.location.start = element->location.start;
2117  }
2118 
2119  pm_node_list_append(&node->elements, element);
2120  node->base.location.end = element->location.end;
2121 
2122  // If the element is not a static literal, then the array is not a static
2123  // literal. Turn that flag off.
2124  if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2125  pm_node_flag_unset((pm_node_t *)node, PM_NODE_FLAG_STATIC_LITERAL);
2126  }
2127 
2128  if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2129  pm_node_flag_set((pm_node_t *)node, PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
2130  }
2131 }
2132 
2136 static void
2137 pm_array_node_close_set(pm_array_node_t *node, const pm_token_t *closing) {
2138  assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == PM_TOKEN_MISSING || closing->type == PM_TOKEN_NOT_PROVIDED);
2139  node->base.location.end = closing->end;
2140  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2141 }
2142 
2147 static pm_array_pattern_node_t *
2148 pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2149  pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2150 
2151  *node = (pm_array_pattern_node_t) {
2152  {
2153  .type = PM_ARRAY_PATTERN_NODE,
2154  .node_id = PM_NODE_IDENTIFY(parser),
2155  .location = {
2156  .start = nodes->nodes[0]->location.start,
2157  .end = nodes->nodes[nodes->size - 1]->location.end
2158  },
2159  },
2160  .constant = NULL,
2161  .rest = NULL,
2162  .requireds = { 0 },
2163  .posts = { 0 },
2164  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2165  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2166  };
2167 
2168  // For now we're going to just copy over each pointer manually. This could be
2169  // much more efficient, as we could instead resize the node list.
2170  bool found_rest = false;
2171  pm_node_t *child;
2172 
2173  PM_NODE_LIST_FOREACH(nodes, index, child) {
2174  if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2175  node->rest = child;
2176  found_rest = true;
2177  } else if (found_rest) {
2178  pm_node_list_append(&node->posts, child);
2179  } else {
2180  pm_node_list_append(&node->requireds, child);
2181  }
2182  }
2183 
2184  return node;
2185 }
2186 
2190 static pm_array_pattern_node_t *
2191 pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2192  pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2193 
2194  *node = (pm_array_pattern_node_t) {
2195  {
2196  .type = PM_ARRAY_PATTERN_NODE,
2197  .node_id = PM_NODE_IDENTIFY(parser),
2198  .location = rest->location,
2199  },
2200  .constant = NULL,
2201  .rest = rest,
2202  .requireds = { 0 },
2203  .posts = { 0 },
2204  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2205  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2206  };
2207 
2208  return node;
2209 }
2210 
2215 static pm_array_pattern_node_t *
2216 pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2217  pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2218 
2219  *node = (pm_array_pattern_node_t) {
2220  {
2221  .type = PM_ARRAY_PATTERN_NODE,
2222  .node_id = PM_NODE_IDENTIFY(parser),
2223  .location = {
2224  .start = constant->location.start,
2225  .end = closing->end
2226  },
2227  },
2228  .constant = constant,
2229  .rest = NULL,
2230  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2231  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2232  .requireds = { 0 },
2233  .posts = { 0 }
2234  };
2235 
2236  return node;
2237 }
2238 
2243 static pm_array_pattern_node_t *
2244 pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2245  pm_array_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_array_pattern_node_t);
2246 
2247  *node = (pm_array_pattern_node_t) {
2248  {
2249  .type = PM_ARRAY_PATTERN_NODE,
2250  .node_id = PM_NODE_IDENTIFY(parser),
2251  .location = {
2252  .start = opening->start,
2253  .end = closing->end
2254  },
2255  },
2256  .constant = NULL,
2257  .rest = NULL,
2258  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2259  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
2260  .requireds = { 0 },
2261  .posts = { 0 }
2262  };
2263 
2264  return node;
2265 }
2266 
2267 static inline void
2268 pm_array_pattern_node_requireds_append(pm_array_pattern_node_t *node, pm_node_t *inner) {
2269  pm_node_list_append(&node->requireds, inner);
2270 }
2271 
2275 static pm_assoc_node_t *
2276 pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
2277  pm_assoc_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_node_t);
2278  const uint8_t *end;
2279 
2280  if (value != NULL && value->location.end > key->location.end) {
2281  end = value->location.end;
2282  } else if (operator->type != PM_TOKEN_NOT_PROVIDED) {
2283  end = operator->end;
2284  } else {
2285  end = key->location.end;
2286  }
2287 
2288  // Hash string keys will be frozen, so we can mark them as frozen here so
2289  // that the compiler picks them up and also when we check for static literal
2290  // on the keys it gets factored in.
2291  if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2292  key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2293  }
2294 
2295  // If the key and value of this assoc node are both static literals, then
2296  // we can mark this node as a static literal.
2297  pm_node_flags_t flags = 0;
2298  if (
2299  !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2300  value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2301  ) {
2302  flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2303  }
2304 
2305  *node = (pm_assoc_node_t) {
2306  {
2307  .type = PM_ASSOC_NODE,
2308  .flags = flags,
2309  .node_id = PM_NODE_IDENTIFY(parser),
2310  .location = {
2311  .start = key->location.start,
2312  .end = end
2313  },
2314  },
2315  .key = key,
2316  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
2317  .value = value
2318  };
2319 
2320  return node;
2321 }
2322 
2326 static pm_assoc_splat_node_t *
2327 pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2328  assert(operator->type == PM_TOKEN_USTAR_STAR);
2329  pm_assoc_splat_node_t *node = PM_NODE_ALLOC(parser, pm_assoc_splat_node_t);
2330 
2331  *node = (pm_assoc_splat_node_t) {
2332  {
2333  .type = PM_ASSOC_SPLAT_NODE,
2334  .node_id = PM_NODE_IDENTIFY(parser),
2335  .location = {
2336  .start = operator->start,
2337  .end = value == NULL ? operator->end : value->location.end
2338  },
2339  },
2340  .value = value,
2341  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2342  };
2343 
2344  return node;
2345 }
2346 
2350 static pm_back_reference_read_node_t *
2351 pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2352  assert(name->type == PM_TOKEN_BACK_REFERENCE);
2353  pm_back_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_back_reference_read_node_t);
2354 
2355  *node = (pm_back_reference_read_node_t) {
2356  {
2357  .type = PM_BACK_REFERENCE_READ_NODE,
2358  .node_id = PM_NODE_IDENTIFY(parser),
2359  .location = PM_LOCATION_TOKEN_VALUE(name),
2360  },
2361  .name = pm_parser_constant_id_token(parser, name)
2362  };
2363 
2364  return node;
2365 }
2366 
2370 static pm_begin_node_t *
2371 pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
2372  pm_begin_node_t *node = PM_NODE_ALLOC(parser, pm_begin_node_t);
2373 
2374  *node = (pm_begin_node_t) {
2375  {
2376  .type = PM_BEGIN_NODE,
2377  .node_id = PM_NODE_IDENTIFY(parser),
2378  .location = {
2379  .start = begin_keyword->start,
2380  .end = statements == NULL ? begin_keyword->end : statements->base.location.end
2381  },
2382  },
2383  .begin_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(begin_keyword),
2384  .statements = statements,
2385  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
2386  };
2387 
2388  return node;
2389 }
2390 
2394 static void
2395 pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
2396  // If the begin keyword doesn't exist, we set the start on the begin_node
2397  if (!node->begin_keyword_loc.start) {
2398  node->base.location.start = rescue_clause->base.location.start;
2399  }
2400  node->base.location.end = rescue_clause->base.location.end;
2401  node->rescue_clause = rescue_clause;
2402 }
2403 
2407 static void
2408 pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
2409  node->base.location.end = else_clause->base.location.end;
2410  node->else_clause = else_clause;
2411 }
2412 
2416 static void
2417 pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
2418  node->base.location.end = ensure_clause->base.location.end;
2419  node->ensure_clause = ensure_clause;
2420 }
2421 
2425 static void
2426 pm_begin_node_end_keyword_set(pm_begin_node_t *node, const pm_token_t *end_keyword) {
2427  assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == PM_TOKEN_MISSING);
2428 
2429  node->base.location.end = end_keyword->end;
2430  node->end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword);
2431 }
2432 
2436 static pm_block_argument_node_t *
2437 pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2438  pm_block_argument_node_t *node = PM_NODE_ALLOC(parser, pm_block_argument_node_t);
2439 
2440  *node = (pm_block_argument_node_t) {
2441  {
2442  .type = PM_BLOCK_ARGUMENT_NODE,
2443  .node_id = PM_NODE_IDENTIFY(parser),
2444  .location = {
2445  .start = operator->start,
2446  .end = expression == NULL ? operator->end : expression->location.end
2447  },
2448  },
2449  .expression = expression,
2450  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2451  };
2452 
2453  return node;
2454 }
2455 
2459 static pm_block_node_t *
2460 pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2461  pm_block_node_t *node = PM_NODE_ALLOC(parser, pm_block_node_t);
2462 
2463  *node = (pm_block_node_t) {
2464  {
2465  .type = PM_BLOCK_NODE,
2466  .node_id = PM_NODE_IDENTIFY(parser),
2467  .location = { .start = opening->start, .end = closing->end },
2468  },
2469  .locals = *locals,
2470  .parameters = parameters,
2471  .body = body,
2472  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
2473  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
2474  };
2475 
2476  return node;
2477 }
2478 
2482 static pm_block_parameter_node_t *
2483 pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2484  assert(operator->type == PM_TOKEN_NOT_PROVIDED || operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2485  pm_block_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameter_node_t);
2486 
2487  *node = (pm_block_parameter_node_t) {
2488  {
2489  .type = PM_BLOCK_PARAMETER_NODE,
2490  .node_id = PM_NODE_IDENTIFY(parser),
2491  .location = {
2492  .start = operator->start,
2493  .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
2494  },
2495  },
2496  .name = pm_parser_optional_constant_id_token(parser, name),
2497  .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
2498  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
2499  };
2500 
2501  return node;
2502 }
2503 
2507 static pm_block_parameters_node_t *
2508 pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
2509  pm_block_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_block_parameters_node_t);
2510 
2511  const uint8_t *start;
2512  if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2513  start = opening->start;
2514  } else if (parameters != NULL) {
2515  start = parameters->base.location.start;
2516  } else {
2517  start = NULL;
2518  }
2519 
2520  const uint8_t *end;
2521  if (parameters != NULL) {
2522  end = parameters->base.location.end;
2523  } else if (opening->type != PM_TOKEN_NOT_PROVIDED) {
2524  end = opening->end;
2525  } else {
2526  end = NULL;
2527  }
2528 
2529  *node = (pm_block_parameters_node_t) {
2530  {
2531  .type = PM_BLOCK_PARAMETERS_NODE,
2532  .node_id = PM_NODE_IDENTIFY(parser),
2533  .location = {
2534  .start = start,
2535  .end = end
2536  }
2537  },
2538  .parameters = parameters,
2539  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
2540  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2541  .locals = { 0 }
2542  };
2543 
2544  return node;
2545 }
2546 
2550 static void
2551 pm_block_parameters_node_closing_set(pm_block_parameters_node_t *node, const pm_token_t *closing) {
2552  assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == PM_TOKEN_MISSING);
2553 
2554  node->base.location.end = closing->end;
2555  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
2556 }
2557 
2561 static pm_block_local_variable_node_t *
2562 pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2563  pm_block_local_variable_node_t *node = PM_NODE_ALLOC(parser, pm_block_local_variable_node_t);
2564 
2565  *node = (pm_block_local_variable_node_t) {
2566  {
2567  .type = PM_BLOCK_LOCAL_VARIABLE_NODE,
2568  .node_id = PM_NODE_IDENTIFY(parser),
2569  .location = PM_LOCATION_TOKEN_VALUE(name),
2570  },
2571  .name = pm_parser_constant_id_token(parser, name)
2572  };
2573 
2574  return node;
2575 }
2576 
2580 static void
2581 pm_block_parameters_node_append_local(pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2582  pm_node_list_append(&node->locals, (pm_node_t *) local);
2583 
2584  if (node->base.location.start == NULL) node->base.location.start = local->base.location.start;
2585  node->base.location.end = local->base.location.end;
2586 }
2587 
2591 static pm_break_node_t *
2592 pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2593  assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2594  pm_break_node_t *node = PM_NODE_ALLOC(parser, pm_break_node_t);
2595 
2596  *node = (pm_break_node_t) {
2597  {
2598  .type = PM_BREAK_NODE,
2599  .node_id = PM_NODE_IDENTIFY(parser),
2600  .location = {
2601  .start = keyword->start,
2602  .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
2603  },
2604  },
2605  .arguments = arguments,
2606  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
2607  };
2608 
2609  return node;
2610 }
2611 
2612 // There are certain flags that we want to use internally but don't want to
2613 // expose because they are not relevant beyond parsing. Therefore we'll define
2614 // them here and not define them in config.yml/a header file.
2615 static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = 0x4;
2616 static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = 0x40;
2617 static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = 0x80;
2618 static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = 0x100;
2619 
2625 static pm_call_node_t *
2626 pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2627  pm_call_node_t *node = PM_NODE_ALLOC(parser, pm_call_node_t);
2628 
2629  *node = (pm_call_node_t) {
2630  {
2631  .type = PM_CALL_NODE,
2632  .flags = flags,
2633  .node_id = PM_NODE_IDENTIFY(parser),
2634  .location = PM_LOCATION_NULL_VALUE(parser),
2635  },
2636  .receiver = NULL,
2637  .call_operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2638  .message_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2639  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2640  .arguments = NULL,
2641  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
2642  .block = NULL,
2643  .name = 0
2644  };
2645 
2646  return node;
2647 }
2648 
2653 static inline pm_node_flags_t
2654 pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2655  return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
2656 }
2657 
2662 static pm_call_node_t *
2663 pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2664  pm_assert_value_expression(parser, receiver);
2665 
2666  pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
2667  if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2668  flags |= PM_CALL_NODE_FLAGS_INDEX;
2669  }
2670 
2671  pm_call_node_t *node = pm_call_node_create(parser, flags);
2672 
2673  node->base.location.start = receiver->location.start;
2674  node->base.location.end = pm_arguments_end(arguments);
2675 
2676  node->receiver = receiver;
2677  node->message_loc.start = arguments->opening_loc.start;
2678  node->message_loc.end = arguments->closing_loc.end;
2679 
2680  node->opening_loc = arguments->opening_loc;
2681  node->arguments = arguments->arguments;
2682  node->closing_loc = arguments->closing_loc;
2683  node->block = arguments->block;
2684 
2685  node->name = pm_parser_constant_id_constant(parser, "[]", 2);
2686  return node;
2687 }
2688 
2692 static pm_call_node_t *
2693 pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2694  pm_assert_value_expression(parser, receiver);
2695  pm_assert_value_expression(parser, argument);
2696 
2697  pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
2698 
2699  node->base.location.start = MIN(receiver->location.start, argument->location.start);
2700  node->base.location.end = MAX(receiver->location.end, argument->location.end);
2701 
2702  node->receiver = receiver;
2703  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2704 
2705  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2706  pm_arguments_node_arguments_append(arguments, argument);
2707  node->arguments = arguments;
2708 
2709  node->name = pm_parser_constant_id_token(parser, operator);
2710  return node;
2711 }
2712 
2716 static pm_call_node_t *
2717 pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2718  pm_assert_value_expression(parser, receiver);
2719 
2720  pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2721 
2722  node->base.location.start = receiver->location.start;
2723  const uint8_t *end = pm_arguments_end(arguments);
2724  if (end == NULL) {
2725  end = message->end;
2726  }
2727  node->base.location.end = end;
2728 
2729  node->receiver = receiver;
2730  node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2731  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2732  node->opening_loc = arguments->opening_loc;
2733  node->arguments = arguments->arguments;
2734  node->closing_loc = arguments->closing_loc;
2735  node->block = arguments->block;
2736 
2737  if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2738  pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2739  }
2740 
2741  node->name = pm_parser_constant_id_token(parser, message);
2742  return node;
2743 }
2744 
2748 static pm_call_node_t *
2749 pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2750  pm_call_node_t *node = pm_call_node_create(parser, 0);
2751  node->base.location.start = parser->start;
2752  node->base.location.end = parser->end;
2753 
2754  node->receiver = receiver;
2755  node->call_operator_loc = (pm_location_t) { .start = NULL, .end = NULL };
2756  node->message_loc = (pm_location_t) { .start = NULL, .end = NULL };
2757  node->arguments = arguments;
2758 
2759  node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
2760  return node;
2761 }
2762 
2767 static pm_call_node_t *
2768 pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2769  pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2770 
2771  node->base.location.start = message->start;
2772  node->base.location.end = pm_arguments_end(arguments);
2773 
2774  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2775  node->opening_loc = arguments->opening_loc;
2776  node->arguments = arguments->arguments;
2777  node->closing_loc = arguments->closing_loc;
2778  node->block = arguments->block;
2779 
2780  node->name = pm_parser_constant_id_token(parser, message);
2781  return node;
2782 }
2783 
2788 static pm_call_node_t *
2789 pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2790  pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2791 
2792  node->base.location = PM_LOCATION_NULL_VALUE(parser);
2793  node->arguments = arguments;
2794 
2795  node->name = name;
2796  return node;
2797 }
2798 
2802 static pm_call_node_t *
2803 pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2804  pm_assert_value_expression(parser, receiver);
2805  if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2806 
2807  pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2808 
2809  node->base.location.start = message->start;
2810  if (arguments->closing_loc.start != NULL) {
2811  node->base.location.end = arguments->closing_loc.end;
2812  } else {
2813  assert(receiver != NULL);
2814  node->base.location.end = receiver->location.end;
2815  }
2816 
2817  node->receiver = receiver;
2818  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2819  node->opening_loc = arguments->opening_loc;
2820  node->arguments = arguments->arguments;
2821  node->closing_loc = arguments->closing_loc;
2822 
2823  node->name = pm_parser_constant_id_constant(parser, "!", 1);
2824  return node;
2825 }
2826 
2830 static pm_call_node_t *
2831 pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2832  pm_assert_value_expression(parser, receiver);
2833 
2834  pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2835 
2836  node->base.location.start = receiver->location.start;
2837  node->base.location.end = pm_arguments_end(arguments);
2838 
2839  node->receiver = receiver;
2840  node->call_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2841  node->opening_loc = arguments->opening_loc;
2842  node->arguments = arguments->arguments;
2843  node->closing_loc = arguments->closing_loc;
2844  node->block = arguments->block;
2845 
2846  if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2847  pm_node_flag_set((pm_node_t *)node, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2848  }
2849 
2850  node->name = pm_parser_constant_id_constant(parser, "call", 4);
2851  return node;
2852 }
2853 
2857 static pm_call_node_t *
2858 pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2859  pm_assert_value_expression(parser, receiver);
2860 
2861  pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2862 
2863  node->base.location.start = operator->start;
2864  node->base.location.end = receiver->location.end;
2865 
2866  node->receiver = receiver;
2867  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
2868 
2869  node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
2870  return node;
2871 }
2872 
2877 static pm_call_node_t *
2878 pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2879  pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2880 
2881  node->base.location = PM_LOCATION_TOKEN_VALUE(message);
2882  node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
2883 
2884  node->name = pm_parser_constant_id_token(parser, message);
2885  return node;
2886 }
2887 
2892 static inline bool
2893 pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2894  return (
2895  (node->message_loc.start != NULL) &&
2896  (node->message_loc.end[-1] != '!') &&
2897  (node->message_loc.end[-1] != '?') &&
2898  char_is_identifier_start(parser, node->message_loc.start) &&
2899  (node->opening_loc.start == NULL) &&
2900  (node->arguments == NULL) &&
2901  (node->block == NULL)
2902  );
2903 }
2904 
2908 static void
2909 pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2910  pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2911 
2912  if (write_constant->length > 0) {
2913  size_t length = write_constant->length - 1;
2914 
2915  void *memory = xmalloc(length);
2916  memcpy(memory, write_constant->start, length);
2917 
2918  *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2919  } else {
2920  // We can get here if the message was missing because of a syntax error.
2921  *read_name = pm_parser_constant_id_constant(parser, "", 0);
2922  }
2923 }
2924 
2928 static pm_call_and_write_node_t *
2929 pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2930  assert(target->block == NULL);
2931  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2932  pm_call_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_and_write_node_t);
2933 
2934  *node = (pm_call_and_write_node_t) {
2935  {
2936  .type = PM_CALL_AND_WRITE_NODE,
2937  .flags = target->base.flags,
2938  .node_id = PM_NODE_IDENTIFY(parser),
2939  .location = {
2940  .start = target->base.location.start,
2941  .end = value->location.end
2942  }
2943  },
2944  .receiver = target->receiver,
2945  .call_operator_loc = target->call_operator_loc,
2946  .message_loc = target->message_loc,
2947  .read_name = 0,
2948  .write_name = target->name,
2949  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
2950  .value = value
2951  };
2952 
2953  pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2954 
2955  // Here we're going to free the target, since it is no longer necessary.
2956  // However, we don't want to call `pm_node_destroy` because we want to keep
2957  // around all of its children since we just reused them.
2958  xfree(target);
2959 
2960  return node;
2961 }
2962 
2967 static void
2968 pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2969  if (parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) {
2970  if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2971  pm_node_t *node;
2972  PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2973  if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2974  pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
2975  break;
2976  }
2977  }
2978  }
2979 
2980  if (block != NULL) {
2981  pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
2982  }
2983  }
2984 }
2985 
2989 static pm_index_and_write_node_t *
2990 pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2991  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2992  pm_index_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_and_write_node_t);
2993 
2994  pm_index_arguments_check(parser, target->arguments, target->block);
2995 
2996  assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2997  *node = (pm_index_and_write_node_t) {
2998  {
2999  .type = PM_INDEX_AND_WRITE_NODE,
3000  .flags = target->base.flags,
3001  .node_id = PM_NODE_IDENTIFY(parser),
3002  .location = {
3003  .start = target->base.location.start,
3004  .end = value->location.end
3005  }
3006  },
3007  .receiver = target->receiver,
3008  .call_operator_loc = target->call_operator_loc,
3009  .opening_loc = target->opening_loc,
3010  .arguments = target->arguments,
3011  .closing_loc = target->closing_loc,
3012  .block = (pm_block_argument_node_t *) target->block,
3013  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3014  .value = value
3015  };
3016 
3017  // Here we're going to free the target, since it is no longer necessary.
3018  // However, we don't want to call `pm_node_destroy` because we want to keep
3019  // around all of its children since we just reused them.
3020  xfree(target);
3021 
3022  return node;
3023 }
3024 
3028 static pm_call_operator_write_node_t *
3029 pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3030  assert(target->block == NULL);
3031  pm_call_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_operator_write_node_t);
3032 
3033  *node = (pm_call_operator_write_node_t) {
3034  {
3035  .type = PM_CALL_OPERATOR_WRITE_NODE,
3036  .flags = target->base.flags,
3037  .node_id = PM_NODE_IDENTIFY(parser),
3038  .location = {
3039  .start = target->base.location.start,
3040  .end = value->location.end
3041  }
3042  },
3043  .receiver = target->receiver,
3044  .call_operator_loc = target->call_operator_loc,
3045  .message_loc = target->message_loc,
3046  .read_name = 0,
3047  .write_name = target->name,
3048  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3049  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3050  .value = value
3051  };
3052 
3053  pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3054 
3055  // Here we're going to free the target, since it is no longer necessary.
3056  // However, we don't want to call `pm_node_destroy` because we want to keep
3057  // around all of its children since we just reused them.
3058  xfree(target);
3059 
3060  return node;
3061 }
3062 
3066 static pm_index_operator_write_node_t *
3067 pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3068  pm_index_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_operator_write_node_t);
3069 
3070  pm_index_arguments_check(parser, target->arguments, target->block);
3071 
3072  assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3073  *node = (pm_index_operator_write_node_t) {
3074  {
3075  .type = PM_INDEX_OPERATOR_WRITE_NODE,
3076  .flags = target->base.flags,
3077  .node_id = PM_NODE_IDENTIFY(parser),
3078  .location = {
3079  .start = target->base.location.start,
3080  .end = value->location.end
3081  }
3082  },
3083  .receiver = target->receiver,
3084  .call_operator_loc = target->call_operator_loc,
3085  .opening_loc = target->opening_loc,
3086  .arguments = target->arguments,
3087  .closing_loc = target->closing_loc,
3088  .block = (pm_block_argument_node_t *) target->block,
3089  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
3090  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3091  .value = value
3092  };
3093 
3094  // Here we're going to free the target, since it is no longer necessary.
3095  // However, we don't want to call `pm_node_destroy` because we want to keep
3096  // around all of its children since we just reused them.
3097  xfree(target);
3098 
3099  return node;
3100 }
3101 
3105 static pm_call_or_write_node_t *
3106 pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3107  assert(target->block == NULL);
3108  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3109  pm_call_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_call_or_write_node_t);
3110 
3111  *node = (pm_call_or_write_node_t) {
3112  {
3113  .type = PM_CALL_OR_WRITE_NODE,
3114  .flags = target->base.flags,
3115  .node_id = PM_NODE_IDENTIFY(parser),
3116  .location = {
3117  .start = target->base.location.start,
3118  .end = value->location.end
3119  }
3120  },
3121  .receiver = target->receiver,
3122  .call_operator_loc = target->call_operator_loc,
3123  .message_loc = target->message_loc,
3124  .read_name = 0,
3125  .write_name = target->name,
3126  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3127  .value = value
3128  };
3129 
3130  pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3131 
3132  // Here we're going to free the target, since it is no longer necessary.
3133  // However, we don't want to call `pm_node_destroy` because we want to keep
3134  // around all of its children since we just reused them.
3135  xfree(target);
3136 
3137  return node;
3138 }
3139 
3143 static pm_index_or_write_node_t *
3144 pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3145  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3146  pm_index_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_index_or_write_node_t);
3147 
3148  pm_index_arguments_check(parser, target->arguments, target->block);
3149 
3150  assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3151  *node = (pm_index_or_write_node_t) {
3152  {
3153  .type = PM_INDEX_OR_WRITE_NODE,
3154  .flags = target->base.flags,
3155  .node_id = PM_NODE_IDENTIFY(parser),
3156  .location = {
3157  .start = target->base.location.start,
3158  .end = value->location.end
3159  }
3160  },
3161  .receiver = target->receiver,
3162  .call_operator_loc = target->call_operator_loc,
3163  .opening_loc = target->opening_loc,
3164  .arguments = target->arguments,
3165  .closing_loc = target->closing_loc,
3166  .block = (pm_block_argument_node_t *) target->block,
3167  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3168  .value = value
3169  };
3170 
3171  // Here we're going to free the target, since it is no longer necessary.
3172  // However, we don't want to call `pm_node_destroy` because we want to keep
3173  // around all of its children since we just reused them.
3174  xfree(target);
3175 
3176  return node;
3177 }
3178 
3183 static pm_call_target_node_t *
3184 pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3185  pm_call_target_node_t *node = PM_NODE_ALLOC(parser, pm_call_target_node_t);
3186 
3187  *node = (pm_call_target_node_t) {
3188  {
3189  .type = PM_CALL_TARGET_NODE,
3190  .flags = target->base.flags,
3191  .node_id = PM_NODE_IDENTIFY(parser),
3192  .location = target->base.location
3193  },
3194  .receiver = target->receiver,
3195  .call_operator_loc = target->call_operator_loc,
3196  .name = target->name,
3197  .message_loc = target->message_loc
3198  };
3199 
3200  // Here we're going to free the target, since it is no longer necessary.
3201  // However, we don't want to call `pm_node_destroy` because we want to keep
3202  // around all of its children since we just reused them.
3203  xfree(target);
3204 
3205  return node;
3206 }
3207 
3212 static pm_index_target_node_t *
3213 pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3214  pm_index_target_node_t *node = PM_NODE_ALLOC(parser, pm_index_target_node_t);
3215  pm_node_flags_t flags = target->base.flags;
3216 
3217  pm_index_arguments_check(parser, target->arguments, target->block);
3218 
3219  assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3220  *node = (pm_index_target_node_t) {
3221  {
3222  .type = PM_INDEX_TARGET_NODE,
3223  .flags = flags | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3224  .node_id = PM_NODE_IDENTIFY(parser),
3225  .location = target->base.location
3226  },
3227  .receiver = target->receiver,
3228  .opening_loc = target->opening_loc,
3229  .arguments = target->arguments,
3230  .closing_loc = target->closing_loc,
3231  .block = (pm_block_argument_node_t *) target->block,
3232  };
3233 
3234  // Here we're going to free the target, since it is no longer necessary.
3235  // However, we don't want to call `pm_node_destroy` because we want to keep
3236  // around all of its children since we just reused them.
3237  xfree(target);
3238 
3239  return node;
3240 }
3241 
3245 static pm_capture_pattern_node_t *
3246 pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3247  pm_capture_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_capture_pattern_node_t);
3248 
3249  *node = (pm_capture_pattern_node_t) {
3250  {
3251  .type = PM_CAPTURE_PATTERN_NODE,
3252  .node_id = PM_NODE_IDENTIFY(parser),
3253  .location = {
3254  .start = value->location.start,
3255  .end = target->base.location.end
3256  },
3257  },
3258  .value = value,
3259  .target = target,
3260  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
3261  };
3262 
3263  return node;
3264 }
3265 
3269 static pm_case_node_t *
3270 pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3271  pm_case_node_t *node = PM_NODE_ALLOC(parser, pm_case_node_t);
3272 
3273  *node = (pm_case_node_t) {
3274  {
3275  .type = PM_CASE_NODE,
3276  .node_id = PM_NODE_IDENTIFY(parser),
3277  .location = {
3278  .start = case_keyword->start,
3279  .end = end_keyword->end
3280  },
3281  },
3282  .predicate = predicate,
3283  .else_clause = NULL,
3284  .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3285  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3286  .conditions = { 0 }
3287  };
3288 
3289  return node;
3290 }
3291 
3295 static void
3296 pm_case_node_condition_append(pm_case_node_t *node, pm_node_t *condition) {
3297  assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3298 
3299  pm_node_list_append(&node->conditions, condition);
3300  node->base.location.end = condition->location.end;
3301 }
3302 
3306 static void
3307 pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3308  node->else_clause = else_clause;
3309  node->base.location.end = else_clause->base.location.end;
3310 }
3311 
3315 static void
3316 pm_case_node_end_keyword_loc_set(pm_case_node_t *node, const pm_token_t *end_keyword) {
3317  node->base.location.end = end_keyword->end;
3318  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3319 }
3320 
3324 static pm_case_match_node_t *
3325 pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3326  pm_case_match_node_t *node = PM_NODE_ALLOC(parser, pm_case_match_node_t);
3327 
3328  *node = (pm_case_match_node_t) {
3329  {
3330  .type = PM_CASE_MATCH_NODE,
3331  .node_id = PM_NODE_IDENTIFY(parser),
3332  .location = {
3333  .start = case_keyword->start,
3334  .end = end_keyword->end
3335  },
3336  },
3337  .predicate = predicate,
3338  .else_clause = NULL,
3339  .case_keyword_loc = PM_LOCATION_TOKEN_VALUE(case_keyword),
3340  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3341  .conditions = { 0 }
3342  };
3343 
3344  return node;
3345 }
3346 
3350 static void
3351 pm_case_match_node_condition_append(pm_case_match_node_t *node, pm_node_t *condition) {
3352  assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3353 
3354  pm_node_list_append(&node->conditions, condition);
3355  node->base.location.end = condition->location.end;
3356 }
3357 
3361 static void
3362 pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3363  node->else_clause = else_clause;
3364  node->base.location.end = else_clause->base.location.end;
3365 }
3366 
3370 static void
3371 pm_case_match_node_end_keyword_loc_set(pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3372  node->base.location.end = end_keyword->end;
3373  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
3374 }
3375 
3379 static pm_class_node_t *
3380 pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3381  pm_class_node_t *node = PM_NODE_ALLOC(parser, pm_class_node_t);
3382 
3383  *node = (pm_class_node_t) {
3384  {
3385  .type = PM_CLASS_NODE,
3386  .node_id = PM_NODE_IDENTIFY(parser),
3387  .location = { .start = class_keyword->start, .end = end_keyword->end },
3388  },
3389  .locals = *locals,
3390  .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
3391  .constant_path = constant_path,
3392  .inheritance_operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(inheritance_operator),
3393  .superclass = superclass,
3394  .body = body,
3395  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
3396  .name = pm_parser_constant_id_token(parser, name)
3397  };
3398 
3399  return node;
3400 }
3401 
3405 static pm_class_variable_and_write_node_t *
3406 pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3407  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3408  pm_class_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_and_write_node_t);
3409 
3410  *node = (pm_class_variable_and_write_node_t) {
3411  {
3412  .type = PM_CLASS_VARIABLE_AND_WRITE_NODE,
3413  .node_id = PM_NODE_IDENTIFY(parser),
3414  .location = {
3415  .start = target->base.location.start,
3416  .end = value->location.end
3417  }
3418  },
3419  .name = target->name,
3420  .name_loc = target->base.location,
3421  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3422  .value = value
3423  };
3424 
3425  return node;
3426 }
3427 
3431 static pm_class_variable_operator_write_node_t *
3432 pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3433  pm_class_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_operator_write_node_t);
3434 
3435  *node = (pm_class_variable_operator_write_node_t) {
3436  {
3437  .type = PM_CLASS_VARIABLE_OPERATOR_WRITE_NODE,
3438  .node_id = PM_NODE_IDENTIFY(parser),
3439  .location = {
3440  .start = target->base.location.start,
3441  .end = value->location.end
3442  }
3443  },
3444  .name = target->name,
3445  .name_loc = target->base.location,
3446  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3447  .value = value,
3448  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3449  };
3450 
3451  return node;
3452 }
3453 
3457 static pm_class_variable_or_write_node_t *
3458 pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3459  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3460  pm_class_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_or_write_node_t);
3461 
3462  *node = (pm_class_variable_or_write_node_t) {
3463  {
3464  .type = PM_CLASS_VARIABLE_OR_WRITE_NODE,
3465  .node_id = PM_NODE_IDENTIFY(parser),
3466  .location = {
3467  .start = target->base.location.start,
3468  .end = value->location.end
3469  }
3470  },
3471  .name = target->name,
3472  .name_loc = target->base.location,
3473  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3474  .value = value
3475  };
3476 
3477  return node;
3478 }
3479 
3483 static pm_class_variable_read_node_t *
3484 pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3485  assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3486  pm_class_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_read_node_t);
3487 
3488  *node = (pm_class_variable_read_node_t) {
3489  {
3490  .type = PM_CLASS_VARIABLE_READ_NODE,
3491  .node_id = PM_NODE_IDENTIFY(parser),
3492  .location = PM_LOCATION_TOKEN_VALUE(token)
3493  },
3494  .name = pm_parser_constant_id_token(parser, token)
3495  };
3496 
3497  return node;
3498 }
3499 
3506 static inline pm_node_flags_t
3507 pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3508  if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.start == NULL) {
3509  return flags;
3510  }
3511  return 0;
3512 }
3513 
3517 static pm_class_variable_write_node_t *
3518 pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3519  pm_class_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_class_variable_write_node_t);
3520 
3521  *node = (pm_class_variable_write_node_t) {
3522  {
3523  .type = PM_CLASS_VARIABLE_WRITE_NODE,
3524  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3525  .node_id = PM_NODE_IDENTIFY(parser),
3526  .location = {
3527  .start = read_node->base.location.start,
3528  .end = value->location.end
3529  },
3530  },
3531  .name = read_node->name,
3532  .name_loc = PM_LOCATION_NODE_VALUE((pm_node_t *) read_node),
3533  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3534  .value = value
3535  };
3536 
3537  return node;
3538 }
3539 
3543 static pm_constant_path_and_write_node_t *
3544 pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3545  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3546  pm_constant_path_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_and_write_node_t);
3547 
3548  *node = (pm_constant_path_and_write_node_t) {
3549  {
3550  .type = PM_CONSTANT_PATH_AND_WRITE_NODE,
3551  .node_id = PM_NODE_IDENTIFY(parser),
3552  .location = {
3553  .start = target->base.location.start,
3554  .end = value->location.end
3555  }
3556  },
3557  .target = target,
3558  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3559  .value = value
3560  };
3561 
3562  return node;
3563 }
3564 
3568 static pm_constant_path_operator_write_node_t *
3569 pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3570  pm_constant_path_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_operator_write_node_t);
3571 
3572  *node = (pm_constant_path_operator_write_node_t) {
3573  {
3574  .type = PM_CONSTANT_PATH_OPERATOR_WRITE_NODE,
3575  .node_id = PM_NODE_IDENTIFY(parser),
3576  .location = {
3577  .start = target->base.location.start,
3578  .end = value->location.end
3579  }
3580  },
3581  .target = target,
3582  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3583  .value = value,
3584  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3585  };
3586 
3587  return node;
3588 }
3589 
3593 static pm_constant_path_or_write_node_t *
3594 pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3595  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3596  pm_constant_path_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_or_write_node_t);
3597 
3598  *node = (pm_constant_path_or_write_node_t) {
3599  {
3600  .type = PM_CONSTANT_PATH_OR_WRITE_NODE,
3601  .node_id = PM_NODE_IDENTIFY(parser),
3602  .location = {
3603  .start = target->base.location.start,
3604  .end = value->location.end
3605  }
3606  },
3607  .target = target,
3608  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3609  .value = value
3610  };
3611 
3612  return node;
3613 }
3614 
3618 static pm_constant_path_node_t *
3619 pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
3620  pm_assert_value_expression(parser, parent);
3621  pm_constant_path_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_node_t);
3622 
3623  pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3624  if (name_token->type == PM_TOKEN_CONSTANT) {
3625  name = pm_parser_constant_id_token(parser, name_token);
3626  }
3627 
3628  *node = (pm_constant_path_node_t) {
3629  {
3630  .type = PM_CONSTANT_PATH_NODE,
3631  .node_id = PM_NODE_IDENTIFY(parser),
3632  .location = {
3633  .start = parent == NULL ? delimiter->start : parent->location.start,
3634  .end = name_token->end
3635  },
3636  },
3637  .parent = parent,
3638  .name = name,
3639  .delimiter_loc = PM_LOCATION_TOKEN_VALUE(delimiter),
3640  .name_loc = PM_LOCATION_TOKEN_VALUE(name_token)
3641  };
3642 
3643  return node;
3644 }
3645 
3649 static pm_constant_path_write_node_t *
3650 pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3651  pm_constant_path_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_path_write_node_t);
3652 
3653  *node = (pm_constant_path_write_node_t) {
3654  {
3655  .type = PM_CONSTANT_PATH_WRITE_NODE,
3656  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3657  .node_id = PM_NODE_IDENTIFY(parser),
3658  .location = {
3659  .start = target->base.location.start,
3660  .end = value->location.end
3661  },
3662  },
3663  .target = target,
3664  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3665  .value = value
3666  };
3667 
3668  return node;
3669 }
3670 
3674 static pm_constant_and_write_node_t *
3675 pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3676  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3677  pm_constant_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_and_write_node_t);
3678 
3679  *node = (pm_constant_and_write_node_t) {
3680  {
3681  .type = PM_CONSTANT_AND_WRITE_NODE,
3682  .node_id = PM_NODE_IDENTIFY(parser),
3683  .location = {
3684  .start = target->base.location.start,
3685  .end = value->location.end
3686  }
3687  },
3688  .name = target->name,
3689  .name_loc = target->base.location,
3690  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3691  .value = value
3692  };
3693 
3694  return node;
3695 }
3696 
3700 static pm_constant_operator_write_node_t *
3701 pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3702  pm_constant_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_operator_write_node_t);
3703 
3704  *node = (pm_constant_operator_write_node_t) {
3705  {
3706  .type = PM_CONSTANT_OPERATOR_WRITE_NODE,
3707  .node_id = PM_NODE_IDENTIFY(parser),
3708  .location = {
3709  .start = target->base.location.start,
3710  .end = value->location.end
3711  }
3712  },
3713  .name = target->name,
3714  .name_loc = target->base.location,
3715  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3716  .value = value,
3717  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
3718  };
3719 
3720  return node;
3721 }
3722 
3726 static pm_constant_or_write_node_t *
3727 pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3728  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3729  pm_constant_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_or_write_node_t);
3730 
3731  *node = (pm_constant_or_write_node_t) {
3732  {
3733  .type = PM_CONSTANT_OR_WRITE_NODE,
3734  .node_id = PM_NODE_IDENTIFY(parser),
3735  .location = {
3736  .start = target->base.location.start,
3737  .end = value->location.end
3738  }
3739  },
3740  .name = target->name,
3741  .name_loc = target->base.location,
3742  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3743  .value = value
3744  };
3745 
3746  return node;
3747 }
3748 
3752 static pm_constant_read_node_t *
3753 pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
3754  assert(name->type == PM_TOKEN_CONSTANT || name->type == PM_TOKEN_MISSING);
3755  pm_constant_read_node_t *node = PM_NODE_ALLOC(parser, pm_constant_read_node_t);
3756 
3757  *node = (pm_constant_read_node_t) {
3758  {
3759  .type = PM_CONSTANT_READ_NODE,
3760  .node_id = PM_NODE_IDENTIFY(parser),
3761  .location = PM_LOCATION_TOKEN_VALUE(name)
3762  },
3763  .name = pm_parser_constant_id_token(parser, name)
3764  };
3765 
3766  return node;
3767 }
3768 
3772 static pm_constant_write_node_t *
3773 pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3774  pm_constant_write_node_t *node = PM_NODE_ALLOC(parser, pm_constant_write_node_t);
3775 
3776  *node = (pm_constant_write_node_t) {
3777  {
3778  .type = PM_CONSTANT_WRITE_NODE,
3779  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3780  .node_id = PM_NODE_IDENTIFY(parser),
3781  .location = {
3782  .start = target->base.location.start,
3783  .end = value->location.end
3784  }
3785  },
3786  .name = target->name,
3787  .name_loc = target->base.location,
3788  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3789  .value = value
3790  };
3791 
3792  return node;
3793 }
3794 
3798 static void
3799 pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3800  switch (PM_NODE_TYPE(node)) {
3801  case PM_BEGIN_NODE: {
3802  const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3803  if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
3804  break;
3805  }
3806  case PM_PARENTHESES_NODE: {
3807  const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3808  if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3809  break;
3810  }
3811  case PM_STATEMENTS_NODE: {
3812  const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
3813  pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3814  break;
3815  }
3816  case PM_ARRAY_NODE:
3817  case PM_FLOAT_NODE:
3818  case PM_IMAGINARY_NODE:
3819  case PM_INTEGER_NODE:
3820  case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3821  case PM_INTERPOLATED_STRING_NODE:
3822  case PM_INTERPOLATED_SYMBOL_NODE:
3823  case PM_INTERPOLATED_X_STRING_NODE:
3824  case PM_RATIONAL_NODE:
3825  case PM_REGULAR_EXPRESSION_NODE:
3826  case PM_SOURCE_ENCODING_NODE:
3827  case PM_SOURCE_FILE_NODE:
3828  case PM_SOURCE_LINE_NODE:
3829  case PM_STRING_NODE:
3830  case PM_SYMBOL_NODE:
3831  case PM_X_STRING_NODE:
3832  pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
3833  break;
3834  default:
3835  break;
3836  }
3837 }
3838 
3842 static pm_def_node_t *
3843 pm_def_node_create(
3844  pm_parser_t *parser,
3845  pm_constant_id_t name,
3846  const pm_token_t *name_loc,
3847  pm_node_t *receiver,
3848  pm_parameters_node_t *parameters,
3849  pm_node_t *body,
3850  pm_constant_id_list_t *locals,
3851  const pm_token_t *def_keyword,
3852  const pm_token_t *operator,
3853  const pm_token_t *lparen,
3854  const pm_token_t *rparen,
3855  const pm_token_t *equal,
3856  const pm_token_t *end_keyword
3857 ) {
3858  pm_def_node_t *node = PM_NODE_ALLOC(parser, pm_def_node_t);
3859  const uint8_t *end;
3860 
3861  if (end_keyword->type == PM_TOKEN_NOT_PROVIDED) {
3862  end = body->location.end;
3863  } else {
3864  end = end_keyword->end;
3865  }
3866 
3867  if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
3868  pm_def_node_receiver_check(parser, receiver);
3869  }
3870 
3871  *node = (pm_def_node_t) {
3872  {
3873  .type = PM_DEF_NODE,
3874  .node_id = PM_NODE_IDENTIFY(parser),
3875  .location = { .start = def_keyword->start, .end = end },
3876  },
3877  .name = name,
3878  .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
3879  .receiver = receiver,
3880  .parameters = parameters,
3881  .body = body,
3882  .locals = *locals,
3883  .def_keyword_loc = PM_LOCATION_TOKEN_VALUE(def_keyword),
3884  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
3885  .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3886  .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3887  .equal_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(equal),
3888  .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3889  };
3890 
3891  return node;
3892 }
3893 
3897 static pm_defined_node_t *
3898 pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_location_t *keyword_loc) {
3899  pm_defined_node_t *node = PM_NODE_ALLOC(parser, pm_defined_node_t);
3900 
3901  *node = (pm_defined_node_t) {
3902  {
3903  .type = PM_DEFINED_NODE,
3904  .node_id = PM_NODE_IDENTIFY(parser),
3905  .location = {
3906  .start = keyword_loc->start,
3907  .end = (rparen->type == PM_TOKEN_NOT_PROVIDED ? value->location.end : rparen->end)
3908  },
3909  },
3910  .lparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(lparen),
3911  .value = value,
3912  .rparen_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(rparen),
3913  .keyword_loc = *keyword_loc
3914  };
3915 
3916  return node;
3917 }
3918 
3922 static pm_else_node_t *
3923 pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3924  pm_else_node_t *node = PM_NODE_ALLOC(parser, pm_else_node_t);
3925  const uint8_t *end = NULL;
3926  if ((end_keyword->type == PM_TOKEN_NOT_PROVIDED) && (statements != NULL)) {
3927  end = statements->base.location.end;
3928  } else {
3929  end = end_keyword->end;
3930  }
3931 
3932  *node = (pm_else_node_t) {
3933  {
3934  .type = PM_ELSE_NODE,
3935  .node_id = PM_NODE_IDENTIFY(parser),
3936  .location = {
3937  .start = else_keyword->start,
3938  .end = end,
3939  },
3940  },
3941  .else_keyword_loc = PM_LOCATION_TOKEN_VALUE(else_keyword),
3942  .statements = statements,
3943  .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
3944  };
3945 
3946  return node;
3947 }
3948 
3952 static pm_embedded_statements_node_t *
3953 pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3954  pm_embedded_statements_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_statements_node_t);
3955 
3956  *node = (pm_embedded_statements_node_t) {
3957  {
3958  .type = PM_EMBEDDED_STATEMENTS_NODE,
3959  .node_id = PM_NODE_IDENTIFY(parser),
3960  .location = {
3961  .start = opening->start,
3962  .end = closing->end
3963  }
3964  },
3965  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
3966  .statements = statements,
3967  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
3968  };
3969 
3970  return node;
3971 }
3972 
3976 static pm_embedded_variable_node_t *
3977 pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3978  pm_embedded_variable_node_t *node = PM_NODE_ALLOC(parser, pm_embedded_variable_node_t);
3979 
3980  *node = (pm_embedded_variable_node_t) {
3981  {
3982  .type = PM_EMBEDDED_VARIABLE_NODE,
3983  .node_id = PM_NODE_IDENTIFY(parser),
3984  .location = {
3985  .start = operator->start,
3986  .end = variable->location.end
3987  }
3988  },
3989  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
3990  .variable = variable
3991  };
3992 
3993  return node;
3994 }
3995 
3999 static pm_ensure_node_t *
4000 pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
4001  pm_ensure_node_t *node = PM_NODE_ALLOC(parser, pm_ensure_node_t);
4002 
4003  *node = (pm_ensure_node_t) {
4004  {
4005  .type = PM_ENSURE_NODE,
4006  .node_id = PM_NODE_IDENTIFY(parser),
4007  .location = {
4008  .start = ensure_keyword->start,
4009  .end = end_keyword->end
4010  },
4011  },
4012  .ensure_keyword_loc = PM_LOCATION_TOKEN_VALUE(ensure_keyword),
4013  .statements = statements,
4014  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4015  };
4016 
4017  return node;
4018 }
4019 
4023 static pm_false_node_t *
4024 pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
4025  assert(token->type == PM_TOKEN_KEYWORD_FALSE);
4026  pm_false_node_t *node = PM_NODE_ALLOC(parser, pm_false_node_t);
4027 
4028  *node = (pm_false_node_t) {{
4029  .type = PM_FALSE_NODE,
4030  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4031  .node_id = PM_NODE_IDENTIFY(parser),
4032  .location = PM_LOCATION_TOKEN_VALUE(token)
4033  }};
4034 
4035  return node;
4036 }
4037 
4042 static pm_find_pattern_node_t *
4043 pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
4044  pm_find_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_find_pattern_node_t);
4045 
4046  pm_node_t *left = nodes->nodes[0];
4047  assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
4048  pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
4049 
4050  pm_node_t *right;
4051 
4052  if (nodes->size == 1) {
4053  right = (pm_node_t *) pm_missing_node_create(parser, left->location.end, left->location.end);
4054  } else {
4055  right = nodes->nodes[nodes->size - 1];
4056  assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
4057  }
4058 
4059 #if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
4060  // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
4061  // The resulting AST will anyway be ignored, but this file still needs to compile.
4062  pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
4063 #else
4064  pm_node_t *right_splat_node = right;
4065 #endif
4066  *node = (pm_find_pattern_node_t) {
4067  {
4068  .type = PM_FIND_PATTERN_NODE,
4069  .node_id = PM_NODE_IDENTIFY(parser),
4070  .location = {
4071  .start = left->location.start,
4072  .end = right->location.end,
4073  },
4074  },
4075  .constant = NULL,
4076  .left = left_splat_node,
4077  .right = right_splat_node,
4078  .requireds = { 0 },
4079  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4080  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4081  };
4082 
4083  // For now we're going to just copy over each pointer manually. This could be
4084  // much more efficient, as we could instead resize the node list to only point
4085  // to 1...-1.
4086  for (size_t index = 1; index < nodes->size - 1; index++) {
4087  pm_node_list_append(&node->requireds, nodes->nodes[index]);
4088  }
4089 
4090  return node;
4091 }
4092 
4097 static double
4098 pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4099  ptrdiff_t diff = token->end - token->start;
4100  if (diff <= 0) return 0.0;
4101 
4102  // First, get a buffer of the content.
4103  size_t length = (size_t) diff;
4104  char *buffer = xmalloc(sizeof(char) * (length + 1));
4105  memcpy((void *) buffer, token->start, length);
4106 
4107  // Next, determine if we need to replace the decimal point because of
4108  // locale-specific options, and then normalize them if we have to.
4109  char decimal_point = *localeconv()->decimal_point;
4110  if (decimal_point != '.') {
4111  for (size_t index = 0; index < length; index++) {
4112  if (buffer[index] == '.') buffer[index] = decimal_point;
4113  }
4114  }
4115 
4116  // Next, handle underscores by removing them from the buffer.
4117  for (size_t index = 0; index < length; index++) {
4118  if (buffer[index] == '_') {
4119  memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
4120  length--;
4121  }
4122  }
4123 
4124  // Null-terminate the buffer so that strtod cannot read off the end.
4125  buffer[length] = '\0';
4126 
4127  // Now, call strtod to parse the value. Note that CRuby has their own
4128  // version of strtod which avoids locales. We're okay using the locale-aware
4129  // version because we've already validated through the parser that the token
4130  // is in a valid format.
4131  errno = 0;
4132  char *eptr;
4133  double value = strtod(buffer, &eptr);
4134 
4135  // This should never happen, because we've already checked that the token
4136  // is in a valid format. However it's good to be safe.
4137  if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
4138  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, (*token), PM_ERR_FLOAT_PARSE);
4139  xfree((void *) buffer);
4140  return 0.0;
4141  }
4142 
4143  // If errno is set, then it should only be ERANGE. At this point we need to
4144  // check if it's infinity (it should be).
4145  if (errno == ERANGE && isinf(value)) {
4146  int warn_width;
4147  const char *ellipsis;
4148 
4149  if (length > 20) {
4150  warn_width = 20;
4151  ellipsis = "...";
4152  } else {
4153  warn_width = (int) length;
4154  ellipsis = "";
4155  }
4156 
4157  pm_diagnostic_list_append_format(&parser->warning_list, token->start, token->end, PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
4158  value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
4159  }
4160 
4161  // Finally we can free the buffer and return the value.
4162  xfree((void *) buffer);
4163  return value;
4164 }
4165 
4169 static pm_float_node_t *
4170 pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
4171  assert(token->type == PM_TOKEN_FLOAT);
4172  pm_float_node_t *node = PM_NODE_ALLOC(parser, pm_float_node_t);
4173 
4174  *node = (pm_float_node_t) {
4175  {
4176  .type = PM_FLOAT_NODE,
4177  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4178  .node_id = PM_NODE_IDENTIFY(parser),
4179  .location = PM_LOCATION_TOKEN_VALUE(token)
4180  },
4181  .value = pm_double_parse(parser, token)
4182  };
4183 
4184  return node;
4185 }
4186 
4190 static pm_imaginary_node_t *
4191 pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4192  assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
4193 
4194  pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4195  *node = (pm_imaginary_node_t) {
4196  {
4197  .type = PM_IMAGINARY_NODE,
4198  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4199  .node_id = PM_NODE_IDENTIFY(parser),
4200  .location = PM_LOCATION_TOKEN_VALUE(token)
4201  },
4202  .numeric = (pm_node_t *) pm_float_node_create(parser, &((pm_token_t) {
4203  .type = PM_TOKEN_FLOAT,
4204  .start = token->start,
4205  .end = token->end - 1
4206  }))
4207  };
4208 
4209  return node;
4210 }
4211 
4215 static pm_rational_node_t *
4216 pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
4217  assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
4218 
4219  pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4220  *node = (pm_rational_node_t) {
4221  {
4222  .type = PM_RATIONAL_NODE,
4223  .flags = PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
4224  .node_id = PM_NODE_IDENTIFY(parser),
4225  .location = PM_LOCATION_TOKEN_VALUE(token)
4226  },
4227  .numerator = { 0 },
4228  .denominator = { 0 }
4229  };
4230 
4231  const uint8_t *start = token->start;
4232  const uint8_t *end = token->end - 1; // r
4233 
4234  while (start < end && *start == '0') start++; // 0.1 -> .1
4235  while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
4236 
4237  size_t length = (size_t) (end - start);
4238  if (length == 1) {
4239  node->denominator.value = 1;
4240  return node;
4241  }
4242 
4243  const uint8_t *point = memchr(start, '.', length);
4244  assert(point && "should have a decimal point");
4245 
4246  uint8_t *digits = malloc(length);
4247  if (digits == NULL) {
4248  fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
4249  abort();
4250  }
4251 
4252  memcpy(digits, start, (unsigned long) (point - start));
4253  memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
4254  pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
4255 
4256  digits[0] = '1';
4257  if (end - point > 1) memset(digits + 1, '0', (size_t) (end - point - 1));
4258  pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + (end - point));
4259  free(digits);
4260 
4261  pm_integers_reduce(&node->numerator, &node->denominator);
4262  return node;
4263 }
4264 
4269 static pm_imaginary_node_t *
4270 pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4271  assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4272 
4273  pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4274  *node = (pm_imaginary_node_t) {
4275  {
4276  .type = PM_IMAGINARY_NODE,
4277  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4278  .node_id = PM_NODE_IDENTIFY(parser),
4279  .location = PM_LOCATION_TOKEN_VALUE(token)
4280  },
4281  .numeric = (pm_node_t *) pm_float_node_rational_create(parser, &((pm_token_t) {
4282  .type = PM_TOKEN_FLOAT_RATIONAL,
4283  .start = token->start,
4284  .end = token->end - 1
4285  }))
4286  };
4287 
4288  return node;
4289 }
4290 
4294 static pm_for_node_t *
4295 pm_for_node_create(
4296  pm_parser_t *parser,
4297  pm_node_t *index,
4298  pm_node_t *collection,
4299  pm_statements_node_t *statements,
4300  const pm_token_t *for_keyword,
4301  const pm_token_t *in_keyword,
4302  const pm_token_t *do_keyword,
4303  const pm_token_t *end_keyword
4304 ) {
4305  pm_for_node_t *node = PM_NODE_ALLOC(parser, pm_for_node_t);
4306 
4307  *node = (pm_for_node_t) {
4308  {
4309  .type = PM_FOR_NODE,
4310  .node_id = PM_NODE_IDENTIFY(parser),
4311  .location = {
4312  .start = for_keyword->start,
4313  .end = end_keyword->end
4314  },
4315  },
4316  .index = index,
4317  .collection = collection,
4318  .statements = statements,
4319  .for_keyword_loc = PM_LOCATION_TOKEN_VALUE(for_keyword),
4320  .in_keyword_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
4321  .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
4322  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
4323  };
4324 
4325  return node;
4326 }
4327 
4331 static pm_forwarding_arguments_node_t *
4332 pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4333  assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4334  pm_forwarding_arguments_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_arguments_node_t);
4335 
4336  *node = (pm_forwarding_arguments_node_t) {{
4337  .type = PM_FORWARDING_ARGUMENTS_NODE,
4338  .node_id = PM_NODE_IDENTIFY(parser),
4339  .location = PM_LOCATION_TOKEN_VALUE(token)
4340  }};
4341 
4342  return node;
4343 }
4344 
4348 static pm_forwarding_parameter_node_t *
4349 pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4350  assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4351  pm_forwarding_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_parameter_node_t);
4352 
4353  *node = (pm_forwarding_parameter_node_t) {{
4354  .type = PM_FORWARDING_PARAMETER_NODE,
4355  .node_id = PM_NODE_IDENTIFY(parser),
4356  .location = PM_LOCATION_TOKEN_VALUE(token)
4357  }};
4358 
4359  return node;
4360 }
4361 
4365 static pm_forwarding_super_node_t *
4366 pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4367  assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4368  assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4369  pm_forwarding_super_node_t *node = PM_NODE_ALLOC(parser, pm_forwarding_super_node_t);
4370 
4371  pm_block_node_t *block = NULL;
4372  if (arguments->block != NULL) {
4373  block = (pm_block_node_t *) arguments->block;
4374  }
4375 
4376  *node = (pm_forwarding_super_node_t) {
4377  {
4378  .type = PM_FORWARDING_SUPER_NODE,
4379  .node_id = PM_NODE_IDENTIFY(parser),
4380  .location = {
4381  .start = token->start,
4382  .end = block != NULL ? block->base.location.end : token->end
4383  },
4384  },
4385  .block = block
4386  };
4387 
4388  return node;
4389 }
4390 
4395 static pm_hash_pattern_node_t *
4396 pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4397  pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4398 
4399  *node = (pm_hash_pattern_node_t) {
4400  {
4401  .type = PM_HASH_PATTERN_NODE,
4402  .node_id = PM_NODE_IDENTIFY(parser),
4403  .location = {
4404  .start = opening->start,
4405  .end = closing->end
4406  },
4407  },
4408  .constant = NULL,
4409  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4410  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
4411  .elements = { 0 },
4412  .rest = NULL
4413  };
4414 
4415  return node;
4416 }
4417 
4421 static pm_hash_pattern_node_t *
4422 pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4423  pm_hash_pattern_node_t *node = PM_NODE_ALLOC(parser, pm_hash_pattern_node_t);
4424 
4425  const uint8_t *start;
4426  const uint8_t *end;
4427 
4428  if (elements->size > 0) {
4429  if (rest) {
4430  start = elements->nodes[0]->location.start;
4431  end = rest->location.end;
4432  } else {
4433  start = elements->nodes[0]->location.start;
4434  end = elements->nodes[elements->size - 1]->location.end;
4435  }
4436  } else {
4437  assert(rest != NULL);
4438  start = rest->location.start;
4439  end = rest->location.end;
4440  }
4441 
4442  *node = (pm_hash_pattern_node_t) {
4443  {
4444  .type = PM_HASH_PATTERN_NODE,
4445  .node_id = PM_NODE_IDENTIFY(parser),
4446  .location = {
4447  .start = start,
4448  .end = end
4449  },
4450  },
4451  .constant = NULL,
4452  .elements = { 0 },
4453  .rest = rest,
4454  .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4455  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4456  };
4457 
4458  pm_node_t *element;
4459  PM_NODE_LIST_FOREACH(elements, index, element) {
4460  pm_node_list_append(&node->elements, element);
4461  }
4462 
4463  return node;
4464 }
4465 
4469 static pm_constant_id_t
4470 pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4471  switch (PM_NODE_TYPE(target)) {
4472  case PM_GLOBAL_VARIABLE_READ_NODE:
4473  return ((pm_global_variable_read_node_t *) target)->name;
4474  case PM_BACK_REFERENCE_READ_NODE:
4475  return ((pm_back_reference_read_node_t *) target)->name;
4476  case PM_NUMBERED_REFERENCE_READ_NODE:
4477  // This will only ever happen in the event of a syntax error, but we
4478  // still need to provide something for the node.
4479  return pm_parser_constant_id_location(parser, target->location.start, target->location.end);
4480  default:
4481  assert(false && "unreachable");
4482  return (pm_constant_id_t) -1;
4483  }
4484 }
4485 
4489 static pm_global_variable_and_write_node_t *
4490 pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4491  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4492  pm_global_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_and_write_node_t);
4493 
4494  *node = (pm_global_variable_and_write_node_t) {
4495  {
4496  .type = PM_GLOBAL_VARIABLE_AND_WRITE_NODE,
4497  .node_id = PM_NODE_IDENTIFY(parser),
4498  .location = {
4499  .start = target->location.start,
4500  .end = value->location.end
4501  }
4502  },
4503  .name = pm_global_variable_write_name(parser, target),
4504  .name_loc = target->location,
4505  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4506  .value = value
4507  };
4508 
4509  return node;
4510 }
4511 
4515 static pm_global_variable_operator_write_node_t *
4516 pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4517  pm_global_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_operator_write_node_t);
4518 
4519  *node = (pm_global_variable_operator_write_node_t) {
4520  {
4521  .type = PM_GLOBAL_VARIABLE_OPERATOR_WRITE_NODE,
4522  .node_id = PM_NODE_IDENTIFY(parser),
4523  .location = {
4524  .start = target->location.start,
4525  .end = value->location.end
4526  }
4527  },
4528  .name = pm_global_variable_write_name(parser, target),
4529  .name_loc = target->location,
4530  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4531  .value = value,
4532  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
4533  };
4534 
4535  return node;
4536 }
4537 
4541 static pm_global_variable_or_write_node_t *
4542 pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4543  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4544  pm_global_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_or_write_node_t);
4545 
4546  *node = (pm_global_variable_or_write_node_t) {
4547  {
4548  .type = PM_GLOBAL_VARIABLE_OR_WRITE_NODE,
4549  .node_id = PM_NODE_IDENTIFY(parser),
4550  .location = {
4551  .start = target->location.start,
4552  .end = value->location.end
4553  }
4554  },
4555  .name = pm_global_variable_write_name(parser, target),
4556  .name_loc = target->location,
4557  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
4558  .value = value
4559  };
4560 
4561  return node;
4562 }
4563 
4567 static pm_global_variable_read_node_t *
4568 pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4569  pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4570 
4571  *node = (pm_global_variable_read_node_t) {
4572  {
4573  .type = PM_GLOBAL_VARIABLE_READ_NODE,
4574  .node_id = PM_NODE_IDENTIFY(parser),
4575  .location = PM_LOCATION_TOKEN_VALUE(name),
4576  },
4577  .name = pm_parser_constant_id_token(parser, name)
4578  };
4579 
4580  return node;
4581 }
4582 
4586 static pm_global_variable_read_node_t *
4587 pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4588  pm_global_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_read_node_t);
4589 
4590  *node = (pm_global_variable_read_node_t) {
4591  {
4592  .type = PM_GLOBAL_VARIABLE_READ_NODE,
4593  .node_id = PM_NODE_IDENTIFY(parser),
4594  .location = PM_LOCATION_NULL_VALUE(parser)
4595  },
4596  .name = name
4597  };
4598 
4599  return node;
4600 }
4601 
4605 static pm_global_variable_write_node_t *
4606 pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4607  pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4608 
4609  *node = (pm_global_variable_write_node_t) {
4610  {
4611  .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4612  .node_id = PM_NODE_IDENTIFY(parser),
4613  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4614  .location = {
4615  .start = target->location.start,
4616  .end = value->location.end
4617  },
4618  },
4619  .name = pm_global_variable_write_name(parser, target),
4620  .name_loc = PM_LOCATION_NODE_VALUE(target),
4621  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
4622  .value = value
4623  };
4624 
4625  return node;
4626 }
4627 
4631 static pm_global_variable_write_node_t *
4632 pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4633  pm_global_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_global_variable_write_node_t);
4634 
4635  *node = (pm_global_variable_write_node_t) {
4636  {
4637  .type = PM_GLOBAL_VARIABLE_WRITE_NODE,
4638  .node_id = PM_NODE_IDENTIFY(parser),
4639  .location = PM_LOCATION_NULL_VALUE(parser)
4640  },
4641  .name = name,
4642  .name_loc = PM_LOCATION_NULL_VALUE(parser),
4643  .operator_loc = PM_LOCATION_NULL_VALUE(parser),
4644  .value = value
4645  };
4646 
4647  return node;
4648 }
4649 
4653 static pm_hash_node_t *
4654 pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4655  assert(opening != NULL);
4656  pm_hash_node_t *node = PM_NODE_ALLOC(parser, pm_hash_node_t);
4657 
4658  *node = (pm_hash_node_t) {
4659  {
4660  .type = PM_HASH_NODE,
4661  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4662  .node_id = PM_NODE_IDENTIFY(parser),
4663  .location = PM_LOCATION_TOKEN_VALUE(opening)
4664  },
4665  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
4666  .closing_loc = PM_LOCATION_NULL_VALUE(parser),
4667  .elements = { 0 }
4668  };
4669 
4670  return node;
4671 }
4672 
4676 static inline void
4677 pm_hash_node_elements_append(pm_hash_node_t *hash, pm_node_t *element) {
4678  pm_node_list_append(&hash->elements, element);
4679 
4680  bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4681  if (static_literal) {
4682  pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4683  static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4684  static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4685  static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4686  }
4687 
4688  if (!static_literal) {
4689  pm_node_flag_unset((pm_node_t *)hash, PM_NODE_FLAG_STATIC_LITERAL);
4690  }
4691 }
4692 
4693 static inline void
4694 pm_hash_node_closing_loc_set(pm_hash_node_t *hash, pm_token_t *token) {
4695  hash->base.location.end = token->end;
4696  hash->closing_loc = PM_LOCATION_TOKEN_VALUE(token);
4697 }
4698 
4702 static pm_if_node_t *
4703 pm_if_node_create(pm_parser_t *parser,
4704  const pm_token_t *if_keyword,
4705  pm_node_t *predicate,
4706  const pm_token_t *then_keyword,
4707  pm_statements_node_t *statements,
4708  pm_node_t *subsequent,
4709  const pm_token_t *end_keyword
4710 ) {
4711  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4712  pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4713 
4714  const uint8_t *end;
4715  if (end_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4716  end = end_keyword->end;
4717  } else if (subsequent != NULL) {
4718  end = subsequent->location.end;
4719  } else if (pm_statements_node_body_length(statements) != 0) {
4720  end = statements->base.location.end;
4721  } else {
4722  end = predicate->location.end;
4723  }
4724 
4725  *node = (pm_if_node_t) {
4726  {
4727  .type = PM_IF_NODE,
4728  .flags = PM_NODE_FLAG_NEWLINE,
4729  .node_id = PM_NODE_IDENTIFY(parser),
4730  .location = {
4731  .start = if_keyword->start,
4732  .end = end
4733  },
4734  },
4735  .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4736  .predicate = predicate,
4737  .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
4738  .statements = statements,
4739  .subsequent = subsequent,
4740  .end_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(end_keyword)
4741  };
4742 
4743  return node;
4744 }
4745 
4749 static pm_if_node_t *
4750 pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4751  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4752  pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4753 
4754  pm_statements_node_t *statements = pm_statements_node_create(parser);
4755  pm_statements_node_body_append(parser, statements, statement, true);
4756 
4757  *node = (pm_if_node_t) {
4758  {
4759  .type = PM_IF_NODE,
4760  .flags = PM_NODE_FLAG_NEWLINE,
4761  .node_id = PM_NODE_IDENTIFY(parser),
4762  .location = {
4763  .start = statement->location.start,
4764  .end = predicate->location.end
4765  },
4766  },
4767  .if_keyword_loc = PM_LOCATION_TOKEN_VALUE(if_keyword),
4768  .predicate = predicate,
4769  .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4770  .statements = statements,
4771  .subsequent = NULL,
4772  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4773  };
4774 
4775  return node;
4776 }
4777 
4781 static pm_if_node_t *
4782 pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4783  pm_assert_value_expression(parser, predicate);
4784  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4785 
4786  pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4787  pm_statements_node_body_append(parser, if_statements, true_expression, true);
4788 
4789  pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4790  pm_statements_node_body_append(parser, else_statements, false_expression, true);
4791 
4792  pm_token_t end_keyword = not_provided(parser);
4793  pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, &end_keyword);
4794 
4795  pm_if_node_t *node = PM_NODE_ALLOC(parser, pm_if_node_t);
4796 
4797  *node = (pm_if_node_t) {
4798  {
4799  .type = PM_IF_NODE,
4800  .flags = PM_NODE_FLAG_NEWLINE,
4801  .node_id = PM_NODE_IDENTIFY(parser),
4802  .location = {
4803  .start = predicate->location.start,
4804  .end = false_expression->location.end,
4805  },
4806  },
4807  .if_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
4808  .predicate = predicate,
4809  .then_keyword_loc = PM_LOCATION_TOKEN_VALUE(qmark),
4810  .statements = if_statements,
4811  .subsequent = (pm_node_t *) else_node,
4812  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
4813  };
4814 
4815  return node;
4816 
4817 }
4818 
4819 static inline void
4820 pm_if_node_end_keyword_loc_set(pm_if_node_t *node, const pm_token_t *keyword) {
4821  node->base.location.end = keyword->end;
4822  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4823 }
4824 
4825 static inline void
4826 pm_else_node_end_keyword_loc_set(pm_else_node_t *node, const pm_token_t *keyword) {
4827  node->base.location.end = keyword->end;
4828  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword);
4829 }
4830 
4834 static pm_implicit_node_t *
4835 pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4836  pm_implicit_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_node_t);
4837 
4838  *node = (pm_implicit_node_t) {
4839  {
4840  .type = PM_IMPLICIT_NODE,
4841  .node_id = PM_NODE_IDENTIFY(parser),
4842  .location = value->location
4843  },
4844  .value = value
4845  };
4846 
4847  return node;
4848 }
4849 
4853 static pm_implicit_rest_node_t *
4854 pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4855  assert(token->type == PM_TOKEN_COMMA);
4856 
4857  pm_implicit_rest_node_t *node = PM_NODE_ALLOC(parser, pm_implicit_rest_node_t);
4858 
4859  *node = (pm_implicit_rest_node_t) {
4860  {
4861  .type = PM_IMPLICIT_REST_NODE,
4862  .node_id = PM_NODE_IDENTIFY(parser),
4863  .location = PM_LOCATION_TOKEN_VALUE(token)
4864  }
4865  };
4866 
4867  return node;
4868 }
4869 
4873 static pm_integer_node_t *
4874 pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4875  assert(token->type == PM_TOKEN_INTEGER);
4876  pm_integer_node_t *node = PM_NODE_ALLOC(parser, pm_integer_node_t);
4877 
4878  *node = (pm_integer_node_t) {
4879  {
4880  .type = PM_INTEGER_NODE,
4881  .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4882  .node_id = PM_NODE_IDENTIFY(parser),
4883  .location = PM_LOCATION_TOKEN_VALUE(token)
4884  },
4885  .value = { 0 }
4886  };
4887 
4888  pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4889  switch (base) {
4890  case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4891  case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4892  case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4893  case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4894  default: assert(false && "unreachable"); break;
4895  }
4896 
4897  pm_integer_parse(&node->value, integer_base, token->start, token->end);
4898  return node;
4899 }
4900 
4905 static pm_imaginary_node_t *
4906 pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4907  assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4908 
4909  pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4910  *node = (pm_imaginary_node_t) {
4911  {
4912  .type = PM_IMAGINARY_NODE,
4913  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4914  .node_id = PM_NODE_IDENTIFY(parser),
4915  .location = PM_LOCATION_TOKEN_VALUE(token)
4916  },
4917  .numeric = (pm_node_t *) pm_integer_node_create(parser, base, &((pm_token_t) {
4918  .type = PM_TOKEN_INTEGER,
4919  .start = token->start,
4920  .end = token->end - 1
4921  }))
4922  };
4923 
4924  return node;
4925 }
4926 
4931 static pm_rational_node_t *
4932 pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4933  assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4934 
4935  pm_rational_node_t *node = PM_NODE_ALLOC(parser, pm_rational_node_t);
4936  *node = (pm_rational_node_t) {
4937  {
4938  .type = PM_RATIONAL_NODE,
4939  .flags = base | PM_NODE_FLAG_STATIC_LITERAL,
4940  .node_id = PM_NODE_IDENTIFY(parser),
4941  .location = PM_LOCATION_TOKEN_VALUE(token)
4942  },
4943  .numerator = { 0 },
4944  .denominator = { .value = 1, 0 }
4945  };
4946 
4947  pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4948  switch (base) {
4949  case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4950  case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4951  case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4952  case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4953  default: assert(false && "unreachable"); break;
4954  }
4955 
4956  pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4957 
4958  return node;
4959 }
4960 
4965 static pm_imaginary_node_t *
4966 pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4967  assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4968 
4969  pm_imaginary_node_t *node = PM_NODE_ALLOC(parser, pm_imaginary_node_t);
4970  *node = (pm_imaginary_node_t) {
4971  {
4972  .type = PM_IMAGINARY_NODE,
4973  .flags = PM_NODE_FLAG_STATIC_LITERAL,
4974  .node_id = PM_NODE_IDENTIFY(parser),
4975  .location = PM_LOCATION_TOKEN_VALUE(token)
4976  },
4977  .numeric = (pm_node_t *) pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4978  .type = PM_TOKEN_INTEGER_RATIONAL,
4979  .start = token->start,
4980  .end = token->end - 1
4981  }))
4982  };
4983 
4984  return node;
4985 }
4986 
4990 static pm_in_node_t *
4991 pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4992  pm_in_node_t *node = PM_NODE_ALLOC(parser, pm_in_node_t);
4993 
4994  const uint8_t *end;
4995  if (statements != NULL) {
4996  end = statements->base.location.end;
4997  } else if (then_keyword->type != PM_TOKEN_NOT_PROVIDED) {
4998  end = then_keyword->end;
4999  } else {
5000  end = pattern->location.end;
5001  }
5002 
5003  *node = (pm_in_node_t) {
5004  {
5005  .type = PM_IN_NODE,
5006  .node_id = PM_NODE_IDENTIFY(parser),
5007  .location = {
5008  .start = in_keyword->start,
5009  .end = end
5010  },
5011  },
5012  .pattern = pattern,
5013  .statements = statements,
5014  .in_loc = PM_LOCATION_TOKEN_VALUE(in_keyword),
5015  .then_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword)
5016  };
5017 
5018  return node;
5019 }
5020 
5024 static pm_instance_variable_and_write_node_t *
5025 pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5026  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5027  pm_instance_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_and_write_node_t);
5028 
5029  *node = (pm_instance_variable_and_write_node_t) {
5030  {
5031  .type = PM_INSTANCE_VARIABLE_AND_WRITE_NODE,
5032  .node_id = PM_NODE_IDENTIFY(parser),
5033  .location = {
5034  .start = target->base.location.start,
5035  .end = value->location.end
5036  }
5037  },
5038  .name = target->name,
5039  .name_loc = target->base.location,
5040  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5041  .value = value
5042  };
5043 
5044  return node;
5045 }
5046 
5050 static pm_instance_variable_operator_write_node_t *
5051 pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5052  pm_instance_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_operator_write_node_t);
5053 
5054  *node = (pm_instance_variable_operator_write_node_t) {
5055  {
5056  .type = PM_INSTANCE_VARIABLE_OPERATOR_WRITE_NODE,
5057  .node_id = PM_NODE_IDENTIFY(parser),
5058  .location = {
5059  .start = target->base.location.start,
5060  .end = value->location.end
5061  }
5062  },
5063  .name = target->name,
5064  .name_loc = target->base.location,
5065  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5066  .value = value,
5067  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1)
5068  };
5069 
5070  return node;
5071 }
5072 
5076 static pm_instance_variable_or_write_node_t *
5077 pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5078  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5079  pm_instance_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_or_write_node_t);
5080 
5081  *node = (pm_instance_variable_or_write_node_t) {
5082  {
5083  .type = PM_INSTANCE_VARIABLE_OR_WRITE_NODE,
5084  .node_id = PM_NODE_IDENTIFY(parser),
5085  .location = {
5086  .start = target->base.location.start,
5087  .end = value->location.end
5088  }
5089  },
5090  .name = target->name,
5091  .name_loc = target->base.location,
5092  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5093  .value = value
5094  };
5095 
5096  return node;
5097 }
5098 
5102 static pm_instance_variable_read_node_t *
5103 pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
5104  assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
5105  pm_instance_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_read_node_t);
5106 
5107  *node = (pm_instance_variable_read_node_t) {
5108  {
5109  .type = PM_INSTANCE_VARIABLE_READ_NODE,
5110  .node_id = PM_NODE_IDENTIFY(parser),
5111  .location = PM_LOCATION_TOKEN_VALUE(token)
5112  },
5113  .name = pm_parser_constant_id_token(parser, token)
5114  };
5115 
5116  return node;
5117 }
5118 
5123 static pm_instance_variable_write_node_t *
5124 pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
5125  pm_instance_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_instance_variable_write_node_t);
5126  *node = (pm_instance_variable_write_node_t) {
5127  {
5128  .type = PM_INSTANCE_VARIABLE_WRITE_NODE,
5129  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5130  .node_id = PM_NODE_IDENTIFY(parser),
5131  .location = {
5132  .start = read_node->base.location.start,
5133  .end = value->location.end
5134  }
5135  },
5136  .name = read_node->name,
5137  .name_loc = PM_LOCATION_NODE_BASE_VALUE(read_node),
5138  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator),
5139  .value = value
5140  };
5141 
5142  return node;
5143 }
5144 
5150 static void
5151 pm_interpolated_node_append(pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
5152  switch (PM_NODE_TYPE(part)) {
5153  case PM_STRING_NODE:
5154  pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5155  break;
5156  case PM_EMBEDDED_STATEMENTS_NODE: {
5157  pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5158  pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5159 
5160  if (embedded == NULL) {
5161  // If there are no statements or more than one statement, then
5162  // we lose the static literal flag.
5163  pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5164  } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5165  // If the embedded statement is a string, then we can keep the
5166  // static literal flag and mark the string as frozen.
5167  pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
5168  } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5169  // If the embedded statement is an interpolated string and it's
5170  // a static literal, then we can keep the static literal flag.
5171  } else {
5172  // Otherwise we lose the static literal flag.
5173  pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
5174  }
5175 
5176  break;
5177  }
5178  case PM_EMBEDDED_VARIABLE_NODE:
5179  pm_node_flag_unset((pm_node_t *) node, PM_NODE_FLAG_STATIC_LITERAL);
5180  break;
5181  default:
5182  assert(false && "unexpected node type");
5183  break;
5184  }
5185 
5186  pm_node_list_append(parts, part);
5187 }
5188 
5192 static pm_interpolated_regular_expression_node_t *
5193 pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
5194  pm_interpolated_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_regular_expression_node_t);
5195 
5196  *node = (pm_interpolated_regular_expression_node_t) {
5197  {
5198  .type = PM_INTERPOLATED_REGULAR_EXPRESSION_NODE,
5199  .flags = PM_NODE_FLAG_STATIC_LITERAL,
5200  .node_id = PM_NODE_IDENTIFY(parser),
5201  .location = {
5202  .start = opening->start,
5203  .end = NULL,
5204  },
5205  },
5206  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5207  .closing_loc = PM_LOCATION_TOKEN_VALUE(opening),
5208  .parts = { 0 }
5209  };
5210 
5211  return node;
5212 }
5213 
5214 static inline void
5215 pm_interpolated_regular_expression_node_append(pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
5216  if (node->base.location.start > part->location.start) {
5217  node->base.location.start = part->location.start;
5218  }
5219  if (node->base.location.end < part->location.end) {
5220  node->base.location.end = part->location.end;
5221  }
5222 
5223  pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5224 }
5225 
5226 static inline void
5227 pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
5228  node->closing_loc = PM_LOCATION_TOKEN_VALUE(closing);
5229  node->base.location.end = closing->end;
5230  pm_node_flag_set((pm_node_t *) node, pm_regular_expression_flags_create(parser, closing));
5231 }
5232 
5256 static inline void
5257 pm_interpolated_string_node_append(pm_interpolated_string_node_t *node, pm_node_t *part) {
5258 #define CLEAR_FLAGS(node) \
5259  node->base.flags = (pm_node_flags_t) (node->base.flags & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
5260 
5261 #define MUTABLE_FLAGS(node) \
5262  node->base.flags = (pm_node_flags_t) ((node->base.flags | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
5263 
5264  if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5265  node->base.location.start = part->location.start;
5266  }
5267 
5268  node->base.location.end = MAX(node->base.location.end, part->location.end);
5269 
5270  switch (PM_NODE_TYPE(part)) {
5271  case PM_STRING_NODE:
5272  part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5273  break;
5274  case PM_INTERPOLATED_STRING_NODE:
5275  if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
5276  // If the string that we're concatenating is a static literal,
5277  // then we can keep the static literal flag for this string.
5278  } else {
5279  // Otherwise, we lose the static literal flag here and we should
5280  // also clear the mutability flags.
5281  CLEAR_FLAGS(node);
5282  }
5283  break;
5284  case PM_EMBEDDED_STATEMENTS_NODE: {
5285  pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
5286  pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
5287 
5288  if (embedded == NULL) {
5289  // If we're embedding multiple statements or no statements, then
5290  // the string is not longer a static literal.
5291  CLEAR_FLAGS(node);
5292  } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
5293  // If the embedded statement is a string, then we can make that
5294  // string as frozen and static literal, and not touch the static
5295  // literal status of this string.
5296  embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
5297 
5298  if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5299  MUTABLE_FLAGS(node);
5300  }
5301  } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
5302  // If the embedded statement is an interpolated string, but that
5303  // string is marked as static literal, then we can keep our
5304  // static literal status for this string.
5305  if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
5306  MUTABLE_FLAGS(node);
5307  }
5308  } else {
5309  // In all other cases, we lose the static literal flag here and
5310  // become mutable.
5311  CLEAR_FLAGS(node);
5312  }
5313 
5314  break;
5315  }
5316  case PM_EMBEDDED_VARIABLE_NODE:
5317  // Embedded variables clear static literal, which means we also
5318  // should clear the mutability flags.
5319  CLEAR_FLAGS(node);
5320  break;
5321  default:
5322  assert(false && "unexpected node type");
5323  break;
5324  }
5325 
5326  pm_node_list_append(&node->parts, part);
5327 
5328 #undef CLEAR_FLAGS
5329 #undef MUTABLE_FLAGS
5330 }
5331 
5335 static pm_interpolated_string_node_t *
5336 pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5337  pm_interpolated_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_string_node_t);
5338  pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
5339 
5340  switch (parser->frozen_string_literal) {
5341  case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
5342  flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
5343  break;
5344  case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
5345  flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
5346  break;
5347  }
5348 
5349  *node = (pm_interpolated_string_node_t) {
5350  {
5351  .type = PM_INTERPOLATED_STRING_NODE,
5352  .flags = flags,
5353  .node_id = PM_NODE_IDENTIFY(parser),
5354  .location = {
5355  .start = opening->start,
5356  .end = closing->end,
5357  },
5358  },
5359  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5360  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5361  .parts = { 0 }
5362  };
5363 
5364  if (parts != NULL) {
5365  pm_node_t *part;
5366  PM_NODE_LIST_FOREACH(parts, index, part) {
5367  pm_interpolated_string_node_append(node, part);
5368  }
5369  }
5370 
5371  return node;
5372 }
5373 
5377 static void
5378 pm_interpolated_string_node_closing_set(pm_interpolated_string_node_t *node, const pm_token_t *closing) {
5379  node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5380  node->base.location.end = closing->end;
5381 }
5382 
5383 static void
5384 pm_interpolated_symbol_node_append(pm_interpolated_symbol_node_t *node, pm_node_t *part) {
5385  if (node->parts.size == 0 && node->opening_loc.start == NULL) {
5386  node->base.location.start = part->location.start;
5387  }
5388 
5389  pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5390  node->base.location.end = MAX(node->base.location.end, part->location.end);
5391 }
5392 
5393 static void
5394 pm_interpolated_symbol_node_closing_loc_set(pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
5395  node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5396  node->base.location.end = closing->end;
5397 }
5398 
5402 static pm_interpolated_symbol_node_t *
5403 pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
5404  pm_interpolated_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_symbol_node_t);
5405 
5406  *node = (pm_interpolated_symbol_node_t) {
5407  {
5408  .type = PM_INTERPOLATED_SYMBOL_NODE,
5409  .flags = PM_NODE_FLAG_STATIC_LITERAL,
5410  .node_id = PM_NODE_IDENTIFY(parser),
5411  .location = {
5412  .start = opening->start,
5413  .end = closing->end,
5414  },
5415  },
5416  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5417  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5418  .parts = { 0 }
5419  };
5420 
5421  if (parts != NULL) {
5422  pm_node_t *part;
5423  PM_NODE_LIST_FOREACH(parts, index, part) {
5424  pm_interpolated_symbol_node_append(node, part);
5425  }
5426  }
5427 
5428  return node;
5429 }
5430 
5434 static pm_interpolated_x_string_node_t *
5435 pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5436  pm_interpolated_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_interpolated_x_string_node_t);
5437 
5438  *node = (pm_interpolated_x_string_node_t) {
5439  {
5440  .type = PM_INTERPOLATED_X_STRING_NODE,
5441  .node_id = PM_NODE_IDENTIFY(parser),
5442  .location = {
5443  .start = opening->start,
5444  .end = closing->end
5445  },
5446  },
5447  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
5448  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
5449  .parts = { 0 }
5450  };
5451 
5452  return node;
5453 }
5454 
5455 static inline void
5456 pm_interpolated_xstring_node_append(pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5457  pm_interpolated_node_append((pm_node_t *) node, &node->parts, part);
5458  node->base.location.end = part->location.end;
5459 }
5460 
5461 static inline void
5462 pm_interpolated_xstring_node_closing_set(pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5463  node->closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing);
5464  node->base.location.end = closing->end;
5465 }
5466 
5470 static pm_it_local_variable_read_node_t *
5471 pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5472  pm_it_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_it_local_variable_read_node_t);
5473 
5474  *node = (pm_it_local_variable_read_node_t) {
5475  {
5476  .type = PM_IT_LOCAL_VARIABLE_READ_NODE,
5477  .node_id = PM_NODE_IDENTIFY(parser),
5478  .location = PM_LOCATION_TOKEN_VALUE(name)
5479  }
5480  };
5481 
5482  return node;
5483 }
5484 
5488 static pm_it_parameters_node_t *
5489 pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5490  pm_it_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_it_parameters_node_t);
5491 
5492  *node = (pm_it_parameters_node_t) {
5493  {
5494  .type = PM_IT_PARAMETERS_NODE,
5495  .node_id = PM_NODE_IDENTIFY(parser),
5496  .location = {
5497  .start = opening->start,
5498  .end = closing->end
5499  }
5500  }
5501  };
5502 
5503  return node;
5504 }
5505 
5509 static pm_keyword_hash_node_t *
5510 pm_keyword_hash_node_create(pm_parser_t *parser) {
5511  pm_keyword_hash_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_hash_node_t);
5512 
5513  *node = (pm_keyword_hash_node_t) {
5514  .base = {
5515  .type = PM_KEYWORD_HASH_NODE,
5516  .flags = PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5517  .node_id = PM_NODE_IDENTIFY(parser),
5518  .location = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5519  },
5520  .elements = { 0 }
5521  };
5522 
5523  return node;
5524 }
5525 
5529 static void
5530 pm_keyword_hash_node_elements_append(pm_keyword_hash_node_t *hash, pm_node_t *element) {
5531  // If the element being added is not an AssocNode or does not have a symbol
5532  // key, then we want to turn the SYMBOL_KEYS flag off.
5533  if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5534  pm_node_flag_unset((pm_node_t *)hash, PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5535  }
5536 
5537  pm_node_list_append(&hash->elements, element);
5538  if (hash->base.location.start == NULL) {
5539  hash->base.location.start = element->location.start;
5540  }
5541  hash->base.location.end = element->location.end;
5542 }
5543 
5547 static pm_required_keyword_parameter_node_t *
5548 pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5549  pm_required_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_keyword_parameter_node_t);
5550 
5551  *node = (pm_required_keyword_parameter_node_t) {
5552  {
5553  .type = PM_REQUIRED_KEYWORD_PARAMETER_NODE,
5554  .node_id = PM_NODE_IDENTIFY(parser),
5555  .location = {
5556  .start = name->start,
5557  .end = name->end
5558  },
5559  },
5560  .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5561  .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5562  };
5563 
5564  return node;
5565 }
5566 
5570 static pm_optional_keyword_parameter_node_t *
5571 pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5572  pm_optional_keyword_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_keyword_parameter_node_t);
5573 
5574  *node = (pm_optional_keyword_parameter_node_t) {
5575  {
5576  .type = PM_OPTIONAL_KEYWORD_PARAMETER_NODE,
5577  .node_id = PM_NODE_IDENTIFY(parser),
5578  .location = {
5579  .start = name->start,
5580  .end = value->location.end
5581  },
5582  },
5583  .name = pm_parser_constant_id_location(parser, name->start, name->end - 1),
5584  .name_loc = PM_LOCATION_TOKEN_VALUE(name),
5585  .value = value
5586  };
5587 
5588  return node;
5589 }
5590 
5594 static pm_keyword_rest_parameter_node_t *
5595 pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5596  pm_keyword_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_keyword_rest_parameter_node_t);
5597 
5598  *node = (pm_keyword_rest_parameter_node_t) {
5599  {
5600  .type = PM_KEYWORD_REST_PARAMETER_NODE,
5601  .node_id = PM_NODE_IDENTIFY(parser),
5602  .location = {
5603  .start = operator->start,
5604  .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
5605  },
5606  },
5607  .name = pm_parser_optional_constant_id_token(parser, name),
5608  .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
5609  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5610  };
5611 
5612  return node;
5613 }
5614 
5618 static pm_lambda_node_t *
5619 pm_lambda_node_create(
5620  pm_parser_t *parser,
5621  pm_constant_id_list_t *locals,
5622  const pm_token_t *operator,
5623  const pm_token_t *opening,
5624  const pm_token_t *closing,
5625  pm_node_t *parameters,
5626  pm_node_t *body
5627 ) {
5628  pm_lambda_node_t *node = PM_NODE_ALLOC(parser, pm_lambda_node_t);
5629 
5630  *node = (pm_lambda_node_t) {
5631  {
5632  .type = PM_LAMBDA_NODE,
5633  .node_id = PM_NODE_IDENTIFY(parser),
5634  .location = {
5635  .start = operator->start,
5636  .end = closing->end
5637  },
5638  },
5639  .locals = *locals,
5640  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5641  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
5642  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
5643  .parameters = parameters,
5644  .body = body
5645  };
5646 
5647  return node;
5648 }
5649 
5653 static pm_local_variable_and_write_node_t *
5654 pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5655  assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5656  assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5657  pm_local_variable_and_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_and_write_node_t);
5658 
5659  *node = (pm_local_variable_and_write_node_t) {
5660  {
5661  .type = PM_LOCAL_VARIABLE_AND_WRITE_NODE,
5662  .node_id = PM_NODE_IDENTIFY(parser),
5663  .location = {
5664  .start = target->location.start,
5665  .end = value->location.end
5666  }
5667  },
5668  .name_loc = target->location,
5669  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5670  .value = value,
5671  .name = name,
5672  .depth = depth
5673  };
5674 
5675  return node;
5676 }
5677 
5681 static pm_local_variable_operator_write_node_t *
5682 pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5683  pm_local_variable_operator_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_operator_write_node_t);
5684 
5685  *node = (pm_local_variable_operator_write_node_t) {
5686  {
5687  .type = PM_LOCAL_VARIABLE_OPERATOR_WRITE_NODE,
5688  .node_id = PM_NODE_IDENTIFY(parser),
5689  .location = {
5690  .start = target->location.start,
5691  .end = value->location.end
5692  }
5693  },
5694  .name_loc = target->location,
5695  .binary_operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5696  .value = value,
5697  .name = name,
5698  .binary_operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
5699  .depth = depth
5700  };
5701 
5702  return node;
5703 }
5704 
5708 static pm_local_variable_or_write_node_t *
5709 pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5710  assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5711  assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5712  pm_local_variable_or_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_or_write_node_t);
5713 
5714  *node = (pm_local_variable_or_write_node_t) {
5715  {
5716  .type = PM_LOCAL_VARIABLE_OR_WRITE_NODE,
5717  .node_id = PM_NODE_IDENTIFY(parser),
5718  .location = {
5719  .start = target->location.start,
5720  .end = value->location.end
5721  }
5722  },
5723  .name_loc = target->location,
5724  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
5725  .value = value,
5726  .name = name,
5727  .depth = depth
5728  };
5729 
5730  return node;
5731 }
5732 
5736 static pm_local_variable_read_node_t *
5737 pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5738  if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5739 
5740  pm_local_variable_read_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_read_node_t);
5741 
5742  *node = (pm_local_variable_read_node_t) {
5743  {
5744  .type = PM_LOCAL_VARIABLE_READ_NODE,
5745  .node_id = PM_NODE_IDENTIFY(parser),
5746  .location = PM_LOCATION_TOKEN_VALUE(name)
5747  },
5748  .name = name_id,
5749  .depth = depth
5750  };
5751 
5752  return node;
5753 }
5754 
5758 static pm_local_variable_read_node_t *
5759 pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5760  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5761  return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5762 }
5763 
5768 static pm_local_variable_read_node_t *
5769 pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5770  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5771  return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
5772 }
5773 
5777 static pm_local_variable_write_node_t *
5778 pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5779  pm_local_variable_write_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_write_node_t);
5780 
5781  *node = (pm_local_variable_write_node_t) {
5782  {
5783  .type = PM_LOCAL_VARIABLE_WRITE_NODE,
5784  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5785  .node_id = PM_NODE_IDENTIFY(parser),
5786  .location = {
5787  .start = name_loc->start,
5788  .end = value->location.end
5789  }
5790  },
5791  .name = name,
5792  .depth = depth,
5793  .value = value,
5794  .name_loc = *name_loc,
5795  .operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator)
5796  };
5797 
5798  return node;
5799 }
5800 
/**
 * Returns true if the bytes between the given bounds are exactly `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;
    return start[0] == 'i' && start[1] == 't';
}
5808 
/**
 * Returns true if the bytes between the given bounds look like a numbered
 * parameter: exactly two bytes, an underscore followed by a decimal digit
 * other than 0.
 */
static inline bool
pm_token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
    if (end - start != 2) return false;
    if (start[0] != '_') return false;
    if (start[1] == '0') return false;

    return pm_char_is_decimal_digit(start[1]);
}
5817 
5822 static inline void
5823 pm_refute_numbered_parameter(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
5824  if (pm_token_is_numbered_parameter(start, end)) {
5825  PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_PARAMETER_NUMBERED_RESERVED, start);
5826  }
5827 }
5828 
5833 static pm_local_variable_target_node_t *
5834 pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5835  pm_refute_numbered_parameter(parser, location->start, location->end);
5836  pm_local_variable_target_node_t *node = PM_NODE_ALLOC(parser, pm_local_variable_target_node_t);
5837 
5838  *node = (pm_local_variable_target_node_t) {
5839  {
5840  .type = PM_LOCAL_VARIABLE_TARGET_NODE,
5841  .node_id = PM_NODE_IDENTIFY(parser),
5842  .location = *location
5843  },
5844  .name = name,
5845  .depth = depth
5846  };
5847 
5848  return node;
5849 }
5850 
5854 static pm_match_predicate_node_t *
5855 pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5856  pm_assert_value_expression(parser, value);
5857 
5858  pm_match_predicate_node_t *node = PM_NODE_ALLOC(parser, pm_match_predicate_node_t);
5859 
5860  *node = (pm_match_predicate_node_t) {
5861  {
5862  .type = PM_MATCH_PREDICATE_NODE,
5863  .node_id = PM_NODE_IDENTIFY(parser),
5864  .location = {
5865  .start = value->location.start,
5866  .end = pattern->location.end
5867  }
5868  },
5869  .value = value,
5870  .pattern = pattern,
5871  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5872  };
5873 
5874  return node;
5875 }
5876 
5880 static pm_match_required_node_t *
5881 pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5882  pm_assert_value_expression(parser, value);
5883 
5884  pm_match_required_node_t *node = PM_NODE_ALLOC(parser, pm_match_required_node_t);
5885 
5886  *node = (pm_match_required_node_t) {
5887  {
5888  .type = PM_MATCH_REQUIRED_NODE,
5889  .node_id = PM_NODE_IDENTIFY(parser),
5890  .location = {
5891  .start = value->location.start,
5892  .end = pattern->location.end
5893  }
5894  },
5895  .value = value,
5896  .pattern = pattern,
5897  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
5898  };
5899 
5900  return node;
5901 }
5902 
5906 static pm_match_write_node_t *
5907 pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5908  pm_match_write_node_t *node = PM_NODE_ALLOC(parser, pm_match_write_node_t);
5909 
5910  *node = (pm_match_write_node_t) {
5911  {
5912  .type = PM_MATCH_WRITE_NODE,
5913  .node_id = PM_NODE_IDENTIFY(parser),
5914  .location = call->base.location
5915  },
5916  .call = call,
5917  .targets = { 0 }
5918  };
5919 
5920  return node;
5921 }
5922 
5926 static pm_module_node_t *
5927 pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5928  pm_module_node_t *node = PM_NODE_ALLOC(parser, pm_module_node_t);
5929 
5930  *node = (pm_module_node_t) {
5931  {
5932  .type = PM_MODULE_NODE,
5933  .node_id = PM_NODE_IDENTIFY(parser),
5934  .location = {
5935  .start = module_keyword->start,
5936  .end = end_keyword->end
5937  }
5938  },
5939  .locals = (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5940  .module_keyword_loc = PM_LOCATION_TOKEN_VALUE(module_keyword),
5941  .constant_path = constant_path,
5942  .body = body,
5943  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword),
5944  .name = pm_parser_constant_id_token(parser, name)
5945  };
5946 
5947  return node;
5948 }
5949 
5953 static pm_multi_target_node_t *
5954 pm_multi_target_node_create(pm_parser_t *parser) {
5955  pm_multi_target_node_t *node = PM_NODE_ALLOC(parser, pm_multi_target_node_t);
5956 
5957  *node = (pm_multi_target_node_t) {
5958  {
5959  .type = PM_MULTI_TARGET_NODE,
5960  .node_id = PM_NODE_IDENTIFY(parser),
5961  .location = { .start = NULL, .end = NULL }
5962  },
5963  .lefts = { 0 },
5964  .rest = NULL,
5965  .rights = { 0 },
5966  .lparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
5967  .rparen_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
5968  };
5969 
5970  return node;
5971 }
5972 
5976 static void
5977 pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5978  if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5979  if (node->rest == NULL) {
5980  node->rest = target;
5981  } else {
5982  pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5983  pm_node_list_append(&node->rights, target);
5984  }
5985  } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5986  if (node->rest == NULL) {
5987  node->rest = target;
5988  } else {
5989  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5990  pm_node_list_append(&node->rights, target);
5991  }
5992  } else if (node->rest == NULL) {
5993  pm_node_list_append(&node->lefts, target);
5994  } else {
5995  pm_node_list_append(&node->rights, target);
5996  }
5997 
5998  if (node->base.location.start == NULL || (node->base.location.start > target->location.start)) {
5999  node->base.location.start = target->location.start;
6000  }
6001 
6002  if (node->base.location.end == NULL || (node->base.location.end < target->location.end)) {
6003  node->base.location.end = target->location.end;
6004  }
6005 }
6006 
6010 static void
6011 pm_multi_target_node_opening_set(pm_multi_target_node_t *node, const pm_token_t *lparen) {
6012  node->base.location.start = lparen->start;
6013  node->lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen);
6014 }
6015 
6019 static void
6020 pm_multi_target_node_closing_set(pm_multi_target_node_t *node, const pm_token_t *rparen) {
6021  node->base.location.end = rparen->end;
6022  node->rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen);
6023 }
6024 
6028 static pm_multi_write_node_t *
6029 pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
6030  pm_multi_write_node_t *node = PM_NODE_ALLOC(parser, pm_multi_write_node_t);
6031 
6032  *node = (pm_multi_write_node_t) {
6033  {
6034  .type = PM_MULTI_WRITE_NODE,
6035  .flags = pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
6036  .node_id = PM_NODE_IDENTIFY(parser),
6037  .location = {
6038  .start = target->base.location.start,
6039  .end = value->location.end
6040  }
6041  },
6042  .lefts = target->lefts,
6043  .rest = target->rest,
6044  .rights = target->rights,
6045  .lparen_loc = target->lparen_loc,
6046  .rparen_loc = target->rparen_loc,
6047  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6048  .value = value
6049  };
6050 
6051  // Explicitly do not call pm_node_destroy here because we want to keep
6052  // around all of the information within the MultiWriteNode node.
6053  xfree(target);
6054 
6055  return node;
6056 }
6057 
6061 static pm_next_node_t *
6062 pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6063  assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
6064  pm_next_node_t *node = PM_NODE_ALLOC(parser, pm_next_node_t);
6065 
6066  *node = (pm_next_node_t) {
6067  {
6068  .type = PM_NEXT_NODE,
6069  .node_id = PM_NODE_IDENTIFY(parser),
6070  .location = {
6071  .start = keyword->start,
6072  .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6073  }
6074  },
6075  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6076  .arguments = arguments
6077  };
6078 
6079  return node;
6080 }
6081 
6085 static pm_nil_node_t *
6086 pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
6087  assert(token->type == PM_TOKEN_KEYWORD_NIL);
6088  pm_nil_node_t *node = PM_NODE_ALLOC(parser, pm_nil_node_t);
6089 
6090  *node = (pm_nil_node_t) {{
6091  .type = PM_NIL_NODE,
6092  .flags = PM_NODE_FLAG_STATIC_LITERAL,
6093  .node_id = PM_NODE_IDENTIFY(parser),
6094  .location = PM_LOCATION_TOKEN_VALUE(token)
6095  }};
6096 
6097  return node;
6098 }
6099 
6103 static pm_no_keywords_parameter_node_t *
6104 pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
6105  assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
6106  assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
6107  pm_no_keywords_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_no_keywords_parameter_node_t);
6108 
6109  *node = (pm_no_keywords_parameter_node_t) {
6110  {
6111  .type = PM_NO_KEYWORDS_PARAMETER_NODE,
6112  .node_id = PM_NODE_IDENTIFY(parser),
6113  .location = {
6114  .start = operator->start,
6115  .end = keyword->end
6116  }
6117  },
6118  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6119  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
6120  };
6121 
6122  return node;
6123 }
6124 
6128 static pm_numbered_parameters_node_t *
6129 pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_location_t *location, uint8_t maximum) {
6130  pm_numbered_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_parameters_node_t);
6131 
6132  *node = (pm_numbered_parameters_node_t) {
6133  {
6134  .type = PM_NUMBERED_PARAMETERS_NODE,
6135  .node_id = PM_NODE_IDENTIFY(parser),
6136  .location = *location
6137  },
6138  .maximum = maximum
6139  };
6140 
6141  return node;
6142 }
6143 
6148 #define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
6149 
6156 static uint32_t
6157 pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
6158  const uint8_t *start = token->start + 1;
6159  const uint8_t *end = token->end;
6160 
6161  ptrdiff_t diff = end - start;
6162  assert(diff > 0 && ((unsigned long) diff < SIZE_MAX));
6163  size_t length = (size_t) diff;
6164 
6165  char *digits = xcalloc(length + 1, sizeof(char));
6166  memcpy(digits, start, length);
6167  digits[length] = '\0';
6168 
6169  char *endptr;
6170  errno = 0;
6171  unsigned long value = strtoul(digits, &endptr, 10);
6172 
6173  if ((digits == endptr) || (*endptr != '\0')) {
6174  pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
6175  value = 0;
6176  }
6177 
6178  xfree(digits);
6179 
6180  if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
6181  PM_PARSER_WARN_FORMAT(parser, start, end, PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
6182  value = 0;
6183  }
6184 
6185  return (uint32_t) value;
6186 }
6187 
6188 #undef NTH_REF_MAX
6189 
6193 static pm_numbered_reference_read_node_t *
6194 pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
6195  assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
6196  pm_numbered_reference_read_node_t *node = PM_NODE_ALLOC(parser, pm_numbered_reference_read_node_t);
6197 
6198  *node = (pm_numbered_reference_read_node_t) {
6199  {
6200  .type = PM_NUMBERED_REFERENCE_READ_NODE,
6201  .node_id = PM_NODE_IDENTIFY(parser),
6202  .location = PM_LOCATION_TOKEN_VALUE(name),
6203  },
6204  .number = pm_numbered_reference_read_node_number(parser, name)
6205  };
6206 
6207  return node;
6208 }
6209 
6213 static pm_optional_parameter_node_t *
6214 pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
6215  pm_optional_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_optional_parameter_node_t);
6216 
6217  *node = (pm_optional_parameter_node_t) {
6218  {
6219  .type = PM_OPTIONAL_PARAMETER_NODE,
6220  .node_id = PM_NODE_IDENTIFY(parser),
6221  .location = {
6222  .start = name->start,
6223  .end = value->location.end
6224  }
6225  },
6226  .name = pm_parser_constant_id_token(parser, name),
6227  .name_loc = PM_LOCATION_TOKEN_VALUE(name),
6228  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6229  .value = value
6230  };
6231 
6232  return node;
6233 }
6234 
6238 static pm_or_node_t *
6239 pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6240  pm_assert_value_expression(parser, left);
6241 
6242  pm_or_node_t *node = PM_NODE_ALLOC(parser, pm_or_node_t);
6243 
6244  *node = (pm_or_node_t) {
6245  {
6246  .type = PM_OR_NODE,
6247  .node_id = PM_NODE_IDENTIFY(parser),
6248  .location = {
6249  .start = left->location.start,
6250  .end = right->location.end
6251  }
6252  },
6253  .left = left,
6254  .right = right,
6255  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6256  };
6257 
6258  return node;
6259 }
6260 
6264 static pm_parameters_node_t *
6265 pm_parameters_node_create(pm_parser_t *parser) {
6266  pm_parameters_node_t *node = PM_NODE_ALLOC(parser, pm_parameters_node_t);
6267 
6268  *node = (pm_parameters_node_t) {
6269  {
6270  .type = PM_PARAMETERS_NODE,
6271  .node_id = PM_NODE_IDENTIFY(parser),
6272  .location = PM_LOCATION_TOKEN_VALUE(&parser->current)
6273  },
6274  .rest = NULL,
6275  .keyword_rest = NULL,
6276  .block = NULL,
6277  .requireds = { 0 },
6278  .optionals = { 0 },
6279  .posts = { 0 },
6280  .keywords = { 0 }
6281  };
6282 
6283  return node;
6284 }
6285 
6289 static void
6290 pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
6291  if (params->base.location.start == NULL) {
6292  params->base.location.start = param->location.start;
6293  } else {
6294  params->base.location.start = params->base.location.start < param->location.start ? params->base.location.start : param->location.start;
6295  }
6296 
6297  if (params->base.location.end == NULL) {
6298  params->base.location.end = param->location.end;
6299  } else {
6300  params->base.location.end = params->base.location.end > param->location.end ? params->base.location.end : param->location.end;
6301  }
6302 }
6303 
6307 static void
6308 pm_parameters_node_requireds_append(pm_parameters_node_t *params, pm_node_t *param) {
6309  pm_parameters_node_location_set(params, param);
6310  pm_node_list_append(&params->requireds, param);
6311 }
6312 
6316 static void
6317 pm_parameters_node_optionals_append(pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
6318  pm_parameters_node_location_set(params, (pm_node_t *) param);
6319  pm_node_list_append(&params->optionals, (pm_node_t *) param);
6320 }
6321 
6325 static void
6326 pm_parameters_node_posts_append(pm_parameters_node_t *params, pm_node_t *param) {
6327  pm_parameters_node_location_set(params, param);
6328  pm_node_list_append(&params->posts, param);
6329 }
6330 
6334 static void
6335 pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6336  pm_parameters_node_location_set(params, param);
6337  params->rest = param;
6338 }
6339 
6343 static void
6344 pm_parameters_node_keywords_append(pm_parameters_node_t *params, pm_node_t *param) {
6345  pm_parameters_node_location_set(params, param);
6346  pm_node_list_append(&params->keywords, param);
6347 }
6348 
6352 static void
6353 pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
6354  assert(params->keyword_rest == NULL);
6355  pm_parameters_node_location_set(params, param);
6356  params->keyword_rest = param;
6357 }
6358 
6362 static void
6363 pm_parameters_node_block_set(pm_parameters_node_t *params, pm_block_parameter_node_t *param) {
6364  assert(params->block == NULL);
6365  pm_parameters_node_location_set(params, (pm_node_t *) param);
6366  params->block = param;
6367 }
6368 
6372 static pm_program_node_t *
6373 pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
6374  pm_program_node_t *node = PM_NODE_ALLOC(parser, pm_program_node_t);
6375 
6376  *node = (pm_program_node_t) {
6377  {
6378  .type = PM_PROGRAM_NODE,
6379  .node_id = PM_NODE_IDENTIFY(parser),
6380  .location = {
6381  .start = statements == NULL ? parser->start : statements->base.location.start,
6382  .end = statements == NULL ? parser->end : statements->base.location.end
6383  }
6384  },
6385  .locals = *locals,
6386  .statements = statements
6387  };
6388 
6389  return node;
6390 }
6391 
6395 static pm_parentheses_node_t *
6396 pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing) {
6397  pm_parentheses_node_t *node = PM_NODE_ALLOC(parser, pm_parentheses_node_t);
6398 
6399  *node = (pm_parentheses_node_t) {
6400  {
6401  .type = PM_PARENTHESES_NODE,
6402  .node_id = PM_NODE_IDENTIFY(parser),
6403  .location = {
6404  .start = opening->start,
6405  .end = closing->end
6406  }
6407  },
6408  .body = body,
6409  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6410  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6411  };
6412 
6413  return node;
6414 }
6415 
6419 static pm_pinned_expression_node_t *
6420 pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
6421  pm_pinned_expression_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_expression_node_t);
6422 
6423  *node = (pm_pinned_expression_node_t) {
6424  {
6425  .type = PM_PINNED_EXPRESSION_NODE,
6426  .node_id = PM_NODE_IDENTIFY(parser),
6427  .location = {
6428  .start = operator->start,
6429  .end = rparen->end
6430  }
6431  },
6432  .expression = expression,
6433  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6434  .lparen_loc = PM_LOCATION_TOKEN_VALUE(lparen),
6435  .rparen_loc = PM_LOCATION_TOKEN_VALUE(rparen)
6436  };
6437 
6438  return node;
6439 }
6440 
6444 static pm_pinned_variable_node_t *
6445 pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
6446  pm_pinned_variable_node_t *node = PM_NODE_ALLOC(parser, pm_pinned_variable_node_t);
6447 
6448  *node = (pm_pinned_variable_node_t) {
6449  {
6450  .type = PM_PINNED_VARIABLE_NODE,
6451  .node_id = PM_NODE_IDENTIFY(parser),
6452  .location = {
6453  .start = operator->start,
6454  .end = variable->location.end
6455  }
6456  },
6457  .variable = variable,
6458  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6459  };
6460 
6461  return node;
6462 }
6463 
6467 static pm_post_execution_node_t *
6468 pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6469  pm_post_execution_node_t *node = PM_NODE_ALLOC(parser, pm_post_execution_node_t);
6470 
6471  *node = (pm_post_execution_node_t) {
6472  {
6473  .type = PM_POST_EXECUTION_NODE,
6474  .node_id = PM_NODE_IDENTIFY(parser),
6475  .location = {
6476  .start = keyword->start,
6477  .end = closing->end
6478  }
6479  },
6480  .statements = statements,
6481  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6482  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6483  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6484  };
6485 
6486  return node;
6487 }
6488 
6492 static pm_pre_execution_node_t *
6493 pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
6494  pm_pre_execution_node_t *node = PM_NODE_ALLOC(parser, pm_pre_execution_node_t);
6495 
6496  *node = (pm_pre_execution_node_t) {
6497  {
6498  .type = PM_PRE_EXECUTION_NODE,
6499  .node_id = PM_NODE_IDENTIFY(parser),
6500  .location = {
6501  .start = keyword->start,
6502  .end = closing->end
6503  }
6504  },
6505  .statements = statements,
6506  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6507  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6508  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing)
6509  };
6510 
6511  return node;
6512 }
6513 
6517 static pm_range_node_t *
6518 pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
6519  pm_assert_value_expression(parser, left);
6520  pm_assert_value_expression(parser, right);
6521 
6522  pm_range_node_t *node = PM_NODE_ALLOC(parser, pm_range_node_t);
6523  pm_node_flags_t flags = 0;
6524 
6525  // Indicate that this node is an exclusive range if the operator is `...`.
6526  if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
6527  flags |= PM_RANGE_FLAGS_EXCLUDE_END;
6528  }
6529 
6530  // Indicate that this node is a static literal (i.e., can be compiled with
6531  // a putobject in CRuby) if the left and right are implicit nil, explicit
6532  // nil, or integers.
6533  if (
6534  (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
6535  (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
6536  ) {
6537  flags |= PM_NODE_FLAG_STATIC_LITERAL;
6538  }
6539 
6540  *node = (pm_range_node_t) {
6541  {
6542  .type = PM_RANGE_NODE,
6543  .flags = flags,
6544  .node_id = PM_NODE_IDENTIFY(parser),
6545  .location = {
6546  .start = (left == NULL ? operator->start : left->location.start),
6547  .end = (right == NULL ? operator->end : right->location.end)
6548  }
6549  },
6550  .left = left,
6551  .right = right,
6552  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6553  };
6554 
6555  return node;
6556 }
6557 
6561 static pm_redo_node_t *
6562 pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
6563  assert(token->type == PM_TOKEN_KEYWORD_REDO);
6564  pm_redo_node_t *node = PM_NODE_ALLOC(parser, pm_redo_node_t);
6565 
6566  *node = (pm_redo_node_t) {{
6567  .type = PM_REDO_NODE,
6568  .node_id = PM_NODE_IDENTIFY(parser),
6569  .location = PM_LOCATION_TOKEN_VALUE(token)
6570  }};
6571 
6572  return node;
6573 }
6574 
6579 static pm_regular_expression_node_t *
6580 pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6581  pm_regular_expression_node_t *node = PM_NODE_ALLOC(parser, pm_regular_expression_node_t);
6582 
6583  *node = (pm_regular_expression_node_t) {
6584  {
6585  .type = PM_REGULAR_EXPRESSION_NODE,
6586  .flags = pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
6587  .node_id = PM_NODE_IDENTIFY(parser),
6588  .location = {
6589  .start = MIN(opening->start, closing->start),
6590  .end = MAX(opening->end, closing->end)
6591  }
6592  },
6593  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
6594  .content_loc = PM_LOCATION_TOKEN_VALUE(content),
6595  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
6596  .unescaped = *unescaped
6597  };
6598 
6599  return node;
6600 }
6601 
6605 static inline pm_regular_expression_node_t *
6606 pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6607  return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
6608 }
6609 
6613 static pm_required_parameter_node_t *
6614 pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
6615  pm_required_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_required_parameter_node_t);
6616 
6617  *node = (pm_required_parameter_node_t) {
6618  {
6619  .type = PM_REQUIRED_PARAMETER_NODE,
6620  .node_id = PM_NODE_IDENTIFY(parser),
6621  .location = PM_LOCATION_TOKEN_VALUE(token)
6622  },
6623  .name = pm_parser_constant_id_token(parser, token)
6624  };
6625 
6626  return node;
6627 }
6628 
6632 static pm_rescue_modifier_node_t *
6633 pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
6634  pm_rescue_modifier_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_modifier_node_t);
6635 
6636  *node = (pm_rescue_modifier_node_t) {
6637  {
6638  .type = PM_RESCUE_MODIFIER_NODE,
6639  .node_id = PM_NODE_IDENTIFY(parser),
6640  .location = {
6641  .start = expression->location.start,
6642  .end = rescue_expression->location.end
6643  }
6644  },
6645  .expression = expression,
6646  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6647  .rescue_expression = rescue_expression
6648  };
6649 
6650  return node;
6651 }
6652 
6656 static pm_rescue_node_t *
6657 pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6658  pm_rescue_node_t *node = PM_NODE_ALLOC(parser, pm_rescue_node_t);
6659 
6660  *node = (pm_rescue_node_t) {
6661  {
6662  .type = PM_RESCUE_NODE,
6663  .node_id = PM_NODE_IDENTIFY(parser),
6664  .location = PM_LOCATION_TOKEN_VALUE(keyword)
6665  },
6666  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6667  .operator_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
6668  .reference = NULL,
6669  .statements = NULL,
6670  .subsequent = NULL,
6671  .exceptions = { 0 }
6672  };
6673 
6674  return node;
6675 }
6676 
6677 static inline void
6678 pm_rescue_node_operator_set(pm_rescue_node_t *node, const pm_token_t *operator) {
6679  node->operator_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
6680 }
6681 
6685 static void
6686 pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
6687  node->reference = reference;
6688  node->base.location.end = reference->location.end;
6689 }
6690 
6694 static void
6695 pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6696  node->statements = statements;
6697  if (pm_statements_node_body_length(statements) > 0) {
6698  node->base.location.end = statements->base.location.end;
6699  }
6700 }
6701 
6705 static void
6706 pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6707  node->subsequent = subsequent;
6708  node->base.location.end = subsequent->base.location.end;
6709 }
6710 
6714 static void
6715 pm_rescue_node_exceptions_append(pm_rescue_node_t *node, pm_node_t *exception) {
6716  pm_node_list_append(&node->exceptions, exception);
6717  node->base.location.end = exception->location.end;
6718 }
6719 
6723 static pm_rest_parameter_node_t *
6724 pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6725  pm_rest_parameter_node_t *node = PM_NODE_ALLOC(parser, pm_rest_parameter_node_t);
6726 
6727  *node = (pm_rest_parameter_node_t) {
6728  {
6729  .type = PM_REST_PARAMETER_NODE,
6730  .node_id = PM_NODE_IDENTIFY(parser),
6731  .location = {
6732  .start = operator->start,
6733  .end = (name->type == PM_TOKEN_NOT_PROVIDED ? operator->end : name->end)
6734  }
6735  },
6736  .name = pm_parser_optional_constant_id_token(parser, name),
6737  .name_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(name),
6738  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
6739  };
6740 
6741  return node;
6742 }
6743 
6747 static pm_retry_node_t *
6748 pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6749  assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6750  pm_retry_node_t *node = PM_NODE_ALLOC(parser, pm_retry_node_t);
6751 
6752  *node = (pm_retry_node_t) {{
6753  .type = PM_RETRY_NODE,
6754  .node_id = PM_NODE_IDENTIFY(parser),
6755  .location = PM_LOCATION_TOKEN_VALUE(token)
6756  }};
6757 
6758  return node;
6759 }
6760 
6764 static pm_return_node_t *
6765 pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6766  pm_return_node_t *node = PM_NODE_ALLOC(parser, pm_return_node_t);
6767 
6768  *node = (pm_return_node_t) {
6769  {
6770  .type = PM_RETURN_NODE,
6771  .node_id = PM_NODE_IDENTIFY(parser),
6772  .location = {
6773  .start = keyword->start,
6774  .end = (arguments == NULL ? keyword->end : arguments->base.location.end)
6775  }
6776  },
6777  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
6778  .arguments = arguments
6779  };
6780 
6781  return node;
6782 }
6783 
6787 static pm_self_node_t *
6788 pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6789  assert(token->type == PM_TOKEN_KEYWORD_SELF);
6790  pm_self_node_t *node = PM_NODE_ALLOC(parser, pm_self_node_t);
6791 
6792  *node = (pm_self_node_t) {{
6793  .type = PM_SELF_NODE,
6794  .node_id = PM_NODE_IDENTIFY(parser),
6795  .location = PM_LOCATION_TOKEN_VALUE(token)
6796  }};
6797 
6798  return node;
6799 }
6800 
6804 static pm_shareable_constant_node_t *
6805 pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6806  pm_shareable_constant_node_t *node = PM_NODE_ALLOC(parser, pm_shareable_constant_node_t);
6807 
6808  *node = (pm_shareable_constant_node_t) {
6809  {
6810  .type = PM_SHAREABLE_CONSTANT_NODE,
6811  .flags = (pm_node_flags_t) value,
6812  .node_id = PM_NODE_IDENTIFY(parser),
6813  .location = PM_LOCATION_NODE_VALUE(write)
6814  },
6815  .write = write
6816  };
6817 
6818  return node;
6819 }
6820 
6824 static pm_singleton_class_node_t *
6825 pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6826  pm_singleton_class_node_t *node = PM_NODE_ALLOC(parser, pm_singleton_class_node_t);
6827 
6828  *node = (pm_singleton_class_node_t) {
6829  {
6830  .type = PM_SINGLETON_CLASS_NODE,
6831  .node_id = PM_NODE_IDENTIFY(parser),
6832  .location = {
6833  .start = class_keyword->start,
6834  .end = end_keyword->end
6835  }
6836  },
6837  .locals = *locals,
6838  .class_keyword_loc = PM_LOCATION_TOKEN_VALUE(class_keyword),
6839  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6840  .expression = expression,
6841  .body = body,
6842  .end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword)
6843  };
6844 
6845  return node;
6846 }
6847 
6851 static pm_source_encoding_node_t *
6852 pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6853  assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6854  pm_source_encoding_node_t *node = PM_NODE_ALLOC(parser, pm_source_encoding_node_t);
6855 
6856  *node = (pm_source_encoding_node_t) {{
6857  .type = PM_SOURCE_ENCODING_NODE,
6858  .flags = PM_NODE_FLAG_STATIC_LITERAL,
6859  .node_id = PM_NODE_IDENTIFY(parser),
6860  .location = PM_LOCATION_TOKEN_VALUE(token)
6861  }};
6862 
6863  return node;
6864 }
6865 
6869 static pm_source_file_node_t*
6870 pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6871  pm_source_file_node_t *node = PM_NODE_ALLOC(parser, pm_source_file_node_t);
6872  assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6873 
6874  pm_node_flags_t flags = 0;
6875 
6876  switch (parser->frozen_string_literal) {
6877  case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6878  flags |= PM_STRING_FLAGS_MUTABLE;
6879  break;
6880  case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6881  flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6882  break;
6883  }
6884 
6885  *node = (pm_source_file_node_t) {
6886  {
6887  .type = PM_SOURCE_FILE_NODE,
6888  .flags = flags,
6889  .node_id = PM_NODE_IDENTIFY(parser),
6890  .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
6891  },
6892  .filepath = parser->filepath
6893  };
6894 
6895  return node;
6896 }
6897 
6901 static pm_source_line_node_t *
6902 pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6903  assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6904  pm_source_line_node_t *node = PM_NODE_ALLOC(parser, pm_source_line_node_t);
6905 
6906  *node = (pm_source_line_node_t) {{
6907  .type = PM_SOURCE_LINE_NODE,
6908  .flags = PM_NODE_FLAG_STATIC_LITERAL,
6909  .node_id = PM_NODE_IDENTIFY(parser),
6910  .location = PM_LOCATION_TOKEN_VALUE(token)
6911  }};
6912 
6913  return node;
6914 }
6915 
6919 static pm_splat_node_t *
6920 pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6921  pm_splat_node_t *node = PM_NODE_ALLOC(parser, pm_splat_node_t);
6922 
6923  *node = (pm_splat_node_t) {
6924  {
6925  .type = PM_SPLAT_NODE,
6926  .node_id = PM_NODE_IDENTIFY(parser),
6927  .location = {
6928  .start = operator->start,
6929  .end = (expression == NULL ? operator->end : expression->location.end)
6930  }
6931  },
6932  .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
6933  .expression = expression
6934  };
6935 
6936  return node;
6937 }
6938 
6942 static pm_statements_node_t *
6943 pm_statements_node_create(pm_parser_t *parser) {
6944  pm_statements_node_t *node = PM_NODE_ALLOC(parser, pm_statements_node_t);
6945 
6946  *node = (pm_statements_node_t) {
6947  {
6948  .type = PM_STATEMENTS_NODE,
6949  .node_id = PM_NODE_IDENTIFY(parser),
6950  .location = PM_LOCATION_NULL_VALUE(parser)
6951  },
6952  .body = { 0 }
6953  };
6954 
6955  return node;
6956 }
6957 
6961 static size_t
6962 pm_statements_node_body_length(pm_statements_node_t *node) {
6963  return node && node->body.size;
6964 }
6965 
6969 static void
6970 pm_statements_node_location_set(pm_statements_node_t *node, const uint8_t *start, const uint8_t *end) {
6971  node->base.location = (pm_location_t) { .start = start, .end = end };
6972 }
6973 
6978 static inline void
6979 pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6980  if (pm_statements_node_body_length(node) == 0 || statement->location.start < node->base.location.start) {
6981  node->base.location.start = statement->location.start;
6982  }
6983 
6984  if (statement->location.end > node->base.location.end) {
6985  node->base.location.end = statement->location.end;
6986  }
6987 }
6988 
6992 static void
6993 pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
6994  pm_statements_node_body_update(node, statement);
6995 
6996  if (node->body.size > 0) {
6997  const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6998 
6999  switch (PM_NODE_TYPE(previous)) {
7000  case PM_BREAK_NODE:
7001  case PM_NEXT_NODE:
7002  case PM_REDO_NODE:
7003  case PM_RETRY_NODE:
7004  case PM_RETURN_NODE:
7005  pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
7006  break;
7007  default:
7008  break;
7009  }
7010  }
7011 
7012  pm_node_list_append(&node->body, statement);
7013  if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7014 }
7015 
7019 static void
7020 pm_statements_node_body_prepend(pm_statements_node_t *node, pm_node_t *statement) {
7021  pm_statements_node_body_update(node, statement);
7022  pm_node_list_prepend(&node->body, statement);
7023  pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
7024 }
7025 
7029 static inline pm_string_node_t *
7030 pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
7031  pm_string_node_t *node = PM_NODE_ALLOC(parser, pm_string_node_t);
7032  pm_node_flags_t flags = 0;
7033 
7034  switch (parser->frozen_string_literal) {
7035  case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7036  flags = PM_STRING_FLAGS_MUTABLE;
7037  break;
7038  case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7039  flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7040  break;
7041  }
7042 
7043  *node = (pm_string_node_t) {
7044  {
7045  .type = PM_STRING_NODE,
7046  .flags = flags,
7047  .node_id = PM_NODE_IDENTIFY(parser),
7048  .location = {
7049  .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? content->start : opening->start),
7050  .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? content->end : closing->end)
7051  }
7052  },
7053  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7054  .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7055  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7056  .unescaped = *string
7057  };
7058 
7059  return node;
7060 }
7061 
7065 static pm_string_node_t *
7066 pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7067  return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7068 }
7069 
7074 static pm_string_node_t *
7075 pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7076  pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
7077  parser->current_string = PM_STRING_EMPTY;
7078  return node;
7079 }
7080 
7084 static pm_super_node_t *
7085 pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
7086  assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
7087  pm_super_node_t *node = PM_NODE_ALLOC(parser, pm_super_node_t);
7088 
7089  const uint8_t *end = pm_arguments_end(arguments);
7090  if (end == NULL) {
7091  assert(false && "unreachable");
7092  }
7093 
7094  *node = (pm_super_node_t) {
7095  {
7096  .type = PM_SUPER_NODE,
7097  .node_id = PM_NODE_IDENTIFY(parser),
7098  .location = {
7099  .start = keyword->start,
7100  .end = end,
7101  }
7102  },
7103  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7104  .lparen_loc = arguments->opening_loc,
7105  .arguments = arguments->arguments,
7106  .rparen_loc = arguments->closing_loc,
7107  .block = arguments->block
7108  };
7109 
7110  return node;
7111 }
7112 
7117 static bool
7118 pm_ascii_only_p(const pm_string_t *contents) {
7119  const size_t length = pm_string_length(contents);
7120  const uint8_t *source = pm_string_source(contents);
7121 
7122  for (size_t index = 0; index < length; index++) {
7123  if (source[index] & 0x80) return false;
7124  }
7125 
7126  return true;
7127 }
7128 
7132 static void
7133 parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7134  for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7135  size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
7136 
7137  if (width == 0) {
7138  pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7139  break;
7140  }
7141 
7142  cursor += width;
7143  }
7144 }
7145 
7150 static void
7151 parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
7152  const pm_encoding_t *encoding = parser->encoding;
7153 
7154  for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
7155  size_t width = encoding->char_width(cursor, end - cursor);
7156 
7157  if (width == 0) {
7158  pm_parser_err(parser, location->start, location->end, PM_ERR_INVALID_SYMBOL);
7159  break;
7160  }
7161 
7162  cursor += width;
7163  }
7164 }
7165 
7175 static inline pm_node_flags_t
7176 parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
7177  if (parser->explicit_encoding != NULL) {
7178  // A Symbol may optionally have its encoding explicitly set. This will
7179  // happen if an escape sequence results in a non-ASCII code point.
7180  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7181  if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
7182  return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
7183  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7184  return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
7185  } else if (validate) {
7186  parse_symbol_encoding_validate_other(parser, location, contents);
7187  }
7188  } else if (pm_ascii_only_p(contents)) {
7189  // Ruby stipulates that all source files must use an ASCII-compatible
7190  // encoding. Thus, all symbols appearing in source are eligible for
7191  // "downgrading" to US-ASCII.
7192  return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
7193  } else if (validate) {
7194  parse_symbol_encoding_validate_other(parser, location, contents);
7195  }
7196 
7197  return 0;
7198 }
7199 
7200 static pm_node_flags_t
7201 parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
7202  assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
7203  (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
7204  (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
7205  (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
7206 
7207  // There's special validation logic used if a string does not contain any character escape sequences.
7208  if (parser->explicit_encoding == NULL) {
7209  // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
7210  // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
7211  // the US-ASCII encoding.
7212  if (ascii_only) {
7213  return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
7214  }
7215 
7216  if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7217  if (!ascii_only) {
7218  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7219  }
7220  } else if (parser->encoding != modifier_encoding) {
7221  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
7222 
7223  if (modifier == 'n' && !ascii_only) {
7224  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
7225  }
7226  }
7227 
7228  return flags;
7229  }
7230 
7231  // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
7232  bool mixed_encoding = false;
7233 
7234  if (mixed_encoding) {
7235  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7236  } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
7237  // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
7238  bool valid_string_in_modifier_encoding = true;
7239 
7240  if (!valid_string_in_modifier_encoding) {
7241  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7242  }
7243  } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7244  // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
7245  if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
7246  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
7247  }
7248  }
7249 
7250  // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
7251  return flags;
7252 }
7253 
7260 static pm_node_flags_t
7261 parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
7262  // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
7263  bool valid_unicode_range = true;
7264  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
7265  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
7266  return flags;
7267  }
7268 
7269  // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
7270  // to multi-byte characters are allowed.
7271  if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
7272  // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
7273  // following error message appearing twice. We do the same for compatibility.
7274  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
7275  }
7276 
7285  if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
7286  return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
7287  }
7288 
7289  if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
7290  return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
7291  }
7292 
7293  if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
7294  return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
7295  }
7296 
7297  if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
7298  return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
7299  }
7300 
7301  // At this point no encoding modifiers will be present on the regular expression as they would have already
7302  // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
7303  // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
7304  if (ascii_only) {
7305  return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
7306  }
7307 
7308  // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
7309  // or by specifying a modifier.
7310  //
7311  // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
7312  if (parser->explicit_encoding != NULL) {
7313  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
7314  return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
7315  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
7316  return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
7317  }
7318  }
7319 
7320  return 0;
7321 }
7322 
7327 static pm_symbol_node_t *
7328 pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
7329  pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7330 
7331  *node = (pm_symbol_node_t) {
7332  {
7333  .type = PM_SYMBOL_NODE,
7334  .flags = PM_NODE_FLAG_STATIC_LITERAL | flags,
7335  .node_id = PM_NODE_IDENTIFY(parser),
7336  .location = {
7337  .start = (opening->type == PM_TOKEN_NOT_PROVIDED ? value->start : opening->start),
7338  .end = (closing->type == PM_TOKEN_NOT_PROVIDED ? value->end : closing->end)
7339  }
7340  },
7341  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7342  .value_loc = PM_LOCATION_TOKEN_VALUE(value),
7343  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7344  .unescaped = *unescaped
7345  };
7346 
7347  return node;
7348 }
7349 
7353 static inline pm_symbol_node_t *
7354 pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7355  return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
7356 }
7357 
7361 static pm_symbol_node_t *
7362 pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
7363  pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
7364  parser->current_string = PM_STRING_EMPTY;
7365  return node;
7366 }
7367 
7371 static pm_symbol_node_t *
7372 pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
7373  pm_symbol_node_t *node;
7374 
7375  switch (token->type) {
7376  case PM_TOKEN_LABEL: {
7377  pm_token_t opening = not_provided(parser);
7378  pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
7379 
7380  pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
7381  node = pm_symbol_node_create(parser, &opening, &label, &closing);
7382 
7383  assert((label.end - label.start) >= 0);
7384  pm_string_shared_init(&node->unescaped, label.start, label.end);
7385  pm_node_flag_set((pm_node_t *) node, parse_symbol_encoding(parser, &label, &node->unescaped, false));
7386 
7387  break;
7388  }
7389  case PM_TOKEN_MISSING: {
7390  pm_token_t opening = not_provided(parser);
7391  pm_token_t closing = not_provided(parser);
7392 
7393  pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end };
7394  node = pm_symbol_node_create(parser, &opening, &label, &closing);
7395  break;
7396  }
7397  default:
7398  assert(false && "unreachable");
7399  node = NULL;
7400  break;
7401  }
7402 
7403  return node;
7404 }
7405 
7409 static pm_symbol_node_t *
7410 pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
7411  pm_symbol_node_t *node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7412 
7413  *node = (pm_symbol_node_t) {
7414  {
7415  .type = PM_SYMBOL_NODE,
7416  .flags = PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
7417  .node_id = PM_NODE_IDENTIFY(parser),
7418  .location = PM_LOCATION_NULL_VALUE(parser)
7419  },
7420  .value_loc = PM_LOCATION_NULL_VALUE(parser),
7421  .unescaped = { 0 }
7422  };
7423 
7424  pm_string_constant_init(&node->unescaped, content, strlen(content));
7425  return node;
7426 }
7427 
7431 static bool
7432 pm_symbol_node_label_p(pm_node_t *node) {
7433  const uint8_t *end = NULL;
7434 
7435  switch (PM_NODE_TYPE(node)) {
7436  case PM_SYMBOL_NODE:
7437  end = ((pm_symbol_node_t *) node)->closing_loc.end;
7438  break;
7439  case PM_INTERPOLATED_SYMBOL_NODE:
7440  end = ((pm_interpolated_symbol_node_t *) node)->closing_loc.end;
7441  break;
7442  default:
7443  return false;
7444  }
7445 
7446  return (end != NULL) && (end[-1] == ':');
7447 }
7448 
7452 static pm_symbol_node_t *
7453 pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
7454  pm_symbol_node_t *new_node = PM_NODE_ALLOC(parser, pm_symbol_node_t);
7455 
7456  *new_node = (pm_symbol_node_t) {
7457  {
7458  .type = PM_SYMBOL_NODE,
7459  .flags = PM_NODE_FLAG_STATIC_LITERAL,
7460  .node_id = PM_NODE_IDENTIFY(parser),
7461  .location = {
7462  .start = opening->start,
7463  .end = closing->end
7464  }
7465  },
7466  .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
7467  .value_loc = node->content_loc,
7468  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7469  .unescaped = node->unescaped
7470  };
7471 
7472  pm_token_t content = { .type = PM_TOKEN_IDENTIFIER, .start = node->content_loc.start, .end = node->content_loc.end };
7473  pm_node_flag_set((pm_node_t *) new_node, parse_symbol_encoding(parser, &content, &node->unescaped, true));
7474 
7475  // We are explicitly _not_ using pm_node_destroy here because we don't want
7476  // to trash the unescaped string. We could instead copy the string if we
7477  // know that it is owned, but we're taking the fast path for now.
7478  xfree(node);
7479 
7480  return new_node;
7481 }
7482 
7486 static pm_string_node_t *
7487 pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
7488  pm_string_node_t *new_node = PM_NODE_ALLOC(parser, pm_string_node_t);
7489  pm_node_flags_t flags = 0;
7490 
7491  switch (parser->frozen_string_literal) {
7492  case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
7493  flags = PM_STRING_FLAGS_MUTABLE;
7494  break;
7495  case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
7496  flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
7497  break;
7498  }
7499 
7500  *new_node = (pm_string_node_t) {
7501  {
7502  .type = PM_STRING_NODE,
7503  .flags = flags,
7504  .node_id = PM_NODE_IDENTIFY(parser),
7505  .location = node->base.location
7506  },
7507  .opening_loc = node->opening_loc,
7508  .content_loc = node->value_loc,
7509  .closing_loc = node->closing_loc,
7510  .unescaped = node->unescaped
7511  };
7512 
7513  // We are explicitly _not_ using pm_node_destroy here because we don't want
7514  // to trash the unescaped string. We could instead copy the string if we
7515  // know that it is owned, but we're taking the fast path for now.
7516  xfree(node);
7517 
7518  return new_node;
7519 }
7520 
7524 static pm_true_node_t *
7525 pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
7526  assert(token->type == PM_TOKEN_KEYWORD_TRUE);
7527  pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7528 
7529  *node = (pm_true_node_t) {{
7530  .type = PM_TRUE_NODE,
7531  .flags = PM_NODE_FLAG_STATIC_LITERAL,
7532  .node_id = PM_NODE_IDENTIFY(parser),
7533  .location = PM_LOCATION_TOKEN_VALUE(token)
7534  }};
7535 
7536  return node;
7537 }
7538 
7542 static pm_true_node_t *
7543 pm_true_node_synthesized_create(pm_parser_t *parser) {
7544  pm_true_node_t *node = PM_NODE_ALLOC(parser, pm_true_node_t);
7545 
7546  *node = (pm_true_node_t) {{
7547  .type = PM_TRUE_NODE,
7548  .flags = PM_NODE_FLAG_STATIC_LITERAL,
7549  .node_id = PM_NODE_IDENTIFY(parser),
7550  .location = { .start = parser->start, .end = parser->end }
7551  }};
7552 
7553  return node;
7554 }
7555 
7559 static pm_undef_node_t *
7560 pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
7561  assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
7562  pm_undef_node_t *node = PM_NODE_ALLOC(parser, pm_undef_node_t);
7563 
7564  *node = (pm_undef_node_t) {
7565  {
7566  .type = PM_UNDEF_NODE,
7567  .node_id = PM_NODE_IDENTIFY(parser),
7568  .location = PM_LOCATION_TOKEN_VALUE(token),
7569  },
7570  .keyword_loc = PM_LOCATION_TOKEN_VALUE(token),
7571  .names = { 0 }
7572  };
7573 
7574  return node;
7575 }
7576 
7580 static void
7581 pm_undef_node_append(pm_undef_node_t *node, pm_node_t *name) {
7582  node->base.location.end = name->location.end;
7583  pm_node_list_append(&node->names, name);
7584 }
7585 
7589 static pm_unless_node_t *
7590 pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
7591  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7592  pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7593 
7594  const uint8_t *end;
7595  if (statements != NULL) {
7596  end = statements->base.location.end;
7597  } else {
7598  end = predicate->location.end;
7599  }
7600 
7601  *node = (pm_unless_node_t) {
7602  {
7603  .type = PM_UNLESS_NODE,
7604  .flags = PM_NODE_FLAG_NEWLINE,
7605  .node_id = PM_NODE_IDENTIFY(parser),
7606  .location = {
7607  .start = keyword->start,
7608  .end = end
7609  },
7610  },
7611  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7612  .predicate = predicate,
7613  .then_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(then_keyword),
7614  .statements = statements,
7615  .else_clause = NULL,
7616  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7617  };
7618 
7619  return node;
7620 }
7621 
7625 static pm_unless_node_t *
7626 pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
7627  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7628  pm_unless_node_t *node = PM_NODE_ALLOC(parser, pm_unless_node_t);
7629 
7630  pm_statements_node_t *statements = pm_statements_node_create(parser);
7631  pm_statements_node_body_append(parser, statements, statement, true);
7632 
7633  *node = (pm_unless_node_t) {
7634  {
7635  .type = PM_UNLESS_NODE,
7636  .flags = PM_NODE_FLAG_NEWLINE,
7637  .node_id = PM_NODE_IDENTIFY(parser),
7638  .location = {
7639  .start = statement->location.start,
7640  .end = predicate->location.end
7641  },
7642  },
7643  .keyword_loc = PM_LOCATION_TOKEN_VALUE(unless_keyword),
7644  .predicate = predicate,
7645  .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7646  .statements = statements,
7647  .else_clause = NULL,
7648  .end_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE
7649  };
7650 
7651  return node;
7652 }
7653 
7654 static inline void
7655 pm_unless_node_end_keyword_loc_set(pm_unless_node_t *node, const pm_token_t *end_keyword) {
7656  node->end_keyword_loc = PM_LOCATION_TOKEN_VALUE(end_keyword);
7657  node->base.location.end = end_keyword->end;
7658 }
7659 
7665 static void
7666 pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
7667  assert(parser->current_block_exits != NULL);
7668 
7669  // All of the block exits that we want to remove should be within the
7670  // statements, and since we are modifying the statements, we shouldn't have
7671  // to check the end location.
7672  const uint8_t *start = statements->base.location.start;
7673 
7674  for (size_t index = parser->current_block_exits->size; index > 0; index--) {
7675  pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
7676  if (block_exit->location.start < start) break;
7677 
7678  // Implicitly remove from the list by lowering the size.
7679  parser->current_block_exits->size--;
7680  }
7681 }
7682 
7686 static pm_until_node_t *
7687 pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7688  pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7689  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7690 
7691  *node = (pm_until_node_t) {
7692  {
7693  .type = PM_UNTIL_NODE,
7694  .flags = flags,
7695  .node_id = PM_NODE_IDENTIFY(parser),
7696  .location = {
7697  .start = keyword->start,
7698  .end = closing->end,
7699  },
7700  },
7701  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7702  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7703  .predicate = predicate,
7704  .statements = statements
7705  };
7706 
7707  return node;
7708 }
7709 
7713 static pm_until_node_t *
7714 pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7715  pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7716  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7717  pm_loop_modifier_block_exits(parser, statements);
7718 
7719  *node = (pm_until_node_t) {
7720  {
7721  .type = PM_UNTIL_NODE,
7722  .flags = flags,
7723  .node_id = PM_NODE_IDENTIFY(parser),
7724  .location = {
7725  .start = statements->base.location.start,
7726  .end = predicate->location.end,
7727  },
7728  },
7729  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7730  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7731  .predicate = predicate,
7732  .statements = statements
7733  };
7734 
7735  return node;
7736 }
7737 
7741 static pm_when_node_t *
7742 pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
7743  pm_when_node_t *node = PM_NODE_ALLOC(parser, pm_when_node_t);
7744 
7745  *node = (pm_when_node_t) {
7746  {
7747  .type = PM_WHEN_NODE,
7748  .node_id = PM_NODE_IDENTIFY(parser),
7749  .location = {
7750  .start = keyword->start,
7751  .end = NULL
7752  }
7753  },
7754  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7755  .statements = NULL,
7756  .then_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7757  .conditions = { 0 }
7758  };
7759 
7760  return node;
7761 }
7762 
7766 static void
7767 pm_when_node_conditions_append(pm_when_node_t *node, pm_node_t *condition) {
7768  node->base.location.end = condition->location.end;
7769  pm_node_list_append(&node->conditions, condition);
7770 }
7771 
7775 static inline void
7776 pm_when_node_then_keyword_loc_set(pm_when_node_t *node, const pm_token_t *then_keyword) {
7777  node->base.location.end = then_keyword->end;
7778  node->then_keyword_loc = PM_LOCATION_TOKEN_VALUE(then_keyword);
7779 }
7780 
7784 static void
7785 pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
7786  if (statements->base.location.end > node->base.location.end) {
7787  node->base.location.end = statements->base.location.end;
7788  }
7789 
7790  node->statements = statements;
7791 }
7792 
7796 static pm_while_node_t *
7797 pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7798  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7799  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7800 
7801  *node = (pm_while_node_t) {
7802  {
7803  .type = PM_WHILE_NODE,
7804  .flags = flags,
7805  .node_id = PM_NODE_IDENTIFY(parser),
7806  .location = {
7807  .start = keyword->start,
7808  .end = closing->end
7809  },
7810  },
7811  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7812  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7813  .predicate = predicate,
7814  .statements = statements
7815  };
7816 
7817  return node;
7818 }
7819 
7823 static pm_while_node_t *
7824 pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7825  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7826  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7827  pm_loop_modifier_block_exits(parser, statements);
7828 
7829  *node = (pm_while_node_t) {
7830  {
7831  .type = PM_WHILE_NODE,
7832  .flags = flags,
7833  .node_id = PM_NODE_IDENTIFY(parser),
7834  .location = {
7835  .start = statements->base.location.start,
7836  .end = predicate->location.end
7837  },
7838  },
7839  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7840  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7841  .predicate = predicate,
7842  .statements = statements
7843  };
7844 
7845  return node;
7846 }
7847 
7851 static pm_while_node_t *
7852 pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
7853  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7854 
7855  *node = (pm_while_node_t) {
7856  {
7857  .type = PM_WHILE_NODE,
7858  .node_id = PM_NODE_IDENTIFY(parser),
7859  .location = PM_LOCATION_NULL_VALUE(parser)
7860  },
7861  .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7862  .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7863  .predicate = predicate,
7864  .statements = statements
7865  };
7866 
7867  return node;
7868 }
7869 
7874 static pm_x_string_node_t *
7875 pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
7876  pm_x_string_node_t *node = PM_NODE_ALLOC(parser, pm_x_string_node_t);
7877 
7878  *node = (pm_x_string_node_t) {
7879  {
7880  .type = PM_X_STRING_NODE,
7881  .flags = PM_STRING_FLAGS_FROZEN,
7882  .node_id = PM_NODE_IDENTIFY(parser),
7883  .location = {
7884  .start = opening->start,
7885  .end = closing->end
7886  },
7887  },
7888  .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
7889  .content_loc = PM_LOCATION_TOKEN_VALUE(content),
7890  .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
7891  .unescaped = *unescaped
7892  };
7893 
7894  return node;
7895 }
7896 
7900 static inline pm_x_string_node_t *
7901 pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7902  return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
7903 }
7904 
7908 static pm_yield_node_t *
7909 pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7910  pm_yield_node_t *node = PM_NODE_ALLOC(parser, pm_yield_node_t);
7911 
7912  const uint8_t *end;
7913  if (rparen_loc->start != NULL) {
7914  end = rparen_loc->end;
7915  } else if (arguments != NULL) {
7916  end = arguments->base.location.end;
7917  } else if (lparen_loc->start != NULL) {
7918  end = lparen_loc->end;
7919  } else {
7920  end = keyword->end;
7921  }
7922 
7923  *node = (pm_yield_node_t) {
7924  {
7925  .type = PM_YIELD_NODE,
7926  .node_id = PM_NODE_IDENTIFY(parser),
7927  .location = {
7928  .start = keyword->start,
7929  .end = end
7930  },
7931  },
7932  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7933  .lparen_loc = *lparen_loc,
7934  .arguments = arguments,
7935  .rparen_loc = *rparen_loc
7936  };
7937 
7938  return node;
7939 }
7940 
7941 #undef PM_NODE_ALLOC
7942 #undef PM_NODE_IDENTIFY
7943 
7948 static int
7949 pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7950  pm_scope_t *scope = parser->current_scope;
7951  int depth = 0;
7952 
7953  while (scope != NULL) {
7954  if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
7955  if (scope->closed) break;
7956 
7957  scope = scope->previous;
7958  depth++;
7959  }
7960 
7961  return -1;
7962 }
7963 
7969 static inline int
7970 pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7971  return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
7972 }
7973 
7977 static inline void
7978 pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7979  pm_locals_write(&parser->current_scope->locals, constant_id, start, end, reads);
7980 }
7981 
7985 static pm_constant_id_t
7986 pm_parser_local_add_location(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7987  pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, start, end);
7988  if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
7989  return constant_id;
7990 }
7991 
7995 static inline pm_constant_id_t
7996 pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
7997  return pm_parser_local_add_location(parser, token->start, token->end, reads);
7998 }
7999 
8003 static pm_constant_id_t
8004 pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
8005  pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
8006  if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8007  return constant_id;
8008 }
8009 
8013 static pm_constant_id_t
8014 pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
8015  pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
8016  if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
8017  return constant_id;
8018 }
8019 
8027 static bool
8028 pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
8029  // We want to check whether the parameter name is a numbered parameter or
8030  // not.
8031  pm_refute_numbered_parameter(parser, name->start, name->end);
8032 
8033  // Otherwise we'll fetch the constant id for the parameter name and check
8034  // whether it's already in the current scope.
8035  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
8036 
8037  if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
8038  // Add an error if the parameter doesn't start with _ and has been seen before
8039  if ((name->start < name->end) && (*name->start != '_')) {
8040  pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
8041  }
8042  return true;
8043  }
8044  return false;
8045 }
8046 
8050 static void
8051 pm_parser_scope_pop(pm_parser_t *parser) {
8052  pm_scope_t *scope = parser->current_scope;
8053  parser->current_scope = scope->previous;
8054  pm_locals_free(&scope->locals);
8055  pm_node_list_free(&scope->implicit_parameters);
8056  xfree(scope);
8057 }
8058 
8059 /******************************************************************************/
8060 /* Stack helpers */
8061 /******************************************************************************/
8062 
8066 static inline void
8067 pm_state_stack_push(pm_state_stack_t *stack, bool value) {
8068  *stack = (*stack << 1) | (value & 1);
8069 }
8070 
8074 static inline void
8075 pm_state_stack_pop(pm_state_stack_t *stack) {
8076  *stack >>= 1;
8077 }
8078 
8082 static inline bool
8083 pm_state_stack_p(const pm_state_stack_t *stack) {
8084  return *stack & 1;
8085 }
8086 
8087 static inline void
8088 pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
8089  // Use the negation of the value to prevent stack overflow.
8090  pm_state_stack_push(&parser->accepts_block_stack, !value);
8091 }
8092 
8093 static inline void
8094 pm_accepts_block_stack_pop(pm_parser_t *parser) {
8095  pm_state_stack_pop(&parser->accepts_block_stack);
8096 }
8097 
8098 static inline bool
8099 pm_accepts_block_stack_p(pm_parser_t *parser) {
8100  return !pm_state_stack_p(&parser->accepts_block_stack);
8101 }
8102 
8103 static inline void
8104 pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
8105  pm_state_stack_push(&parser->do_loop_stack, value);
8106 }
8107 
8108 static inline void
8109 pm_do_loop_stack_pop(pm_parser_t *parser) {
8110  pm_state_stack_pop(&parser->do_loop_stack);
8111 }
8112 
8113 static inline bool
8114 pm_do_loop_stack_p(pm_parser_t *parser) {
8115  return pm_state_stack_p(&parser->do_loop_stack);
8116 }
8117 
8118 /******************************************************************************/
8119 /* Lexer check helpers */
8120 /******************************************************************************/
8121 
8126 static inline uint8_t
8127 peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
8128  if (cursor < parser->end) {
8129  return *cursor;
8130  } else {
8131  return '\0';
8132  }
8133 }
8134 
8140 static inline uint8_t
8141 peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
8142  return peek_at(parser, parser->current.end + offset);
8143 }
8144 
8149 static inline uint8_t
8150 peek(const pm_parser_t *parser) {
8151  return peek_at(parser, parser->current.end);
8152 }
8153 
8158 static inline bool
8159 match(pm_parser_t *parser, uint8_t value) {
8160  if (peek(parser) == value) {
8161  parser->current.end++;
8162  return true;
8163  }
8164  return false;
8165 }
8166 
8171 static inline size_t
8172 match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
8173  if (peek_at(parser, cursor) == '\n') {
8174  return 1;
8175  }
8176  if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
8177  return 2;
8178  }
8179  return 0;
8180 }
8181 
8187 static inline size_t
8188 match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
8189  return match_eol_at(parser, parser->current.end + offset);
8190 }
8191 
8197 static inline size_t
8198 match_eol(pm_parser_t *parser) {
8199  return match_eol_at(parser, parser->current.end);
8200 }
8201 
/**
 * Find the first '\n' byte within the `length` bytes starting at `cursor`.
 * Returns a pointer to it, or NULL when there is none. The length must not be
 * negative.
 */
static inline const uint8_t *
next_newline(const uint8_t *cursor, ptrdiff_t length) {
    assert(length >= 0);

    // A plain byte search is sufficient here: none of the supported encodings
    // use \n as a component of a multi-byte character.
    const void *found = memchr(cursor, '\n', (size_t) length);
    return (const uint8_t *) found;
}
8214 
8218 static inline bool
8219 ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
8220  return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
8221 }
8222 
8227 static bool
8228 parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
8229  const pm_encoding_t *encoding = pm_encoding_find(start, end);
8230 
8231  if (encoding != NULL) {
8232  if (parser->encoding != encoding) {
8233  parser->encoding = encoding;
8234  if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
8235  }
8236 
8237  parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
8238  return true;
8239  }
8240 
8241  return false;
8242 }
8243 
8248 static void
8249 parser_lex_magic_comment_encoding(pm_parser_t *parser) {
8250  const uint8_t *cursor = parser->current.start + 1;
8251  const uint8_t *end = parser->current.end;
8252 
8253  bool separator = false;
8254  while (true) {
8255  if (end - cursor <= 6) return;
8256  switch (cursor[6]) {
8257  case 'C': case 'c': cursor += 6; continue;
8258  case 'O': case 'o': cursor += 5; continue;
8259  case 'D': case 'd': cursor += 4; continue;
8260  case 'I': case 'i': cursor += 3; continue;
8261  case 'N': case 'n': cursor += 2; continue;
8262  case 'G': case 'g': cursor += 1; continue;
8263  case '=': case ':':
8264  separator = true;
8265  cursor += 6;
8266  break;
8267  default:
8268  cursor += 6;
8269  if (pm_char_is_whitespace(*cursor)) break;
8270  continue;
8271  }
8272  if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
8273  separator = false;
8274  }
8275 
8276  while (true) {
8277  do {
8278  if (++cursor >= end) return;
8279  } while (pm_char_is_whitespace(*cursor));
8280 
8281  if (separator) break;
8282  if (*cursor != '=' && *cursor != ':') return;
8283 
8284  separator = true;
8285  cursor++;
8286  }
8287 
8288  const uint8_t *value_start = cursor;
8289  while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
8290 
8291  if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
8292  // If we were unable to parse the encoding value, then we've got an
8293  // issue because we didn't understand the encoding that the user was
8294  // trying to use. In this case we'll keep using the default encoding but
8295  // add an error to the parser to indicate an unsuccessful parse.
8296  pm_parser_err(parser, value_start, cursor, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
8297  }
8298 }
8299 
/**
 * The possible results of interpreting the value of a boolean magic comment.
 */
typedef enum {
    /** The value was "true", compared case-insensitively. */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE = 0,

    /** The value was "false", compared case-insensitively. */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE = 1,

    /** The value was neither "true" nor "false". */
    PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID = 2
} pm_magic_comment_boolean_value_t;
8305 
8310 static pm_magic_comment_boolean_value_t
8311 parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
8312  if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
8313  return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
8314  } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
8315  return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
8316  } else {
8317  return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
8318  }
8319 }
8320 
/**
 * Report whether the given byte delimits a magic comment key: a single quote,
 * a double quote, a colon, or a semicolon.
 */
static inline bool
pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
    switch (b) {
        case '\'':
        case '"':
        case ':':
        case ';':
            return true;
        default:
            return false;
    }
}
8325 
8331 static inline const uint8_t *
8332 parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
8333  while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, parser->encoding)) != NULL) {
8334  if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
8335  return cursor;
8336  }
8337  cursor++;
8338  }
8339  return NULL;
8340 }
8341 
8352 static inline bool
8353 parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
8354  bool result = true;
8355 
8356  const uint8_t *start = parser->current.start + 1;
8357  const uint8_t *end = parser->current.end;
8358  if (end - start <= 7) return false;
8359 
8360  const uint8_t *cursor;
8361  bool indicator = false;
8362 
8363  if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8364  start = cursor + 3;
8365 
8366  if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
8367  end = cursor;
8368  indicator = true;
8369  } else {
8370  // If we have a start marker but not an end marker, then we cannot
8371  // have a magic comment.
8372  return false;
8373  }
8374  }
8375 
8376  cursor = start;
8377  while (cursor < end) {
8378  while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
8379 
8380  const uint8_t *key_start = cursor;
8381  while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
8382 
8383  const uint8_t *key_end = cursor;
8384  while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8385  if (cursor == end) break;
8386 
8387  if (*cursor == ':') {
8388  cursor++;
8389  } else {
8390  if (!indicator) return false;
8391  continue;
8392  }
8393 
8394  while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8395  if (cursor == end) break;
8396 
8397  const uint8_t *value_start;
8398  const uint8_t *value_end;
8399 
8400  if (*cursor == '"') {
8401  value_start = ++cursor;
8402  for (; cursor < end && *cursor != '"'; cursor++) {
8403  if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
8404  }
8405  value_end = cursor;
8406  if (*cursor == '"') cursor++;
8407  } else {
8408  value_start = cursor;
8409  while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
8410  value_end = cursor;
8411  }
8412 
8413  if (indicator) {
8414  while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
8415  } else {
8416  while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
8417  if (cursor != end) return false;
8418  }
8419 
8420  // Here, we need to do some processing on the key to swap out dashes for
8421  // underscores. We only need to do this if there _is_ a dash in the key.
8422  pm_string_t key;
8423  const size_t key_length = (size_t) (key_end - key_start);
8424  const uint8_t *dash = pm_memchr(key_start, '-', key_length, parser->encoding_changed, parser->encoding);
8425 
8426  if (dash == NULL) {
8427  pm_string_shared_init(&key, key_start, key_end);
8428  } else {
8429  uint8_t *buffer = xmalloc(key_length);
8430  if (buffer == NULL) break;
8431 
8432  memcpy(buffer, key_start, key_length);
8433  buffer[dash - key_start] = '_';
8434 
8435  while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, parser->encoding)) != NULL) {
8436  buffer[dash - key_start] = '_';
8437  }
8438 
8439  pm_string_owned_init(&key, buffer, key_length);
8440  }
8441 
8442  // Finally, we can start checking the key against the list of known
8443  // magic comment keys, and potentially change state based on that.
8444  const uint8_t *key_source = pm_string_source(&key);
8445  uint32_t value_length = (uint32_t) (value_end - value_start);
8446 
8447  // We only want to attempt to compare against encoding comments if it's
8448  // the first line in the file (or the second in the case of a shebang).
8449  if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
8450  if (
8451  (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
8452  (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
8453  ) {
8454  result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
8455  }
8456  }
8457 
8458  if (key_length == 11) {
8459  if (pm_strncasecmp(key_source, (const uint8_t *) "warn_indent", 11) == 0) {
8460  switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8461  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8462  PM_PARSER_WARN_TOKEN_FORMAT(
8463  parser,
8464  parser->current,
8465  PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8466  (int) key_length,
8467  (const char *) key_source,
8468  (int) value_length,
8469  (const char *) value_start
8470  );
8471  break;
8472  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8473  parser->warn_mismatched_indentation = false;
8474  break;
8475  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8476  parser->warn_mismatched_indentation = true;
8477  break;
8478  }
8479  }
8480  } else if (key_length == 21) {
8481  if (pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
8482  // We only want to handle frozen string literal comments if it's
8483  // before any semantic tokens have been seen.
8484  if (semantic_token_seen) {
8485  pm_parser_warn_token(parser, &parser->current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
8486  } else {
8487  switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
8488  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
8489  PM_PARSER_WARN_TOKEN_FORMAT(
8490  parser,
8491  parser->current,
8492  PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8493  (int) key_length,
8494  (const char *) key_source,
8495  (int) value_length,
8496  (const char *) value_start
8497  );
8498  break;
8499  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
8501  break;
8502  case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
8504  break;
8505  }
8506  }
8507  }
8508  } else if (key_length == 24) {
8509  if (pm_strncasecmp(key_source, (const uint8_t *) "shareable_constant_value", 24) == 0) {
8510  const uint8_t *cursor = parser->current.start;
8511  while ((cursor > parser->start) && ((cursor[-1] == ' ') || (cursor[-1] == '\t'))) cursor--;
8512 
8513  if (!((cursor == parser->start) || (cursor[-1] == '\n'))) {
8514  pm_parser_warn_token(parser, &parser->current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
8515  } else if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "none", 4) == 0) {
8516  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
8517  } else if (value_length == 7 && pm_strncasecmp(value_start, (const uint8_t *) "literal", 7) == 0) {
8518  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
8519  } else if (value_length == 23 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_everything", 23) == 0) {
8520  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
8521  } else if (value_length == 17 && pm_strncasecmp(value_start, (const uint8_t *) "experimental_copy", 17) == 0) {
8522  pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
8523  } else {
8524  PM_PARSER_WARN_TOKEN_FORMAT(
8525  parser,
8526  parser->current,
8527  PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
8528  (int) key_length,
8529  (const char *) key_source,
8530  (int) value_length,
8531  (const char *) value_start
8532  );
8533  }
8534  }
8535  }
8536 
8537  // When we're done, we want to free the string in case we had to
8538  // allocate memory for it.
8539  pm_string_free(&key);
8540 
8541  // Allocate a new magic comment node to append to the parser's list.
8543  if ((magic_comment = (pm_magic_comment_t *) xcalloc(1, sizeof(pm_magic_comment_t))) != NULL) {
8544  magic_comment->key_start = key_start;
8545  magic_comment->value_start = value_start;
8546  magic_comment->key_length = (uint32_t) key_length;
8547  magic_comment->value_length = value_length;
8549  }
8550  }
8551 
8552  return result;
8553 }
8554 
8555 /******************************************************************************/
8556 /* Context manipulations */
8557 /******************************************************************************/
8558 
8559 static bool
8560 context_terminator(pm_context_t context, pm_token_t *token) {
8561  switch (context) {
8562  case PM_CONTEXT_MAIN:
8563  case PM_CONTEXT_DEF_PARAMS:
8564  case PM_CONTEXT_DEFINED:
8566  case PM_CONTEXT_TERNARY:
8568  return token->type == PM_TOKEN_EOF;
8570  return token->type == PM_TOKEN_COMMA || token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8571  case PM_CONTEXT_PREEXE:
8572  case PM_CONTEXT_POSTEXE:
8573  return token->type == PM_TOKEN_BRACE_RIGHT;
8574  case PM_CONTEXT_MODULE:
8575  case PM_CONTEXT_CLASS:
8576  case PM_CONTEXT_SCLASS:
8578  case PM_CONTEXT_DEF:
8580  return token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ENSURE;
8581  case PM_CONTEXT_WHILE:
8582  case PM_CONTEXT_UNTIL:
8583  case PM_CONTEXT_ELSE:
8584  case PM_CONTEXT_FOR:
8588  case PM_CONTEXT_DEF_ENSURE:
8592  return token->type == PM_TOKEN_KEYWORD_END;
8594  return token->type == PM_TOKEN_KEYWORD_DO || token->type == PM_TOKEN_KEYWORD_THEN;
8595  case PM_CONTEXT_FOR_INDEX:
8596  return token->type == PM_TOKEN_KEYWORD_IN;
8597  case PM_CONTEXT_CASE_WHEN:
8598  return token->type == PM_TOKEN_KEYWORD_WHEN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8599  case PM_CONTEXT_CASE_IN:
8600  return token->type == PM_TOKEN_KEYWORD_IN || token->type == PM_TOKEN_KEYWORD_END || token->type == PM_TOKEN_KEYWORD_ELSE;
8601  case PM_CONTEXT_IF:
8602  case PM_CONTEXT_ELSIF:
8603  return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_ELSIF || token->type == PM_TOKEN_KEYWORD_END;
8604  case PM_CONTEXT_UNLESS:
8605  return token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8606  case PM_CONTEXT_EMBEXPR:
8607  return token->type == PM_TOKEN_EMBEXPR_END;
8609  return token->type == PM_TOKEN_BRACE_RIGHT;
8610  case PM_CONTEXT_PARENS:
8611  return token->type == PM_TOKEN_PARENTHESIS_RIGHT;
8612  case PM_CONTEXT_BEGIN:
8616  case PM_CONTEXT_DEF_RESCUE:
8620  return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_RESCUE || token->type == PM_TOKEN_KEYWORD_ELSE || token->type == PM_TOKEN_KEYWORD_END;
8621  case PM_CONTEXT_BEGIN_ELSE:
8622  case PM_CONTEXT_BLOCK_ELSE:
8623  case PM_CONTEXT_CLASS_ELSE:
8624  case PM_CONTEXT_DEF_ELSE:
8628  return token->type == PM_TOKEN_KEYWORD_ENSURE || token->type == PM_TOKEN_KEYWORD_END;
8630  return token->type == PM_TOKEN_BRACE_RIGHT;
8631  case PM_CONTEXT_PREDICATE:
8632  return token->type == PM_TOKEN_KEYWORD_THEN || token->type == PM_TOKEN_NEWLINE || token->type == PM_TOKEN_SEMICOLON;
8633  case PM_CONTEXT_NONE:
8634  return false;
8635  }
8636 
8637  return false;
8638 }
8639 
8644 static pm_context_t
8645 context_recoverable(const pm_parser_t *parser, pm_token_t *token) {
8646  pm_context_node_t *context_node = parser->current_context;
8647 
8648  while (context_node != NULL) {
8649  if (context_terminator(context_node->context, token)) return context_node->context;
8650  context_node = context_node->prev;
8651  }
8652 
8653  return PM_CONTEXT_NONE;
8654 }
8655 
8656 static bool
8657 context_push(pm_parser_t *parser, pm_context_t context) {
8658  pm_context_node_t *context_node = (pm_context_node_t *) xmalloc(sizeof(pm_context_node_t));
8659  if (context_node == NULL) return false;
8660 
8661  *context_node = (pm_context_node_t) { .context = context, .prev = NULL };
8662 
8663  if (parser->current_context == NULL) {
8664  parser->current_context = context_node;
8665  } else {
8666  context_node->prev = parser->current_context;
8667  parser->current_context = context_node;
8668  }
8669 
8670  return true;
8671 }
8672 
8673 static void
8674 context_pop(pm_parser_t *parser) {
8675  pm_context_node_t *prev = parser->current_context->prev;
8676  xfree(parser->current_context);
8677  parser->current_context = prev;
8678 }
8679 
8680 static bool
8681 context_p(const pm_parser_t *parser, pm_context_t context) {
8682  pm_context_node_t *context_node = parser->current_context;
8683 
8684  while (context_node != NULL) {
8685  if (context_node->context == context) return true;
8686  context_node = context_node->prev;
8687  }
8688 
8689  return false;
8690 }
8691 
8692 static bool
8693 context_def_p(const pm_parser_t *parser) {
8694  pm_context_node_t *context_node = parser->current_context;
8695 
8696  while (context_node != NULL) {
8697  switch (context_node->context) {
8698  case PM_CONTEXT_DEF:
8699  case PM_CONTEXT_DEF_PARAMS:
8700  case PM_CONTEXT_DEF_ENSURE:
8701  case PM_CONTEXT_DEF_RESCUE:
8702  case PM_CONTEXT_DEF_ELSE:
8703  return true;
8704  case PM_CONTEXT_CLASS:
8707  case PM_CONTEXT_CLASS_ELSE:
8708  case PM_CONTEXT_MODULE:
8712  case PM_CONTEXT_SCLASS:
8716  return false;
8717  default:
8718  context_node = context_node->prev;
8719  }
8720  }
8721 
8722  return false;
8723 }
8724 
8729 static const char *
8730 context_human(pm_context_t context) {
8731  switch (context) {
8732  case PM_CONTEXT_NONE:
8733  assert(false && "unreachable");
8734  return "";
8735  case PM_CONTEXT_BEGIN: return "begin statement";
8736  case PM_CONTEXT_BLOCK_BRACES: return "'{'..'}' block";
8737  case PM_CONTEXT_BLOCK_KEYWORDS: return "'do'..'end' block";
8738  case PM_CONTEXT_CASE_WHEN: return "'when' clause";
8739  case PM_CONTEXT_CASE_IN: return "'in' clause";
8740  case PM_CONTEXT_CLASS: return "class definition";
8741  case PM_CONTEXT_DEF: return "method definition";
8742  case PM_CONTEXT_DEF_PARAMS: return "method parameters";
8743  case PM_CONTEXT_DEFAULT_PARAMS: return "parameter default value";
8744  case PM_CONTEXT_DEFINED: return "'defined?' expression";
8745  case PM_CONTEXT_ELSE:
8746  case PM_CONTEXT_BEGIN_ELSE:
8747  case PM_CONTEXT_BLOCK_ELSE:
8748  case PM_CONTEXT_CLASS_ELSE:
8749  case PM_CONTEXT_DEF_ELSE:
8752  case PM_CONTEXT_SCLASS_ELSE: return "'else' clause";
8753  case PM_CONTEXT_ELSIF: return "'elsif' clause";
8754  case PM_CONTEXT_EMBEXPR: return "embedded expression";
8758  case PM_CONTEXT_DEF_ENSURE:
8761  case PM_CONTEXT_SCLASS_ENSURE: return "'ensure' clause";
8762  case PM_CONTEXT_FOR: return "for loop";
8763  case PM_CONTEXT_FOR_INDEX: return "for loop index";
8764  case PM_CONTEXT_IF: return "if statement";
8765  case PM_CONTEXT_LAMBDA_BRACES: return "'{'..'}' lambda block";
8766  case PM_CONTEXT_LAMBDA_DO_END: return "'do'..'end' lambda block";
8767  case PM_CONTEXT_LOOP_PREDICATE: return "loop predicate";
8768  case PM_CONTEXT_MAIN: return "top level context";
8769  case PM_CONTEXT_MODULE: return "module definition";
8770  case PM_CONTEXT_MULTI_TARGET: return "multiple targets";
8771  case PM_CONTEXT_PARENS: return "parentheses";
8772  case PM_CONTEXT_POSTEXE: return "'END' block";
8773  case PM_CONTEXT_PREDICATE: return "predicate";
8774  case PM_CONTEXT_PREEXE: return "'BEGIN' block";
8778  case PM_CONTEXT_DEF_RESCUE:
8782  case PM_CONTEXT_SCLASS_RESCUE: return "'rescue' clause";
8783  case PM_CONTEXT_SCLASS: return "singleton class definition";
8784  case PM_CONTEXT_TERNARY: return "ternary expression";
8785  case PM_CONTEXT_UNLESS: return "unless statement";
8786  case PM_CONTEXT_UNTIL: return "until statement";
8787  case PM_CONTEXT_WHILE: return "while statement";
8788  }
8789 
8790  assert(false && "unreachable");
8791  return "";
8792 }
8793 
8794 /******************************************************************************/
8795 /* Specific token lexers */
8796 /******************************************************************************/
8797 
8798 static inline void
8799 pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *string, size_t length, const uint8_t *invalid) {
8800  if (invalid != NULL) {
8801  pm_diagnostic_id_t diag_id = (invalid == (string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
8802  pm_parser_err(parser, invalid, invalid + 1, diag_id);
8803  }
8804 }
8805 
8806 static size_t
8807 pm_strspn_binary_number_validate(pm_parser_t *parser, const uint8_t *string) {
8808  const uint8_t *invalid = NULL;
8809  size_t length = pm_strspn_binary_number(string, parser->end - string, &invalid);
8810  pm_strspn_number_validate(parser, string, length, invalid);
8811  return length;
8812 }
8813 
8814 static size_t
8815 pm_strspn_octal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8816  const uint8_t *invalid = NULL;
8817  size_t length = pm_strspn_octal_number(string, parser->end - string, &invalid);
8818  pm_strspn_number_validate(parser, string, length, invalid);
8819  return length;
8820 }
8821 
8822 static size_t
8823 pm_strspn_decimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8824  const uint8_t *invalid = NULL;
8825  size_t length = pm_strspn_decimal_number(string, parser->end - string, &invalid);
8826  pm_strspn_number_validate(parser, string, length, invalid);
8827  return length;
8828 }
8829 
8830 static size_t
8831 pm_strspn_hexadecimal_number_validate(pm_parser_t *parser, const uint8_t *string) {
8832  const uint8_t *invalid = NULL;
8833  size_t length = pm_strspn_hexadecimal_number(string, parser->end - string, &invalid);
8834  pm_strspn_number_validate(parser, string, length, invalid);
8835  return length;
8836 }
8837 
8838 static pm_token_type_t
8839 lex_optional_float_suffix(pm_parser_t *parser, bool* seen_e) {
8841 
8842  // Here we're going to attempt to parse the optional decimal portion of a
8843  // float. If it's not there, then it's okay and we'll just continue on.
8844  if (peek(parser) == '.') {
8845  if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8846  parser->current.end += 2;
8847  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8848  type = PM_TOKEN_FLOAT;
8849  } else {
8850  // If we had a . and then something else, then it's not a float
8851  // suffix on a number it's a method call or something else.
8852  return type;
8853  }
8854  }
8855 
8856  // Here we're going to attempt to parse the optional exponent portion of a
8857  // float. If it's not there, it's okay and we'll just continue on.
8858  if ((peek(parser) == 'e') || (peek(parser) == 'E')) {
8859  if ((peek_offset(parser, 1) == '+') || (peek_offset(parser, 1) == '-')) {
8860  parser->current.end += 2;
8861 
8862  if (pm_char_is_decimal_digit(peek(parser))) {
8863  parser->current.end++;
8864  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8865  } else {
8866  pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
8867  }
8868  } else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8869  parser->current.end++;
8870  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8871  } else {
8872  return type;
8873  }
8874 
8875  *seen_e = true;
8876  type = PM_TOKEN_FLOAT;
8877  }
8878 
8879  return type;
8880 }
8881 
8882 static pm_token_type_t
8883 lex_numeric_prefix(pm_parser_t *parser, bool* seen_e) {
8885  *seen_e = false;
8886 
8887  if (peek_offset(parser, -1) == '0') {
8888  switch (*parser->current.end) {
8889  // 0d1111 is a decimal number
8890  case 'd':
8891  case 'D':
8892  parser->current.end++;
8893  if (pm_char_is_decimal_digit(peek(parser))) {
8894  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8895  } else {
8896  match(parser, '_');
8897  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
8898  }
8899 
8900  break;
8901 
8902  // 0b1111 is a binary number
8903  case 'b':
8904  case 'B':
8905  parser->current.end++;
8906  if (pm_char_is_binary_digit(peek(parser))) {
8907  parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
8908  } else {
8909  match(parser, '_');
8910  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8911  }
8912 
8914  break;
8915 
8916  // 0o1111 is an octal number
8917  case 'o':
8918  case 'O':
8919  parser->current.end++;
8920  if (pm_char_is_octal_digit(peek(parser))) {
8921  parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8922  } else {
8923  match(parser, '_');
8924  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8925  }
8926 
8928  break;
8929 
8930  // 01111 is an octal number
8931  case '_':
8932  case '0':
8933  case '1':
8934  case '2':
8935  case '3':
8936  case '4':
8937  case '5':
8938  case '6':
8939  case '7':
8940  parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
8942  break;
8943 
8944  // 0x1111 is a hexadecimal number
8945  case 'x':
8946  case 'X':
8947  parser->current.end++;
8948  if (pm_char_is_hexadecimal_digit(peek(parser))) {
8949  parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
8950  } else {
8951  match(parser, '_');
8952  pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8953  }
8954 
8956  break;
8957 
8958  // 0.xxx is a float
8959  case '.': {
8960  type = lex_optional_float_suffix(parser, seen_e);
8961  break;
8962  }
8963 
8964  // 0exxx is a float
8965  case 'e':
8966  case 'E': {
8967  type = lex_optional_float_suffix(parser, seen_e);
8968  break;
8969  }
8970  }
8971  } else {
8972  // If it didn't start with a 0, then we'll lex as far as we can into a
8973  // decimal number.
8974  parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
8975 
8976  // Afterward, we'll lex as far as we can into an optional float suffix.
8977  type = lex_optional_float_suffix(parser, seen_e);
8978  }
8979 
8980  // At this point we have a completed number, but we want to provide the user
8981  // with a good experience if they put an additional .xxx fractional
8982  // component on the end, so we'll check for that here.
8983  if (peek_offset(parser, 0) == '.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8984  const uint8_t *fraction_start = parser->current.end;
8985  const uint8_t *fraction_end = parser->current.end + 2;
8986  fraction_end += pm_strspn_decimal_digit(fraction_end, parser->end - fraction_end);
8987  pm_parser_err(parser, fraction_start, fraction_end, PM_ERR_INVALID_NUMBER_FRACTION);
8988  }
8989 
8990  return type;
8991 }
8992 
8993 static pm_token_type_t
8994 lex_numeric(pm_parser_t *parser) {
8997 
8998  if (parser->current.end < parser->end) {
8999  bool seen_e = false;
9000  type = lex_numeric_prefix(parser, &seen_e);
9001 
9002  const uint8_t *end = parser->current.end;
9003  pm_token_type_t suffix_type = type;
9004 
9005  if (type == PM_TOKEN_INTEGER) {
9006  if (match(parser, 'r')) {
9007  suffix_type = PM_TOKEN_INTEGER_RATIONAL;
9008 
9009  if (match(parser, 'i')) {
9011  }
9012  } else if (match(parser, 'i')) {
9013  suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
9014  }
9015  } else {
9016  if (!seen_e && match(parser, 'r')) {
9017  suffix_type = PM_TOKEN_FLOAT_RATIONAL;
9018 
9019  if (match(parser, 'i')) {
9020  suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
9021  }
9022  } else if (match(parser, 'i')) {
9023  suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
9024  }
9025  }
9026 
9027  const uint8_t b = peek(parser);
9028  if (b != '\0' && (b >= 0x80 || ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')) || b == '_')) {
9029  parser->current.end = end;
9030  } else {
9031  type = suffix_type;
9032  }
9033  }
9034 
9035  return type;
9036 }
9037 
9038 static pm_token_type_t
9039 lex_global_variable(pm_parser_t *parser) {
9040  if (parser->current.end >= parser->end) {
9041  pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9042  return PM_TOKEN_GLOBAL_VARIABLE;
9043  }
9044 
9045  // True if multiple characters are allowed after the declaration of the
9046  // global variable. Not true when it starts with "$-".
9047  bool allow_multiple = true;
9048 
9049  switch (*parser->current.end) {
9050  case '~': // $~: match-data
9051  case '*': // $*: argv
9052  case '$': // $$: pid
9053  case '?': // $?: last status
9054  case '!': // $!: error string
9055  case '@': // $@: error position
9056  case '/': // $/: input record separator
9057  case '\\': // $\: output record separator
9058  case ';': // $;: field separator
9059  case ',': // $,: output field separator
9060  case '.': // $.: last read line number
9061  case '=': // $=: ignorecase
9062  case ':': // $:: load path
9063  case '<': // $<: reading filename
9064  case '>': // $>: default output handle
9065  case '\"': // $": already loaded files
9066  parser->current.end++;
9067  return PM_TOKEN_GLOBAL_VARIABLE;
9068 
9069  case '&': // $&: last match
9070  case '`': // $`: string before last match
9071  case '\'': // $': string after last match
9072  case '+': // $+: string matches last paren.
9073  parser->current.end++;
9074  return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
9075 
9076  case '0': {
9077  parser->current.end++;
9078  size_t width;
9079 
9080  if (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
9081  do {
9082  parser->current.end += width;
9083  } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9084 
9085  // $0 isn't allowed to be followed by anything.
9086  pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9087  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, diag_id);
9088  }
9089 
9090  return PM_TOKEN_GLOBAL_VARIABLE;
9091  }
9092 
9093  case '1':
9094  case '2':
9095  case '3':
9096  case '4':
9097  case '5':
9098  case '6':
9099  case '7':
9100  case '8':
9101  case '9':
9102  parser->current.end += pm_strspn_decimal_digit(parser->current.end, parser->end - parser->current.end);
9103  return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
9104 
9105  case '-':
9106  parser->current.end++;
9107  allow_multiple = false;
9108  /* fallthrough */
9109  default: {
9110  size_t width;
9111 
9112  if ((width = char_is_identifier(parser, parser->current.end)) > 0) {
9113  do {
9114  parser->current.end += width;
9115  } while (allow_multiple && parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
9116  } else if (pm_char_is_whitespace(peek(parser))) {
9117  // If we get here, then we have a $ followed by whitespace,
9118  // which is not allowed.
9119  pm_parser_err_token(parser, &parser->current, PM_ERR_GLOBAL_VARIABLE_BARE);
9120  } else {
9121  // If we get here, then we have a $ followed by something that
9122  // isn't recognized as a global variable.
9123  pm_diagnostic_id_t diag_id = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? PM_ERR_INVALID_VARIABLE_GLOBAL_3_3 : PM_ERR_INVALID_VARIABLE_GLOBAL;
9124  const uint8_t *end = parser->current.end + parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9125  PM_PARSER_ERR_FORMAT(parser, parser->current.start, end, diag_id, (int) (end - parser->current.start), (const char *) parser->current.start);
9126  }
9127 
9128  return PM_TOKEN_GLOBAL_VARIABLE;
9129  }
9130  }
9131 }
9132 
9145 static inline pm_token_type_t
9146 lex_keyword(pm_parser_t *parser, const uint8_t *current_start, const char *value, size_t vlen, pm_lex_state_t state, pm_token_type_t type, pm_token_type_t modifier_type) {
9147  if (memcmp(current_start, value, vlen) == 0) {
9148  pm_lex_state_t last_state = parser->lex_state;
9149 
9150  if (parser->lex_state & PM_LEX_STATE_FNAME) {
9151  lex_state_set(parser, PM_LEX_STATE_ENDFN);
9152  } else {
9153  lex_state_set(parser, state);
9154  if (state == PM_LEX_STATE_BEG) {
9155  parser->command_start = true;
9156  }
9157 
9158  if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
9159  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
9160  return modifier_type;
9161  }
9162  }
9163 
9164  return type;
9165  }
9166 
9167  return PM_TOKEN_EOF;
9168 }
9169 
9170 static pm_token_type_t
9171 lex_identifier(pm_parser_t *parser, bool previous_command_start) {
9172  // Lex as far as we can into the current identifier.
9173  size_t width;
9174  const uint8_t *end = parser->end;
9175  const uint8_t *current_start = parser->current.start;
9176  const uint8_t *current_end = parser->current.end;
9177  bool encoding_changed = parser->encoding_changed;
9178 
9179  if (encoding_changed) {
9180  while (current_end < end && (width = char_is_identifier(parser, current_end)) > 0) {
9181  current_end += width;
9182  }
9183  } else {
9184  while (current_end < end && (width = char_is_identifier_utf8(current_end, end)) > 0) {
9185  current_end += width;
9186  }
9187  }
9188  parser->current.end = current_end;
9189 
9190  // Now cache the length of the identifier so that we can quickly compare it
9191  // against known keywords.
9192  width = (size_t) (current_end - current_start);
9193 
9194  if (current_end < end) {
9195  if (((current_end + 1 >= end) || (current_end[1] != '=')) && (match(parser, '!') || match(parser, '?'))) {
9196  // First we'll attempt to extend the identifier by a ! or ?. Then we'll
9197  // check if we're returning the defined? keyword or just an identifier.
9198  width++;
9199 
9200  if (
9201  ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9202  (peek(parser) == ':') && (peek_offset(parser, 1) != ':')
9203  ) {
9204  // If we're in a position where we can accept a : at the end of an
9205  // identifier, then we'll optionally accept it.
9206  lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9207  (void) match(parser, ':');
9208  return PM_TOKEN_LABEL;
9209  }
9210 
9211  if (parser->lex_state != PM_LEX_STATE_DOT) {
9212  if (width == 8 && (lex_keyword(parser, current_start, "defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
9213  return PM_TOKEN_KEYWORD_DEFINED;
9214  }
9215  }
9216 
9217  return PM_TOKEN_METHOD_NAME;
9218  }
9219 
9220  if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) != '~' && peek_offset(parser, 1) != '>' && (peek_offset(parser, 1) != '=' || peek_offset(parser, 2) == '>') && match(parser, '=')) {
9221  // If we're in a position where we can accept a = at the end of an
9222  // identifier, then we'll optionally accept it.
9223  return PM_TOKEN_IDENTIFIER;
9224  }
9225 
9226  if (
9227  ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
9228  peek(parser) == ':' && peek_offset(parser, 1) != ':'
9229  ) {
9230  // If we're in a position where we can accept a : at the end of an
9231  // identifier, then we'll optionally accept it.
9232  lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
9233  (void) match(parser, ':');
9234  return PM_TOKEN_LABEL;
9235  }
9236  }
9237 
9238  if (parser->lex_state != PM_LEX_STATE_DOT) {
9240  switch (width) {
9241  case 2:
9242  if (lex_keyword(parser, current_start, "do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
9243  if (pm_do_loop_stack_p(parser)) {
9244  return PM_TOKEN_KEYWORD_DO_LOOP;
9245  }
9246  return PM_TOKEN_KEYWORD_DO;
9247  }
9248 
9249  if ((type = lex_keyword(parser, current_start, "if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF) return type;
9250  if ((type = lex_keyword(parser, current_start, "in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9251  if ((type = lex_keyword(parser, current_start, "or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9252  break;
9253  case 3:
9254  if ((type = lex_keyword(parser, current_start, "and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9255  if ((type = lex_keyword(parser, current_start, "def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9256  if ((type = lex_keyword(parser, current_start, "end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9257  if ((type = lex_keyword(parser, current_start, "END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9258  if ((type = lex_keyword(parser, current_start, "for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9259  if ((type = lex_keyword(parser, current_start, "nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9260  if ((type = lex_keyword(parser, current_start, "not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9261  break;
9262  case 4:
9263  if ((type = lex_keyword(parser, current_start, "case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9264  if ((type = lex_keyword(parser, current_start, "else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9265  if ((type = lex_keyword(parser, current_start, "next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9266  if ((type = lex_keyword(parser, current_start, "redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9267  if ((type = lex_keyword(parser, current_start, "self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9268  if ((type = lex_keyword(parser, current_start, "then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9269  if ((type = lex_keyword(parser, current_start, "true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9270  if ((type = lex_keyword(parser, current_start, "when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9271  break;
9272  case 5:
9273  if ((type = lex_keyword(parser, current_start, "alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9274  if ((type = lex_keyword(parser, current_start, "begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9275  if ((type = lex_keyword(parser, current_start, "BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9276  if ((type = lex_keyword(parser, current_start, "break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9277  if ((type = lex_keyword(parser, current_start, "class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9278  if ((type = lex_keyword(parser, current_start, "elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9279  if ((type = lex_keyword(parser, current_start, "false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9280  if ((type = lex_keyword(parser, current_start, "retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9281  if ((type = lex_keyword(parser, current_start, "super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9282  if ((type = lex_keyword(parser, current_start, "undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9283  if ((type = lex_keyword(parser, current_start, "until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF) return type;
9284  if ((type = lex_keyword(parser, current_start, "while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF) return type;
9285  if ((type = lex_keyword(parser, current_start, "yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9286  break;
9287  case 6:
9288  if ((type = lex_keyword(parser, current_start, "ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9289  if ((type = lex_keyword(parser, current_start, "module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9290  if ((type = lex_keyword(parser, current_start, "rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF) return type;
9291  if ((type = lex_keyword(parser, current_start, "return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9292  if ((type = lex_keyword(parser, current_start, "unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF) return type;
9293  break;
9294  case 8:
9295  if ((type = lex_keyword(parser, current_start, "__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9296  if ((type = lex_keyword(parser, current_start, "__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9297  break;
9298  case 12:
9299  if ((type = lex_keyword(parser, current_start, "__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF) return type;
9300  break;
9301  }
9302  }
9303 
9304  if (encoding_changed) {
9305  return parser->encoding->isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9306  }
9307  return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
9308 }
9309 
9314 static bool
9315 current_token_starts_line(pm_parser_t *parser) {
9316  return (parser->current.start == parser->start) || (parser->current.start[-1] == '\n');
9317 }
9318 
9333 static pm_token_type_t
9334 lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
9335  // If there is no content following this #, then we're at the end of
9336  // the string and we can safely return string content.
9337  if (pound + 1 >= parser->end) {
9338  parser->current.end = pound + 1;
9339  return PM_TOKEN_STRING_CONTENT;
9340  }
9341 
9342  // Now we'll check against the character that follows the #. If it constitutes
9343  // valid interplation, we'll handle that, otherwise we'll return
9344  // PM_TOKEN_NOT_PROVIDED.
9345  switch (pound[1]) {
9346  case '@': {
9347  // In this case we may have hit an embedded instance or class variable.
9348  if (pound + 2 >= parser->end) {
9349  parser->current.end = pound + 1;
9350  return PM_TOKEN_STRING_CONTENT;
9351  }
9352 
9353  // If we're looking at a @ and there's another @, then we'll skip past the
9354  // second @.
9355  const uint8_t *variable = pound + 2;
9356  if (*variable == '@' && pound + 3 < parser->end) variable++;
9357 
9358  if (char_is_identifier_start(parser, variable)) {
9359  // At this point we're sure that we've either hit an embedded instance
9360  // or class variable. In this case we'll first need to check if we've
9361  // already consumed content.
9362  if (pound > parser->current.start) {
9363  parser->current.end = pound;
9364  return PM_TOKEN_STRING_CONTENT;
9365  }
9366 
9367  // Otherwise we need to return the embedded variable token
9368  // and then switch to the embedded variable lex mode.
9369  lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9370  parser->current.end = pound + 1;
9371  return PM_TOKEN_EMBVAR;
9372  }
9373 
9374  // If we didn't get a valid interpolation, then this is just regular
9375  // string content. This is like if we get "#@-". In this case the caller
9376  // should keep lexing.
9377  parser->current.end = pound + 1;
9378  return PM_TOKEN_NOT_PROVIDED;
9379  }
9380  case '$':
9381  // In this case we may have hit an embedded global variable. If there's
9382  // not enough room, then we'll just return string content.
9383  if (pound + 2 >= parser->end) {
9384  parser->current.end = pound + 1;
9385  return PM_TOKEN_STRING_CONTENT;
9386  }
9387 
9388  // This is the character that we're going to check to see if it is the
9389  // start of an identifier that would indicate that this is a global
9390  // variable.
9391  const uint8_t *check = pound + 2;
9392 
9393  if (pound[2] == '-') {
9394  if (pound + 3 >= parser->end) {
9395  parser->current.end = pound + 2;
9396  return PM_TOKEN_STRING_CONTENT;
9397  }
9398 
9399  check++;
9400  }
9401 
9402  // If the character that we're going to check is the start of an
9403  // identifier, or we don't have a - and the character is a decimal number
9404  // or a global name punctuation character, then we've hit an embedded
9405  // global variable.
9406  if (
9407  char_is_identifier_start(parser, check) ||
9408  (pound[2] != '-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
9409  ) {
9410  // In this case we've hit an embedded global variable. First check to
9411  // see if we've already consumed content. If we have, then we need to
9412  // return that content as string content first.
9413  if (pound > parser->current.start) {
9414  parser->current.end = pound;
9415  return PM_TOKEN_STRING_CONTENT;
9416  }
9417 
9418  // Otherwise, we need to return the embedded variable token and switch
9419  // to the embedded variable lex mode.
9420  lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
9421  parser->current.end = pound + 1;
9422  return PM_TOKEN_EMBVAR;
9423  }
9424 
9425  // In this case we've hit a #$ that does not indicate a global variable.
9426  // In this case we'll continue lexing past it.
9427  parser->current.end = pound + 1;
9428  return PM_TOKEN_NOT_PROVIDED;
9429  case '{':
9430  // In this case it's the start of an embedded expression. If we have
9431  // already consumed content, then we need to return that content as string
9432  // content first.
9433  if (pound > parser->current.start) {
9434  parser->current.end = pound;
9435  return PM_TOKEN_STRING_CONTENT;
9436  }
9437 
9438  parser->enclosure_nesting++;
9439 
9440  // Otherwise we'll skip past the #{ and begin lexing the embedded
9441  // expression.
9442  lex_mode_push(parser, (pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
9443  parser->current.end = pound + 2;
9444  parser->command_start = true;
9445  pm_do_loop_stack_push(parser, false);
9446  return PM_TOKEN_EMBEXPR_BEGIN;
9447  default:
9448  // In this case we've hit a # that doesn't constitute interpolation. We'll
9449  // mark that by returning the not provided token type. This tells the
9450  // consumer to keep lexing forward.
9451  parser->current.end = pound + 1;
9452  return PM_TOKEN_NOT_PROVIDED;
9453  }
9454 }
9455 
// Bit flags passed to escape_read() and its helpers to describe the context
// in which an escape sequence is being read. They are combined with bitwise
// OR.

// No modifier is in effect.
9456 static const uint8_t PM_ESCAPE_FLAG_NONE = 0x0;
// A control escape (\c or \C-) is in effect; escape_byte() masks the value
// with 0x9f when this is set.
9457 static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x1;
// A meta escape (\M-) is in effect; escape_byte() sets the high bit (0x80)
// when this is set.
9458 static const uint8_t PM_ESCAPE_FLAG_META = 0x2;
// The escape belongs to a single character literal; lex_question_mark()
// passes this flag to escape_read().
9459 static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x4;
// The escape is being read for a regular expression; the escape helpers then
// also write to the separate regular expression buffer.
9460 static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x8;
9461 
// Lookup table indexed by byte value for the 128 ASCII bytes (0x00-0x7F);
// each row below covers 16 consecutive values. It is consulted through
// char_is_ascii_printable(), which the \c, \C- and \M- escapes use to decide
// whether the following character is acceptable.
//
// The entries that are set are 0x09-0x0D (tab, line feed, vertical tab, form
// feed, carriage return) and 0x20-0x7E, with the exception of 0x5C
// (backslash). 0x7F (DEL) is not set.
9465 static const bool ascii_printable_chars[] = {
9466  0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9467  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9468  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9469  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9470  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9471  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
9472  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
9473  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
9474 };
9475 
9476 static inline bool
9477 char_is_ascii_printable(const uint8_t b) {
9478  return (b < 0x80) && ascii_printable_chars[b];
9479 }
9480 
/**
 * Converts a single hexadecimal digit character into its numeric value. The
 * caller is expected to have already checked that the byte is a hexadecimal
 * digit; no validation happens here.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    // '0'..'9' sit below every letter in ASCII.
    if (value <= '9') {
        return (uint8_t) (value - '0');
    }

    // For both 'a'..'f' and 'A'..'F' the low three bits are 1..6, so adding
    // 9 yields 10..15.
    return (uint8_t) ((value & 0x7) + 9);
}
9489 
9495 static inline uint32_t
9496 escape_unicode(pm_parser_t *parser, const uint8_t *string, size_t length) {
9497  uint32_t value = 0;
9498  for (size_t index = 0; index < length; index++) {
9499  if (index != 0) value <<= 4;
9500  value |= escape_hexadecimal_digit(string[index]);
9501  }
9502 
9503  // Here we're going to verify that the value is actually a valid Unicode
9504  // codepoint and not a surrogate pair.
9505  if (value >= 0xD800 && value <= 0xDFFF) {
9506  pm_parser_err(parser, string, string + length, PM_ERR_ESCAPE_INVALID_UNICODE);
9507  return 0xFFFD;
9508  }
9509 
9510  return value;
9511 }
9512 
9516 static inline uint8_t
9517 escape_byte(uint8_t value, const uint8_t flags) {
9518  if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
9519  if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
9520  return value;
9521 }
9522 
9526 static inline void
9527 escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t flags, const uint8_t *start, const uint8_t *end, uint32_t value) {
9528  // \u escape sequences in string-like structures implicitly change the
9529  // encoding to UTF-8 if they are >= 0x80 or if they are used in a character
9530  // literal.
9531  if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
9532  if (parser->explicit_encoding != NULL && parser->explicit_encoding != PM_ENCODING_UTF_8_ENTRY) {
9533  PM_PARSER_ERR_FORMAT(parser, start, end, PM_ERR_MIXED_ENCODING, parser->explicit_encoding->name);
9534  }
9535 
9537  }
9538 
9539  if (value <= 0x7F) { // 0xxxxxxx
9540  pm_buffer_append_byte(buffer, (uint8_t) value);
9541  } else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
9542  pm_buffer_append_byte(buffer, (uint8_t) (0xC0 | (value >> 6)));
9543  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9544  } else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
9545  pm_buffer_append_byte(buffer, (uint8_t) (0xE0 | (value >> 12)));
9546  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
9547  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9548  } else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
9549  pm_buffer_append_byte(buffer, (uint8_t) (0xF0 | (value >> 18)));
9550  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
9551  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
9552  pm_buffer_append_byte(buffer, (uint8_t) (0x80 | (value & 0x3F)));
9553  } else {
9554  pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
9555  pm_buffer_append_byte(buffer, 0xEF);
9556  pm_buffer_append_byte(buffer, 0xBF);
9557  pm_buffer_append_byte(buffer, 0xBD);
9558  }
9559 }
9560 
9565 static inline void
9566 escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte) {
9567  if (byte >= 0x80) {
9568  if (parser->explicit_encoding != NULL && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
9569  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_MIXED_ENCODING, parser->encoding->name);
9570  }
9571 
9572  parser->explicit_encoding = parser->encoding;
9573  }
9574 
9575  pm_buffer_append_byte(buffer, byte);
9576 }
9577 
9581 static inline void
9582 escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
9583  size_t width;
9584  if (parser->encoding_changed) {
9585  width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9586  } else {
9587  width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
9588  }
9589 
9590  // TODO: If the character is invalid in the given encoding, then we'll just
9591  // push one byte into the buffer. This should actually be an error.
9592  width = (width == 0) ? 1 : width;
9593 
9594  for (size_t index = 0; index < width; index++) {
9595  escape_write_byte_encoded(parser, buffer, *parser->current.end);
9596  parser->current.end++;
9597  }
9598 }
9599 
9615 static inline void
9616 escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
9617  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9618  pm_buffer_append_format(regular_expression_buffer, "\\x%02X", byte);
9619  }
9620 
9621  escape_write_byte_encoded(parser, buffer, byte);
9622 }
9623 
9629 static void
9630 escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *type) {
9631 #define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
9632 
9633  PM_PARSER_WARN_TOKEN_FORMAT(
9634  parser,
9635  parser->current,
9636  PM_WARN_INVALID_CHARACTER,
9637  FLAG(flags),
9638  FLAG(flag),
9639  type
9640  );
9641 
9642 #undef FLAG
9643 }
9644 
// Read one escape sequence and write its unescaped form.
//
// The function dispatches on the character at parser->current.end and
// advances parser->current.end past whatever it consumes. The \x and \u
// cases treat parser->current.end - 1 as the start of the escape, so the
// caller is presumably expected to have already consumed the backslash
// (lex_question_mark() does so through match(parser, '\\')).
//
// - buffer receives the unescaped bytes.
// - regular_expression_buffer is only written to when flags contains
//   PM_ESCAPE_FLAG_REGEXP; lex_question_mark() passes NULL for it.
// - flags is a combination of the PM_ESCAPE_FLAG_* values. The \c, \C- and
//   \M- cases recurse with PM_ESCAPE_FLAG_CONTROL / PM_ESCAPE_FLAG_META added
//   when they are followed by another backslash.
//
// Problems are reported by adding errors or warnings to the parser; nothing
// is returned.
9648 static void
9649 escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
9650  switch (peek(parser)) {
// Single-character escapes. Each consumes one character and writes the
// corresponding byte after applying the control/meta flags through
// escape_byte().
9651  case '\\': {
9652  parser->current.end++;
9653  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
9654  return;
9655  }
9656  case '\'': {
9657  parser->current.end++;
9658  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\'', flags));
9659  return;
9660  }
9661  case 'a': {
9662  parser->current.end++;
9663  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\a', flags));
9664  return;
9665  }
9666  case 'b': {
9667  parser->current.end++;
9668  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\b', flags));
9669  return;
9670  }
9671  case 'e': {
9672  parser->current.end++;
9673  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\033', flags));
9674  return;
9675  }
9676  case 'f': {
9677  parser->current.end++;
9678  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\f', flags));
9679  return;
9680  }
9681  case 'n': {
9682  parser->current.end++;
9683  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\n', flags));
9684  return;
9685  }
9686  case 'r': {
9687  parser->current.end++;
9688  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\r', flags));
9689  return;
9690  }
9691  case 's': {
9692  parser->current.end++;
9693  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(' ', flags));
9694  return;
9695  }
9696  case 't': {
9697  parser->current.end++;
9698  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\t', flags));
9699  return;
9700  }
9701  case 'v': {
9702  parser->current.end++;
9703  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\v', flags));
9704  return;
9705  }
// Octal escape: one to three octal digits. The accumulated value is kept in
// a uint8_t, so bits beyond the low eight are discarded by the casts.
9706  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
9707  uint8_t value = (uint8_t) (*parser->current.end - '0');
9708  parser->current.end++;
9709 
9710  if (pm_char_is_octal_digit(peek(parser))) {
9711  value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9712  parser->current.end++;
9713 
9714  if (pm_char_is_octal_digit(peek(parser))) {
9715  value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
9716  parser->current.end++;
9717  }
9718  }
9719 
// NOTE(review): unlike the single-character cases above, the value is written
// without going through escape_byte(), so the control/meta flags are not
// applied to it here - confirm that this is intended.
9720  escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
9721  return;
9722  }
// Hexadecimal escape: \x followed by one or two hexadecimal digits.
9723  case 'x': {
// Start of the escape, used when copying the source text of the escape into
// the regular expression buffer.
9724  const uint8_t *start = parser->current.end - 1;
9725 
9726  parser->current.end++;
9727  uint8_t byte = peek(parser);
9728 
9729  if (pm_char_is_hexadecimal_digit(byte)) {
9730  uint8_t value = escape_hexadecimal_digit(byte);
9731  parser->current.end++;
9732 
9733  byte = peek(parser);
9734  if (pm_char_is_hexadecimal_digit(byte)) {
9735  value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
9736  parser->current.end++;
9737  }
9738 
9739  value = escape_byte(value, flags);
// For regular expressions: when control/meta modified the value, the
// resulting byte is written as a fresh \xNN escape; otherwise the source
// text of the escape is copied as written.
9740  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9741  if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
9742  pm_buffer_append_format(regular_expression_buffer, "\\x%02X", value);
9743  } else {
9744  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9745  }
9746  }
9747 
9748  escape_write_byte_encoded(parser, buffer, value);
9749  } else {
// \x with no hexadecimal digit after it; nothing is written to the buffer.
9750  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
9751  }
9752 
9753  return;
9754  }
// Unicode escape. Three shapes are handled below: the input ending right
// after \u, the braced list form \u{...}, and the fixed form \uXXXX.
9755  case 'u': {
9756  const uint8_t *start = parser->current.end - 1;
9757  parser->current.end++;
9758 
9759  if (parser->current.end == parser->end) {
// This declaration shadows the outer `start`; after the increment above,
// current.end - 2 is the same address as the outer value.
9760  const uint8_t *start = parser->current.end - 2;
9761  PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9762  } else if (peek(parser) == '{') {
// Braced form: a whitespace-separated list of codepoints of up to six
// hexadecimal digits each.
9763  const uint8_t *unicode_codepoints_start = parser->current.end - 2;
9764  parser->current.end++;
9765 
// Skip leading whitespace, including any two-character sequences made of a
// backslash followed by the letter n.
9766  size_t whitespace;
9767  while (true) {
9768  if ((whitespace = pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end)) > 0) {
9769  parser->current.end += whitespace;
9770  } else if (peek(parser) == '\\' && peek_offset(parser, 1) == 'n') {
9771  // This is super hacky, but it gets us nicer error
9772  // messages because we can still pass it off to the
9773  // regular expression engine even if we hit an
9774  // unterminated regular expression.
9775  parser->current.end += 2;
9776  } else {
9777  break;
9778  }
9779  }
9780 
9781  const uint8_t *extra_codepoints_start = NULL;
9782  int codepoints_count = 0;
9783 
// Each iteration reads one run of hexadecimal digits as a codepoint, writes
// it, and skips the whitespace that follows it.
9784  while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
9785  const uint8_t *unicode_start = parser->current.end;
9786  size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
9787 
9788  if (hexadecimal_length > 6) {
9789  // \u{nnnn} character literal allows only 1-6 hexadecimal digits
// The error is recorded but processing continues with the over-long run.
9790  pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
9791  } else if (hexadecimal_length == 0) {
9792  // there are not hexadecimal characters
9793 
9794  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9795  // If this is a regular expression, we are going to
9796  // let the regular expression engine handle this
9797  // error instead of us.
9798  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9799  } else {
9800  pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE);
9801  pm_parser_err(parser, parser->current.end, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9802  }
9803 
9804  return;
9805  }
9806 
9807  parser->current.end += hexadecimal_length;
9808  codepoints_count++;
// Remember where the second codepoint starts so that the error for a
// character literal with more than one codepoint can point at it.
9809  if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
9810  extra_codepoints_start = unicode_start;
9811  }
9812 
9813  uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length);
9814  escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
9815 
9816  parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
9817  }
9818 
9819  // ?\u{nnnn} character literal should contain only one codepoint
9820  // and cannot be like ?\u{nnnn mmmm}.
9821  if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
9822  pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
9823  }
9824 
9825  if (parser->current.end == parser->end) {
// The input ended before the closing brace.
9826  PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (int) (parser->current.end - start), start);
9827  } else if (peek(parser) == '}') {
9828  parser->current.end++;
9829  } else {
// NOTE(review): the loop above only exits at the end of the input or at a
// '}', so this branch looks unreachable - confirm.
9830  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9831  // If this is a regular expression, we are going to let
9832  // the regular expression engine handle this error
9833  // instead of us.
9834  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9835  } else {
9836  pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
9837  }
9838  }
9839 
// For regular expressions, the source text of the whole escape is copied
// into the regular expression buffer.
9840  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9841  pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
9842  }
9843  } else {
// Fixed form: at most four bytes are examined for hexadecimal digits, and
// only exactly four digits form a valid escape.
9844  size_t length = pm_strspn_hexadecimal_digit(parser->current.end, MIN(parser->end - parser->current.end, 4));
9845 
9846  if (length == 0) {
9847  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9848  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9849  } else {
// Shadows the outer `start` (same address, see the note further up).
9850  const uint8_t *start = parser->current.end - 2;
9851  PM_PARSER_ERR_FORMAT(parser, start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
9852  }
9853  } else if (length == 4) {
9854  uint32_t value = escape_unicode(parser, parser->current.end, 4);
9855 
9856  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9857  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
9858  }
9859 
9860  escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
9861  parser->current.end += 4;
9862  } else {
// One to three digits: the digits are consumed, but nothing is written to
// the main buffer.
9863  parser->current.end += length;
9864 
9865  if (flags & PM_ESCAPE_FLAG_REGEXP) {
9866  // If this is a regular expression, we are going to let
9867  // the regular expression engine handle this error
9868  // instead of us.
9869  pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
9870  } else {
9871  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
9872  }
9873  }
9874  }
9875 
9876  return;
9877  }
// Control escape written as \cX.
9878  case 'c': {
9879  parser->current.end++;
// A control escape nested inside another one is reported, but reading
// continues.
9880  if (flags & PM_ESCAPE_FLAG_CONTROL) {
9881  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9882  }
9883 
9884  if (parser->current.end == parser->end) {
9885  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9886  return;
9887  }
9888 
9889  uint8_t peeked = peek(parser);
9890  switch (peeked) {
// \c? writes 0x7f (with only the incoming flags applied).
9891  case '?': {
9892  parser->current.end++;
9893  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9894  return;
9895  }
// Another escape follows: \u and \U are rejected, anything else is read
// recursively with the control flag added.
9896  case '\\':
9897  parser->current.end++;
9898 
9899  if (match(parser, 'u') || match(parser, 'U')) {
9900  pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9901  return;
9902  }
9903 
9904  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9905  return;
// A literal space or tab is accepted, with a warning naming the escape that
// should have been used.
9906  case ' ':
9907  parser->current.end++;
9908  escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9909  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9910  return;
9911  case '\t':
9912  parser->current.end++;
9913  escape_read_warn(parser, flags, 0, "\\t");
9914  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9915  return;
9916  default: {
9917  if (!char_is_ascii_printable(peeked)) {
9918  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9919  return;
9920  }
9921 
9922  parser->current.end++;
9923  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9924  return;
9925  }
9926  }
9927  }
// Control escape written as \C-X. Same structure as the \c case above, with
// the additional requirement of a '-' after the C and with error ranges that
// start at the beginning of the current token.
9928  case 'C': {
9929  parser->current.end++;
9930  if (flags & PM_ESCAPE_FLAG_CONTROL) {
9931  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9932  }
9933 
9934  if (peek(parser) != '-') {
// The error range is extended to include the offending character.
9935  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9936  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9937  return;
9938  }
9939 
9940  parser->current.end++;
9941  if (parser->current.end == parser->end) {
9942  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9943  return;
9944  }
9945 
9946  uint8_t peeked = peek(parser);
9947  switch (peeked) {
9948  case '?': {
9949  parser->current.end++;
9950  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9951  return;
9952  }
9953  case '\\':
9954  parser->current.end++;
9955 
9956  if (match(parser, 'u') || match(parser, 'U')) {
9957  pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
9958  return;
9959  }
9960 
9961  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9962  return;
9963  case ' ':
9964  parser->current.end++;
9965  escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL, "\\s");
9966  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9967  return;
9968  case '\t':
9969  parser->current.end++;
9970  escape_read_warn(parser, flags, 0, "\\t");
9971  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9972  return;
9973  default: {
9974  if (!char_is_ascii_printable(peeked)) {
9975  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9976  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_CONTROL);
9977  return;
9978  }
9979 
9980  parser->current.end++;
9981  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9982  return;
9983  }
9984  }
9985  }
// Meta escape written as \M-X. Mirrors the \C- case but adds the meta flag,
// and has no special handling for '?'.
9986  case 'M': {
9987  parser->current.end++;
9988  if (flags & PM_ESCAPE_FLAG_META) {
9989  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9990  }
9991 
9992  if (peek(parser) != '-') {
9993  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
9994  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
9995  return;
9996  }
9997 
9998  parser->current.end++;
9999  if (parser->current.end == parser->end) {
10000  pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
10001  return;
10002  }
10003 
10004  uint8_t peeked = peek(parser);
10005  switch (peeked) {
10006  case '\\':
10007  parser->current.end++;
10008 
10009  if (match(parser, 'u') || match(parser, 'U')) {
10010  pm_parser_err(parser, parser->current.start, parser->current.end, PM_ERR_INVALID_ESCAPE_CHARACTER);
10011  return;
10012  }
10013 
10014  escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
10015  return;
10016  case ' ':
10017  parser->current.end++;
10018  escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META, "\\s");
10019  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10020  return;
10021  case '\t':
10022  parser->current.end++;
// The control flag is masked out of the flags used for the warning text.
10023  escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META, "\\t");
10024  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10025  return;
10026  default:
10027  if (!char_is_ascii_printable(peeked)) {
10028  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10029  pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
10030  return;
10031  }
10032 
10033  parser->current.end++;
10034  escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
10035  return;
10036  }
10037  }
// A carriage return immediately followed by a line feed is consumed as a
// pair and written as a single '\n' (with the flags applied). A carriage
// return on its own falls through to the default case.
10038  case '\r': {
10039  if (peek_offset(parser, 1) == '\n') {
10040  parser->current.end += 2;
10041  escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10042  return;
10043  }
10044  }
10045  /* fallthrough */
// Any other character is copied as-is (all of its bytes in the current
// encoding). Having no character left at all is an error.
10046  default: {
10047  if (parser->current.end < parser->end) {
10048  escape_write_escape_encoded(parser, buffer);
10049  } else {
10050  pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
10051  }
10052  return;
10053  }
10054  }
10055 }
10056 
10082 static pm_token_type_t
10083 lex_question_mark(pm_parser_t *parser) {
10084  if (lex_state_end_p(parser)) {
10085  lex_state_set(parser, PM_LEX_STATE_BEG);
10086  return PM_TOKEN_QUESTION_MARK;
10087  }
10088 
10089  if (parser->current.end >= parser->end) {
10090  pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
10091  pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10093  }
10094 
10095  if (pm_char_is_whitespace(*parser->current.end)) {
10096  lex_state_set(parser, PM_LEX_STATE_BEG);
10097  return PM_TOKEN_QUESTION_MARK;
10098  }
10099 
10100  lex_state_set(parser, PM_LEX_STATE_BEG);
10101 
10102  if (match(parser, '\\')) {
10103  lex_state_set(parser, PM_LEX_STATE_END);
10104 
10105  pm_buffer_t buffer;
10106  pm_buffer_init_capacity(&buffer, 3);
10107 
10108  escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
10109  pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
10110 
10112  } else {
10113  size_t encoding_width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10114 
10115  // Ternary operators can have a ? immediately followed by an identifier
10116  // which starts with an underscore. We check for this case here.
10117  if (
10118  !(parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end) || peek(parser) == '_') ||
10119  (
10120  (parser->current.end + encoding_width >= parser->end) ||
10121  !char_is_identifier(parser, parser->current.end + encoding_width)
10122  )
10123  ) {
10124  lex_state_set(parser, PM_LEX_STATE_END);
10125  parser->current.end += encoding_width;
10126  pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
10128  }
10129  }
10130 
10131  return PM_TOKEN_QUESTION_MARK;
10132 }
10133 
10138 static pm_token_type_t
10139 lex_at_variable(pm_parser_t *parser) {
10141  size_t width;
10142 
10143  if (parser->current.end < parser->end && (width = char_is_identifier_start(parser, parser->current.end)) > 0) {
10144  parser->current.end += width;
10145 
10146  while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
10147  parser->current.end += width;
10148  }
10149  } else if (parser->current.end < parser->end && pm_char_is_decimal_digit(*parser->current.end)) {
10150  pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
10151  if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) {
10152  diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
10153  }
10154 
10155  size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10156  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
10157  } else {
10158  pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
10159  pm_parser_err_token(parser, &parser->current, diag_id);
10160  }
10161 
10162  // If we're lexing an embedded variable, then we need to pop back into the
10163  // parent lex context.
10164  if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
10165  lex_mode_pop(parser);
10166  }
10167 
10168  return type;
10169 }
10170 
10174 static inline void
10175 parser_lex_callback(pm_parser_t *parser) {
10176  if (parser->lex_callback) {
10177  parser->lex_callback->callback(parser->lex_callback->data, parser, &parser->current);
10178  }
10179 }
10180 
10184 static inline pm_comment_t *
10185 parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
10186  pm_comment_t *comment = (pm_comment_t *) xcalloc(1, sizeof(pm_comment_t));
10187  if (comment == NULL) return NULL;
10188 
10189  *comment = (pm_comment_t) {
10190  .type = type,
10191  .location = { parser->current.start, parser->current.end }
10192  };
10193 
10194  return comment;
10195 }
10196 
10202 static pm_token_type_t
10203 lex_embdoc(pm_parser_t *parser) {
10204  // First, lex out the EMBDOC_BEGIN token.
10205  const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10206 
10207  if (newline == NULL) {
10208  parser->current.end = parser->end;
10209  } else {
10210  pm_newline_list_append(&parser->newline_list, newline);
10211  parser->current.end = newline + 1;
10212  }
10213 
10214  parser->current.type = PM_TOKEN_EMBDOC_BEGIN;
10215  parser_lex_callback(parser);
10216 
10217  // Now, create a comment that is going to be attached to the parser.
10218  pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
10219  if (comment == NULL) return PM_TOKEN_EOF;
10220 
10221  // Now, loop until we find the end of the embedded documentation or the end
10222  // of the file.
10223  while (parser->current.end + 4 <= parser->end) {
10224  parser->current.start = parser->current.end;
10225 
10226  // If we've hit the end of the embedded documentation then we'll return
10227  // that token here.
10228  if (
10229  (memcmp(parser->current.end, "=end", 4) == 0) &&
10230  (
10231  (parser->current.end + 4 == parser->end) || // end of file
10232  pm_char_is_whitespace(parser->current.end[4]) || // whitespace
10233  (parser->current.end[4] == '\0') || // NUL or end of script
10234  (parser->current.end[4] == '\004') || // ^D
10235  (parser->current.end[4] == '\032') // ^Z
10236  )
10237  ) {
10238  const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10239 
10240  if (newline == NULL) {
10241  parser->current.end = parser->end;
10242  } else {
10243  pm_newline_list_append(&parser->newline_list, newline);
10244  parser->current.end = newline + 1;
10245  }
10246 
10247  parser->current.type = PM_TOKEN_EMBDOC_END;
10248  parser_lex_callback(parser);
10249 
10250  comment->location.end = parser->current.end;
10251  pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10252 
10253  return PM_TOKEN_EMBDOC_END;
10254  }
10255 
10256  // Otherwise, we'll parse until the end of the line and return a line of
10257  // embedded documentation.
10258  const uint8_t *newline = next_newline(parser->current.end, parser->end - parser->current.end);
10259 
10260  if (newline == NULL) {
10261  parser->current.end = parser->end;
10262  } else {
10263  pm_newline_list_append(&parser->newline_list, newline);
10264  parser->current.end = newline + 1;
10265  }
10266 
10267  parser->current.type = PM_TOKEN_EMBDOC_LINE;
10268  parser_lex_callback(parser);
10269  }
10270 
10271  pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
10272 
10273  comment->location.end = parser->current.end;
10274  pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10275 
10276  return PM_TOKEN_EOF;
10277 }
10278 
10284 static inline void
10285 parser_lex_ignored_newline(pm_parser_t *parser) {
10286  parser->current.type = PM_TOKEN_IGNORED_NEWLINE;
10287  parser_lex_callback(parser);
10288 }
10289 
10299 static inline void
10300 parser_flush_heredoc_end(pm_parser_t *parser) {
10301  assert(parser->heredoc_end <= parser->end);
10302  parser->next_start = parser->heredoc_end;
10303  parser->heredoc_end = NULL;
10304 }
10305 
10309 static bool
10310 parser_end_of_line_p(const pm_parser_t *parser) {
10311  const uint8_t *cursor = parser->current.end;
10312 
10313  while (cursor < parser->end && *cursor != '\n' && *cursor != '#') {
10314  if (!pm_char_is_inline_whitespace(*cursor++)) return false;
10315  }
10316 
10317  return true;
10318 }
10319 
10338 typedef struct {
10344 
10349  const uint8_t *cursor;
10351 
10364 typedef struct {
10367 
10371 
10375 static inline void
10376 pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
10377  pm_buffer_append_byte(&token_buffer->buffer, byte);
10378 }
10379 
10380 static inline void
10381 pm_regexp_token_buffer_push_byte(pm_regexp_token_buffer_t *token_buffer, uint8_t byte) {
10382  pm_buffer_append_byte(&token_buffer->regexp_buffer, byte);
10383 }
10384 
10388 static inline size_t
10389 parser_char_width(const pm_parser_t *parser) {
10390  size_t width;
10391  if (parser->encoding_changed) {
10392  width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
10393  } else {
10394  width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
10395  }
10396 
10397  // TODO: If the character is invalid in the given encoding, then we'll just
10398  // push one byte into the buffer. This should actually be an error.
10399  return (width == 0 ? 1 : width);
10400 }
10401 
10405 static void
10406 pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
10407  size_t width = parser_char_width(parser);
10408  pm_buffer_append_bytes(&token_buffer->buffer, parser->current.end, width);
10409  parser->current.end += width;
10410 }
10411 
10412 static void
10413 pm_regexp_token_buffer_push_escaped(pm_regexp_token_buffer_t *token_buffer, pm_parser_t *parser) {
10414  size_t width = parser_char_width(parser);
10415  pm_buffer_append_bytes(&token_buffer->base.buffer, parser->current.end, width);
10416  pm_buffer_append_bytes(&token_buffer->regexp_buffer, parser->current.end, width);
10417  parser->current.end += width;
10418 }
10419 
/**
 * Check whether every byte in the given slice is a 7-bit ASCII value.
 *
 * @param value Pointer to the first byte of the slice.
 * @param length Number of bytes in the slice.
 * @return true if no byte has its high bit set (including when the slice is
 *     empty), false otherwise.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    for (; length > 0; length--, value++) {
        if (*value >= 0x80) return false;
    }

    return true;
}
10428 
10435 static inline void
10436 pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10437  pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->buffer), pm_buffer_length(&token_buffer->buffer));
10438 }
10439 
10440 static inline void
10441 pm_regexp_token_buffer_copy(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10442  pm_string_owned_init(&parser->current_string, (uint8_t *) pm_buffer_value(&token_buffer->base.buffer), pm_buffer_length(&token_buffer->base.buffer));
10443  parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p((const uint8_t *) pm_buffer_value(&token_buffer->regexp_buffer), pm_buffer_length(&token_buffer->regexp_buffer));
10444  pm_buffer_free(&token_buffer->regexp_buffer);
10445 }
10446 
10456 static void
10457 pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10458  if (token_buffer->cursor == NULL) {
10459  pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10460  } else {
10461  pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
10462  pm_token_buffer_copy(parser, token_buffer);
10463  }
10464 }
10465 
10466 static void
10467 pm_regexp_token_buffer_flush(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10468  if (token_buffer->base.cursor == NULL) {
10469  pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
10470  parser->current_regular_expression_ascii_only = pm_slice_ascii_only_p(parser->current.start, (size_t) (parser->current.end - parser->current.start));
10471  } else {
10472  pm_buffer_append_bytes(&token_buffer->base.buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10473  pm_buffer_append_bytes(&token_buffer->regexp_buffer, token_buffer->base.cursor, (size_t) (parser->current.end - token_buffer->base.cursor));
10474  pm_regexp_token_buffer_copy(parser, token_buffer);
10475  }
10476 }
10477 
10478 #define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
10479 
10488 static void
10489 pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10490  const uint8_t *start;
10491  if (token_buffer->cursor == NULL) {
10492  pm_buffer_init_capacity(&token_buffer->buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10493  start = parser->current.start;
10494  } else {
10495  start = token_buffer->cursor;
10496  }
10497 
10498  const uint8_t *end = parser->current.end - 1;
10499  pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10500 
10501  token_buffer->cursor = end;
10502 }
10503 
10504 static void
10505 pm_regexp_token_buffer_escape(pm_parser_t *parser, pm_regexp_token_buffer_t *token_buffer) {
10506  const uint8_t *start;
10507  if (token_buffer->base.cursor == NULL) {
10508  pm_buffer_init_capacity(&token_buffer->base.buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10509  pm_buffer_init_capacity(&token_buffer->regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
10510  start = parser->current.start;
10511  } else {
10512  start = token_buffer->base.cursor;
10513  }
10514 
10515  const uint8_t *end = parser->current.end - 1;
10516  pm_buffer_append_bytes(&token_buffer->base.buffer, start, (size_t) (end - start));
10517  pm_buffer_append_bytes(&token_buffer->regexp_buffer, start, (size_t) (end - start));
10518 
10519  token_buffer->base.cursor = end;
10520 }
10521 
10522 #undef PM_TOKEN_BUFFER_DEFAULT_SIZE
10523 
10528 static inline size_t
10529 pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
10530  size_t whitespace = 0;
10531 
10532  switch (indent) {
10533  case PM_HEREDOC_INDENT_NONE:
10534  // Do nothing, we can't match a terminator with
10535  // indentation and there's no need to calculate common
10536  // whitespace.
10537  break;
10538  case PM_HEREDOC_INDENT_DASH:
10539  // Skip past inline whitespace.
10540  *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
10541  break;
10542  case PM_HEREDOC_INDENT_TILDE:
10543  // Skip past inline whitespace and calculate common
10544  // whitespace.
10545  while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
10546  if (**cursor == '\t') {
10547  whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
10548  } else {
10549  whitespace++;
10550  }
10551  (*cursor)++;
10552  }
10553 
10554  break;
10555  }
10556 
10557  return whitespace;
10558 }
10559 
10564 static uint8_t
10565 pm_lex_percent_delimiter(pm_parser_t *parser) {
10566  size_t eol_length = match_eol(parser);
10567 
10568  if (eol_length) {
10569  if (parser->heredoc_end) {
10570  // If we have already lexed a heredoc, then the newline has already
10571  // been added to the list. In this case we want to just flush the
10572  // heredoc end.
10573  parser_flush_heredoc_end(parser);
10574  } else {
10575  // Otherwise, we'll add the newline to the list of newlines.
10576  pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10577  }
10578 
10579  const uint8_t delimiter = *parser->current.end;
10580  parser->current.end += eol_length;
10581 
10582  return delimiter;
10583  }
10584 
10585  return *parser->current.end++;
10586 }
10587 
// Finish lexing the current token: store token_type as the current token's
// type, report the token to the lex callback, and return from the enclosing
// function. The expansion is several statements ending in a bare `return`
// (the caller supplies the trailing semicolon), so it is only safe as a full
// statement inside a braced body or a switch case of a function returning
// void.
10592 #define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
10593 
10600 static void
10601 parser_lex(pm_parser_t *parser) {
10602  assert(parser->current.end <= parser->end);
10603  parser->previous = parser->current;
10604 
10605  // This value mirrors cmd_state from CRuby.
10606  bool previous_command_start = parser->command_start;
10607  parser->command_start = false;
10608 
10609  // This is used to communicate to the newline lexing function that we've
10610  // already seen a comment.
10611  bool lexed_comment = false;
10612 
10613  // Here we cache the current value of the semantic token seen flag. This is
10614  // used to reset it in case we find a token that shouldn't flip this flag.
10615  unsigned int semantic_token_seen = parser->semantic_token_seen;
10616  parser->semantic_token_seen = true;
10617 
10618  switch (parser->lex_modes.current->mode) {
10619  case PM_LEX_DEFAULT:
10620  case PM_LEX_EMBEXPR:
10621  case PM_LEX_EMBVAR:
10622 
10623  // We have a specific named label here because we are going to jump back to
10624  // this location in the event that we have lexed a token that should not be
10625  // returned to the parser. This includes comments, ignored newlines, and
10626  // invalid tokens of some form.
10627  lex_next_token: {
10628  // If we have the special next_start pointer set, then we're going to jump
10629  // to that location and start lexing from there.
10630  if (parser->next_start != NULL) {
10631  parser->current.end = parser->next_start;
10632  parser->next_start = NULL;
10633  }
10634 
10635  // This value mirrors space_seen from CRuby. It tracks whether or not
10636  // space has been eaten before the start of the next token.
10637  bool space_seen = false;
10638 
10639  // First, we're going to skip past any whitespace at the front of the next
10640  // token.
10641  bool chomping = true;
10642  while (parser->current.end < parser->end && chomping) {
10643  switch (*parser->current.end) {
10644  case ' ':
10645  case '\t':
10646  case '\f':
10647  case '\v':
10648  parser->current.end++;
10649  space_seen = true;
10650  break;
10651  case '\r':
10652  if (match_eol_offset(parser, 1)) {
10653  chomping = false;
10654  } else {
10655  pm_parser_warn(parser, parser->current.end, parser->current.end + 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
10656  parser->current.end++;
10657  space_seen = true;
10658  }
10659  break;
10660  case '\\': {
10661  size_t eol_length = match_eol_offset(parser, 1);
10662  if (eol_length) {
10663  if (parser->heredoc_end) {
10664  parser->current.end = parser->heredoc_end;
10665  parser->heredoc_end = NULL;
10666  } else {
10667  parser->current.end += eol_length + 1;
10668  pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10669  space_seen = true;
10670  }
10671  } else if (pm_char_is_inline_whitespace(*parser->current.end)) {
10672  parser->current.end += 2;
10673  } else {
10674  chomping = false;
10675  }
10676 
10677  break;
10678  }
10679  default:
10680  chomping = false;
10681  break;
10682  }
10683  }
10684 
10685  // Next, we'll set to start of this token to be the current end.
10686  parser->current.start = parser->current.end;
10687 
10688  // We'll check if we're at the end of the file. If we are, then we
10689  // need to return the EOF token.
10690  if (parser->current.end >= parser->end) {
10691  LEX(PM_TOKEN_EOF);
10692  }
10693 
10694  // Finally, we'll check the current character to determine the next
10695  // token.
10696  switch (*parser->current.end++) {
10697  case '\0': // NUL or end of script
10698  case '\004': // ^D
10699  case '\032': // ^Z
10700  parser->current.end--;
10701  LEX(PM_TOKEN_EOF);
10702 
10703  case '#': { // comments
10704  const uint8_t *ending = next_newline(parser->current.end, parser->end - parser->current.end);
10705  parser->current.end = ending == NULL ? parser->end : ending;
10706 
10707  // If we found a comment while lexing, then we're going to
10708  // add it to the list of comments in the file and keep
10709  // lexing.
10710  pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
10711  pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
10712 
10713  if (ending) parser->current.end++;
10714  parser->current.type = PM_TOKEN_COMMENT;
10715  parser_lex_callback(parser);
10716 
10717  // Here, parse the comment to see if it's a magic comment
10718  // and potentially change state on the parser.
10719  if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
10720  ptrdiff_t length = parser->current.end - parser->current.start;
10721 
10722  // If we didn't find a magic comment within the first
10723  // pass and we're at the start of the file, then we need
10724  // to do another pass to potentially find other patterns
10725  // for encoding comments.
10726  if (length >= 10 && !parser->encoding_locked) {
10727  parser_lex_magic_comment_encoding(parser);
10728  }
10729  }
10730 
10731  lexed_comment = true;
10732  }
10733  /* fallthrough */
10734  case '\r':
10735  case '\n': {
10736  parser->semantic_token_seen = semantic_token_seen & 0x1;
10737  size_t eol_length = match_eol_at(parser, parser->current.end - 1);
10738 
10739  if (eol_length) {
10740  // The only way you can have carriage returns in this
10741  // particular loop is if you have a carriage return
10742  // followed by a newline. In that case we'll just skip
10743  // over the carriage return and continue lexing, in
10744  // order to make it so that the newline token
10745  // encapsulates both the carriage return and the
10746  // newline. Note that we need to check that we haven't
10747  // already lexed a comment here because that falls
10748  // through into here as well.
10749  if (!lexed_comment) {
10750  parser->current.end += eol_length - 1; // skip CR
10751  }
10752 
10753  if (parser->heredoc_end == NULL) {
10754  pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
10755  }
10756  }
10757 
10758  if (parser->heredoc_end) {
10759  parser_flush_heredoc_end(parser);
10760  }
10761 
10762  // If this is an ignored newline, then we can continue lexing after
10763  // calling the callback with the ignored newline token.
10764  switch (lex_state_ignored_p(parser)) {
10765  case PM_IGNORED_NEWLINE_NONE:
10766  break;
10767  case PM_IGNORED_NEWLINE_PATTERN:
10768  if (parser->pattern_matching_newlines || parser->in_keyword_arg) {
10769  if (!lexed_comment) parser_lex_ignored_newline(parser);
10770  lex_state_set(parser, PM_LEX_STATE_BEG);
10771  parser->command_start = true;
10772  parser->current.type = PM_TOKEN_NEWLINE;
10773  return;
10774  }
10775  /* fallthrough */
10776  case PM_IGNORED_NEWLINE_ALL:
10777  if (!lexed_comment) parser_lex_ignored_newline(parser);
10778  lexed_comment = false;
10779  goto lex_next_token;
10780  }
10781 
10782  // Here we need to look ahead and see if there is a call operator
10783  // (either . or &.) that starts the next line. If there is, then this
10784  // is going to become an ignored newline and we're going to instead
10785  // return the call operator.
10786  const uint8_t *next_content = parser->next_start == NULL ? parser->current.end : parser->next_start;
10787  next_content += pm_strspn_inline_whitespace(next_content, parser->end - next_content);
10788 
10789  if (next_content < parser->end) {
10790  // If we hit a comment after a newline, then we're going to check
10791  // if it's ignored or if it's followed by a method call ('.').
10792  // If it is, then we're going to call the
10793  // callback with an ignored newline and then continue lexing.
10794  // Otherwise we'll return a regular newline.
10795  if (next_content[0] == '#') {
10796  // Here we look for a "." or "&." following a "\n".
10797  const uint8_t *following = next_newline(next_content, parser->end - next_content);
10798 
10799  while (following && (following + 1 < parser->end)) {
10800  following++;
10801  following += pm_strspn_inline_whitespace(following, parser->end - following);
10802 
10803  // If this is not followed by a comment, then we can break out
10804  // of this loop.
10805  if (peek_at(parser, following) != '#') break;
10806 
10807  // If there is a comment, then we need to find the end of the
10808  // comment and continue searching from there.
10809  following = next_newline(following, parser->end - following);
10810  }
10811 
10812  // If the lex state was ignored, or we hit a '.' or a '&.',
10813  // we will lex the ignored newline
10814  if (
10815  lex_state_ignored_p(parser) ||
10816  (following && (
10817  (peek_at(parser, following) == '.') ||
10818  (peek_at(parser, following) == '&' && peek_at(parser, following + 1) == '.')
10819  ))
10820  ) {
10821  if (!lexed_comment) parser_lex_ignored_newline(parser);
10822  lexed_comment = false;
10823  goto lex_next_token;
10824  }
10825  }
10826 
10827  // If we hit a . after a newline, then we're in a call chain and
10828  // we need to return the call operator.
10829  if (next_content[0] == '.') {
10830  // To match ripper, we need to emit an ignored newline even though
10831  // it's a real newline in the case that we have a beginless range
10832  // on a subsequent line.
10833  if (peek_at(parser, next_content + 1) == '.') {
10834  if (!lexed_comment) parser_lex_ignored_newline(parser);
10835  lex_state_set(parser, PM_LEX_STATE_BEG);
10836  parser->command_start = true;
10837  parser->current.type = PM_TOKEN_NEWLINE;
10838  return;
10839  }
10840 
10841  if (!lexed_comment) parser_lex_ignored_newline(parser);
10842  lex_state_set(parser, PM_LEX_STATE_DOT);
10843  parser->current.start = next_content;
10844  parser->current.end = next_content + 1;
10845  parser->next_start = NULL;
10846  LEX(PM_TOKEN_DOT);
10847  }
10848 
10849  // If we hit a &. after a newline, then we're in a call chain and
10850  // we need to return the call operator.
10851  if (peek_at(parser, next_content) == '&' && peek_at(parser, next_content + 1) == '.') {
10852  if (!lexed_comment) parser_lex_ignored_newline(parser);
10853  lex_state_set(parser, PM_LEX_STATE_DOT);
10854  parser->current.start = next_content;
10855  parser->current.end = next_content + 2;
10856  parser->next_start = NULL;
10858  }
10859  }
10860 
10861  // At this point we know this is a regular newline, and we can set the
10862  // necessary state and return the token.
10863  lex_state_set(parser, PM_LEX_STATE_BEG);
10864  parser->command_start = true;
10865  parser->current.type = PM_TOKEN_NEWLINE;
10866  if (!lexed_comment) parser_lex_callback(parser);
10867  return;
10868  }
10869 
10870  // ,
10871  case ',':
10872  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10873  LEX(PM_TOKEN_COMMA);
10874 
10875  // (
10876  case '(': {
10878 
10879  if (space_seen && (lex_state_arg_p(parser) || parser->lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10881  }
10882 
10883  parser->enclosure_nesting++;
10884  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10885  pm_do_loop_stack_push(parser, false);
10886  LEX(type);
10887  }
10888 
10889  // )
10890  case ')':
10891  parser->enclosure_nesting--;
10892  lex_state_set(parser, PM_LEX_STATE_ENDFN);
10893  pm_do_loop_stack_pop(parser);
10895 
10896  // ;
10897  case ';':
10898  lex_state_set(parser, PM_LEX_STATE_BEG);
10899  parser->command_start = true;
10900  LEX(PM_TOKEN_SEMICOLON);
10901 
10902  // [ [] []=
10903  case '[':
10904  parser->enclosure_nesting++;
10906 
10907  if (lex_state_operator_p(parser)) {
10908  if (match(parser, ']')) {
10909  parser->enclosure_nesting--;
10910  lex_state_set(parser, PM_LEX_STATE_ARG);
10912  }
10913 
10914  lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10915  LEX(type);
10916  }
10917 
10918  if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10920  }
10921 
10922  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10923  pm_do_loop_stack_push(parser, false);
10924  LEX(type);
10925 
10926  // ]
10927  case ']':
10928  parser->enclosure_nesting--;
10929  lex_state_set(parser, PM_LEX_STATE_END);
10930  pm_do_loop_stack_pop(parser);
10932 
10933  // {
10934  case '{': {
10936 
10937  if (parser->enclosure_nesting == parser->lambda_enclosure_nesting) {
10938  // This { begins a lambda
10939  parser->command_start = true;
10940  lex_state_set(parser, PM_LEX_STATE_BEG);
10942  } else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10943  // This { begins a hash literal
10944  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10945  } else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10946  // This { begins a block
10947  parser->command_start = true;
10948  lex_state_set(parser, PM_LEX_STATE_BEG);
10949  } else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10950  // This { begins a block on a command
10951  parser->command_start = true;
10952  lex_state_set(parser, PM_LEX_STATE_BEG);
10953  } else {
10954  // This { begins a hash literal
10955  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10956  }
10957 
10958  parser->enclosure_nesting++;
10959  parser->brace_nesting++;
10960  pm_do_loop_stack_push(parser, false);
10961 
10962  LEX(type);
10963  }
10964 
10965  // }
10966  case '}':
10967  parser->enclosure_nesting--;
10968  pm_do_loop_stack_pop(parser);
10969 
10970  if ((parser->lex_modes.current->mode == PM_LEX_EMBEXPR) && (parser->brace_nesting == 0)) {
10971  lex_mode_pop(parser);
10972  LEX(PM_TOKEN_EMBEXPR_END);
10973  }
10974 
10975  parser->brace_nesting--;
10976  lex_state_set(parser, PM_LEX_STATE_END);
10977  LEX(PM_TOKEN_BRACE_RIGHT);
10978 
10979  // * ** **= *=
10980  case '*': {
10981  if (match(parser, '*')) {
10982  if (match(parser, '=')) {
10983  lex_state_set(parser, PM_LEX_STATE_BEG);
10985  }
10986 
10988 
10989  if (lex_state_spcarg_p(parser, space_seen)) {
10990  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
10992  } else if (lex_state_beg_p(parser)) {
10994  } else if (ambiguous_operator_p(parser, space_seen)) {
10995  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "**", "argument prefix");
10996  }
10997 
10998  if (lex_state_operator_p(parser)) {
10999  lex_state_set(parser, PM_LEX_STATE_ARG);
11000  } else {
11001  lex_state_set(parser, PM_LEX_STATE_BEG);
11002  }
11003 
11004  LEX(type);
11005  }
11006 
11007  if (match(parser, '=')) {
11008  lex_state_set(parser, PM_LEX_STATE_BEG);
11009  LEX(PM_TOKEN_STAR_EQUAL);
11010  }
11011 
11013 
11014  if (lex_state_spcarg_p(parser, space_seen)) {
11015  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
11016  type = PM_TOKEN_USTAR;
11017  } else if (lex_state_beg_p(parser)) {
11018  type = PM_TOKEN_USTAR;
11019  } else if (ambiguous_operator_p(parser, space_seen)) {
11020  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "*", "argument prefix");
11021  }
11022 
11023  if (lex_state_operator_p(parser)) {
11024  lex_state_set(parser, PM_LEX_STATE_ARG);
11025  } else {
11026  lex_state_set(parser, PM_LEX_STATE_BEG);
11027  }
11028 
11029  LEX(type);
11030  }
11031 
11032  // ! != !~ !@
11033  case '!':
11034  if (lex_state_operator_p(parser)) {
11035  lex_state_set(parser, PM_LEX_STATE_ARG);
11036  if (match(parser, '@')) {
11037  LEX(PM_TOKEN_BANG);
11038  }
11039  } else {
11040  lex_state_set(parser, PM_LEX_STATE_BEG);
11041  }
11042 
11043  if (match(parser, '=')) {
11044  LEX(PM_TOKEN_BANG_EQUAL);
11045  }
11046 
11047  if (match(parser, '~')) {
11048  LEX(PM_TOKEN_BANG_TILDE);
11049  }
11050 
11051  LEX(PM_TOKEN_BANG);
11052 
11053  // = => =~ == === =begin
11054  case '=':
11055  if (
11056  current_token_starts_line(parser) &&
11057  (parser->current.end + 5 <= parser->end) &&
11058  memcmp(parser->current.end, "begin", 5) == 0 &&
11059  (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) == '\0'))
11060  ) {
11061  pm_token_type_t type = lex_embdoc(parser);
11062  if (type == PM_TOKEN_EOF) {
11063  LEX(type);
11064  }
11065 
11066  goto lex_next_token;
11067  }
11068 
11069  if (lex_state_operator_p(parser)) {
11070  lex_state_set(parser, PM_LEX_STATE_ARG);
11071  } else {
11072  lex_state_set(parser, PM_LEX_STATE_BEG);
11073  }
11074 
11075  if (match(parser, '>')) {
11077  }
11078 
11079  if (match(parser, '~')) {
11080  LEX(PM_TOKEN_EQUAL_TILDE);
11081  }
11082 
11083  if (match(parser, '=')) {
11084  LEX(match(parser, '=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
11085  }
11086 
11087  LEX(PM_TOKEN_EQUAL);
11088 
11089  // < << <<= <= <=>
11090  case '<':
11091  if (match(parser, '<')) {
11092  if (
11093  !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
11094  !lex_state_end_p(parser) &&
11095  (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
11096  ) {
11097  const uint8_t *end = parser->current.end;
11098 
11099  pm_heredoc_quote_t quote = PM_HEREDOC_QUOTE_NONE;
11100  pm_heredoc_indent_t indent = PM_HEREDOC_INDENT_NONE;
11101 
11102  if (match(parser, '-')) {
11103  indent = PM_HEREDOC_INDENT_DASH;
11104  }
11105  else if (match(parser, '~')) {
11106  indent = PM_HEREDOC_INDENT_TILDE;
11107  }
11108 
11109  if (match(parser, '`')) {
11110  quote = PM_HEREDOC_QUOTE_BACKTICK;
11111  }
11112  else if (match(parser, '"')) {
11113  quote = PM_HEREDOC_QUOTE_DOUBLE;
11114  }
11115  else if (match(parser, '\'')) {
11116  quote = PM_HEREDOC_QUOTE_SINGLE;
11117  }
11118 
11119  const uint8_t *ident_start = parser->current.end;
11120  size_t width = 0;
11121 
11122  if (parser->current.end >= parser->end) {
11123  parser->current.end = end;
11124  } else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->current.end)) == 0) {
11125  parser->current.end = end;
11126  } else {
11127  if (quote == PM_HEREDOC_QUOTE_NONE) {
11128  parser->current.end += width;
11129 
11130  while ((parser->current.end < parser->end) && (width = char_is_identifier(parser, parser->current.end))) {
11131  parser->current.end += width;
11132  }
11133  } else {
11134  // If we have quotes, then we're going to go until we find the
11135  // end quote.
11136  while ((parser->current.end < parser->end) && quote != (pm_heredoc_quote_t) (*parser->current.end)) {
11137  if (*parser->current.end == '\r' || *parser->current.end == '\n') break;
11138  parser->current.end++;
11139  }
11140  }
11141 
11142  size_t ident_length = (size_t) (parser->current.end - ident_start);
11143  bool ident_error = false;
11144 
11145  if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
11146  pm_parser_err(parser, ident_start, ident_start + ident_length, PM_ERR_HEREDOC_IDENTIFIER);
11147  ident_error = true;
11148  }
11149 
11150  parser->explicit_encoding = NULL;
11151  lex_mode_push(parser, (pm_lex_mode_t) {
11152  .mode = PM_LEX_HEREDOC,
11153  .as.heredoc = {
11154  .base = {
11155  .ident_start = ident_start,
11156  .ident_length = ident_length,
11157  .quote = quote,
11158  .indent = indent
11159  },
11160  .next_start = parser->current.end,
11161  .common_whitespace = NULL,
11162  .line_continuation = false
11163  }
11164  });
11165 
11166  if (parser->heredoc_end == NULL) {
11167  const uint8_t *body_start = next_newline(parser->current.end, parser->end - parser->current.end);
11168 
11169  if (body_start == NULL) {
11170  // If there is no newline after the heredoc identifier, then
11171  // this is not a valid heredoc declaration. In this case we
11172  // will add an error, but we will still return a heredoc
11173  // start.
11174  if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
11175  body_start = parser->end;
11176  } else {
11177  // Otherwise, we want to indicate that the body of the
11178  // heredoc starts on the character after the next newline.
11179  pm_newline_list_append(&parser->newline_list, body_start);
11180  body_start++;
11181  }
11182 
11183  parser->next_start = body_start;
11184  } else {
11185  parser->next_start = parser->heredoc_end;
11186  }
11187 
11189  }
11190  }
11191 
11192  if (match(parser, '=')) {
11193  lex_state_set(parser, PM_LEX_STATE_BEG);
11195  }
11196 
11197  if (ambiguous_operator_p(parser, space_seen)) {
11198  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "<<", "here document");
11199  }
11200 
11201  if (lex_state_operator_p(parser)) {
11202  lex_state_set(parser, PM_LEX_STATE_ARG);
11203  } else {
11204  if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11205  lex_state_set(parser, PM_LEX_STATE_BEG);
11206  }
11207 
11208  LEX(PM_TOKEN_LESS_LESS);
11209  }
11210 
11211  if (lex_state_operator_p(parser)) {
11212  lex_state_set(parser, PM_LEX_STATE_ARG);
11213  } else {
11214  if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->command_start = true;
11215  lex_state_set(parser, PM_LEX_STATE_BEG);
11216  }
11217 
11218  if (match(parser, '=')) {
11219  if (match(parser, '>')) {
11221  }
11222 
11223  LEX(PM_TOKEN_LESS_EQUAL);
11224  }
11225 
11226  LEX(PM_TOKEN_LESS);
11227 
11228  // > >> >>= >=
11229  case '>':
11230  if (match(parser, '>')) {
11231  if (lex_state_operator_p(parser)) {
11232  lex_state_set(parser, PM_LEX_STATE_ARG);
11233  } else {
11234  lex_state_set(parser, PM_LEX_STATE_BEG);
11235  }
11236  LEX(match(parser, '=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
11237  }
11238 
11239  if (lex_state_operator_p(parser)) {
11240  lex_state_set(parser, PM_LEX_STATE_ARG);
11241  } else {
11242  lex_state_set(parser, PM_LEX_STATE_BEG);
11243  }
11244 
11245  LEX(match(parser, '=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
11246 
11247  // double-quoted string literal
11248  case '"': {
11249  bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11250  lex_mode_push_string(parser, true, label_allowed, '\0', '"');
11251  LEX(PM_TOKEN_STRING_BEGIN);
11252  }
11253 
11254  // xstring literal
11255  case '`': {
11256  if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
11257  lex_state_set(parser, PM_LEX_STATE_ENDFN);
11258  LEX(PM_TOKEN_BACKTICK);
11259  }
11260 
11261  if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
11262  if (previous_command_start) {
11263  lex_state_set(parser, PM_LEX_STATE_CMDARG);
11264  } else {
11265  lex_state_set(parser, PM_LEX_STATE_ARG);
11266  }
11267 
11268  LEX(PM_TOKEN_BACKTICK);
11269  }
11270 
11271  lex_mode_push_string(parser, true, false, '\0', '`');
11272  LEX(PM_TOKEN_BACKTICK);
11273  }
11274 
11275  // single-quoted string literal
11276  case '\'': {
11277  bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
11278  lex_mode_push_string(parser, false, label_allowed, '\0', '\'');
11279  LEX(PM_TOKEN_STRING_BEGIN);
11280  }
11281 
11282  // ? character literal
11283  case '?':
11284  LEX(lex_question_mark(parser));
11285 
11286  // & && &&= &=
11287  case '&': {
11288  if (match(parser, '&')) {
11289  lex_state_set(parser, PM_LEX_STATE_BEG);
11290 
11291  if (match(parser, '=')) {
11293  }
11294 
11296  }
11297 
11298  if (match(parser, '=')) {
11299  lex_state_set(parser, PM_LEX_STATE_BEG);
11301  }
11302 
11303  if (match(parser, '.')) {
11304  lex_state_set(parser, PM_LEX_STATE_DOT);
11306  }
11307 
11309  if (lex_state_spcarg_p(parser, space_seen)) {
11310  if ((peek(parser) != ':') || (peek_offset(parser, 1) == '\0')) {
11311  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11312  } else {
11313  const uint8_t delim = peek_offset(parser, 1);
11314 
11315  if ((delim != '\'') && (delim != '"') && !char_is_identifier(parser, parser->current.end + 1)) {
11316  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
11317  }
11318  }
11319 
11321  } else if (lex_state_beg_p(parser)) {
11323  } else if (ambiguous_operator_p(parser, space_seen)) {
11324  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "&", "argument prefix");
11325  }
11326 
11327  if (lex_state_operator_p(parser)) {
11328  lex_state_set(parser, PM_LEX_STATE_ARG);
11329  } else {
11330  lex_state_set(parser, PM_LEX_STATE_BEG);
11331  }
11332 
11333  LEX(type);
11334  }
11335 
11336  // | || ||= |=
11337  case '|':
11338  if (match(parser, '|')) {
11339  if (match(parser, '=')) {
11340  lex_state_set(parser, PM_LEX_STATE_BEG);
11342  }
11343 
11344  if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
11345  parser->current.end--;
11346  LEX(PM_TOKEN_PIPE);
11347  }
11348 
11349  lex_state_set(parser, PM_LEX_STATE_BEG);
11350  LEX(PM_TOKEN_PIPE_PIPE);
11351  }
11352 
11353  if (match(parser, '=')) {
11354  lex_state_set(parser, PM_LEX_STATE_BEG);
11355  LEX(PM_TOKEN_PIPE_EQUAL);
11356  }
11357 
11358  if (lex_state_operator_p(parser)) {
11359  lex_state_set(parser, PM_LEX_STATE_ARG);
11360  } else {
11361  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
11362  }
11363 
11364  LEX(PM_TOKEN_PIPE);
11365 
11366  // + += +@
11367  case '+': {
11368  if (lex_state_operator_p(parser)) {
11369  lex_state_set(parser, PM_LEX_STATE_ARG);
11370 
11371  if (match(parser, '@')) {
11372  LEX(PM_TOKEN_UPLUS);
11373  }
11374 
11375  LEX(PM_TOKEN_PLUS);
11376  }
11377 
11378  if (match(parser, '=')) {
11379  lex_state_set(parser, PM_LEX_STATE_BEG);
11380  LEX(PM_TOKEN_PLUS_EQUAL);
11381  }
11382 
11383  if (
11384  lex_state_beg_p(parser) ||
11385  (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS), true) : false)
11386  ) {
11387  lex_state_set(parser, PM_LEX_STATE_BEG);
11388 
11389  if (pm_char_is_decimal_digit(peek(parser))) {
11390  parser->current.end++;
11391  pm_token_type_t type = lex_numeric(parser);
11392  lex_state_set(parser, PM_LEX_STATE_END);
11393  LEX(type);
11394  }
11395 
11396  LEX(PM_TOKEN_UPLUS);
11397  }
11398 
11399  if (ambiguous_operator_p(parser, space_seen)) {
11400  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "+", "unary operator");
11401  }
11402 
11403  lex_state_set(parser, PM_LEX_STATE_BEG);
11404  LEX(PM_TOKEN_PLUS);
11405  }
11406 
11407  // - -= -@
11408  case '-': {
11409  if (lex_state_operator_p(parser)) {
11410  lex_state_set(parser, PM_LEX_STATE_ARG);
11411 
11412  if (match(parser, '@')) {
11413  LEX(PM_TOKEN_UMINUS);
11414  }
11415 
11416  LEX(PM_TOKEN_MINUS);
11417  }
11418 
11419  if (match(parser, '=')) {
11420  lex_state_set(parser, PM_LEX_STATE_BEG);
11421  LEX(PM_TOKEN_MINUS_EQUAL);
11422  }
11423 
11424  if (match(parser, '>')) {
11425  lex_state_set(parser, PM_LEX_STATE_ENDFN);
11427  }
11428 
11429  bool spcarg = lex_state_spcarg_p(parser, space_seen);
11430  bool is_beg = lex_state_beg_p(parser);
11431  if (!is_beg && spcarg) {
11432  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
11433  }
11434 
11435  if (is_beg || spcarg) {
11436  lex_state_set(parser, PM_LEX_STATE_BEG);
11438  }
11439 
11440  if (ambiguous_operator_p(parser, space_seen)) {
11441  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "-", "unary operator");
11442  }
11443 
11444  lex_state_set(parser, PM_LEX_STATE_BEG);
11445  LEX(PM_TOKEN_MINUS);
11446  }
11447 
11448  // . .. ...
11449  case '.': {
11450  bool beg_p = lex_state_beg_p(parser);
11451 
11452  if (match(parser, '.')) {
11453  if (match(parser, '.')) {
11454  // If we're _not_ inside a range within default parameters
11455  if (!context_p(parser, PM_CONTEXT_DEFAULT_PARAMS) && context_p(parser, PM_CONTEXT_DEF_PARAMS)) {
11456  if (lex_state_p(parser, PM_LEX_STATE_END)) {
11457  lex_state_set(parser, PM_LEX_STATE_BEG);
11458  } else {
11459  lex_state_set(parser, PM_LEX_STATE_ENDARG);
11460  }
11461  LEX(PM_TOKEN_UDOT_DOT_DOT);
11462  }
11463 
11464  if (parser->enclosure_nesting == 0 && parser_end_of_line_p(parser)) {
11465  pm_parser_warn_token(parser, &parser->current, PM_WARN_DOT_DOT_DOT_EOL);
11466  }
11467 
11468  lex_state_set(parser, PM_LEX_STATE_BEG);
11470  }
11471 
11472  lex_state_set(parser, PM_LEX_STATE_BEG);
11473  LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
11474  }
11475 
11476  lex_state_set(parser, PM_LEX_STATE_DOT);
11477  LEX(PM_TOKEN_DOT);
11478  }
11479 
11480  // integer
11481  case '0':
11482  case '1':
11483  case '2':
11484  case '3':
11485  case '4':
11486  case '5':
11487  case '6':
11488  case '7':
11489  case '8':
11490  case '9': {
11491  pm_token_type_t type = lex_numeric(parser);
11492  lex_state_set(parser, PM_LEX_STATE_END);
11493  LEX(type);
11494  }
11495 
11496  // :: symbol
11497  case ':':
11498  if (match(parser, ':')) {
11499  if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
11500  lex_state_set(parser, PM_LEX_STATE_BEG);
11501  LEX(PM_TOKEN_UCOLON_COLON);
11502  }
11503 
11504  lex_state_set(parser, PM_LEX_STATE_DOT);
11505  LEX(PM_TOKEN_COLON_COLON);
11506  }
11507 
11508  if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) == '#') {
11509  lex_state_set(parser, PM_LEX_STATE_BEG);
11510  LEX(PM_TOKEN_COLON);
11511  }
11512 
11513  if (peek(parser) == '"' || peek(parser) == '\'') {
11514  lex_mode_push_string(parser, peek(parser) == '"', false, '\0', *parser->current.end);
11515  parser->current.end++;
11516  }
11517 
11518  lex_state_set(parser, PM_LEX_STATE_FNAME);
11519  LEX(PM_TOKEN_SYMBOL_BEGIN);
11520 
11521  // / /=
11522  case '/':
11523  if (lex_state_beg_p(parser)) {
11524  lex_mode_push_regexp(parser, '\0', '/');
11525  LEX(PM_TOKEN_REGEXP_BEGIN);
11526  }
11527 
11528  if (match(parser, '=')) {
11529  lex_state_set(parser, PM_LEX_STATE_BEG);
11530  LEX(PM_TOKEN_SLASH_EQUAL);
11531  }
11532 
11533  if (lex_state_spcarg_p(parser, space_seen)) {
11534  pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
11535  lex_mode_push_regexp(parser, '\0', '/');
11536  LEX(PM_TOKEN_REGEXP_BEGIN);
11537  }
11538 
11539  if (ambiguous_operator_p(parser, space_seen)) {
11540  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "/", "regexp literal");
11541  }
11542 
11543  if (lex_state_operator_p(parser)) {
11544  lex_state_set(parser, PM_LEX_STATE_ARG);
11545  } else {
11546  lex_state_set(parser, PM_LEX_STATE_BEG);
11547  }
11548 
11549  LEX(PM_TOKEN_SLASH);
11550 
11551  // ^ ^=
11552  case '^':
11553  if (lex_state_operator_p(parser)) {
11554  lex_state_set(parser, PM_LEX_STATE_ARG);
11555  } else {
11556  lex_state_set(parser, PM_LEX_STATE_BEG);
11557  }
11558  LEX(match(parser, '=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
11559 
11560  // ~ ~@
11561  case '~':
11562  if (lex_state_operator_p(parser)) {
11563  (void) match(parser, '@');
11564  lex_state_set(parser, PM_LEX_STATE_ARG);
11565  } else {
11566  lex_state_set(parser, PM_LEX_STATE_BEG);
11567  }
11568 
11569  LEX(PM_TOKEN_TILDE);
11570 
11571  // % %= %i %I %q %Q %w %W
11572  case '%': {
11573  // If there is no subsequent character then we have an
11574  // invalid token. We're going to say it's the percent
11575  // operator because we don't want to move into the string
11576  // lex mode unnecessarily.
11577  if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
11578  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
11579  LEX(PM_TOKEN_PERCENT);
11580  }
11581 
11582  if (!lex_state_beg_p(parser) && match(parser, '=')) {
11583  lex_state_set(parser, PM_LEX_STATE_BEG);
11585  } else if (
11586  lex_state_beg_p(parser) ||
11587  (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) == 's')) ||
11588  lex_state_spcarg_p(parser, space_seen)
11589  ) {
11590  if (!parser->encoding->alnum_char(parser->current.end, parser->end - parser->current.end)) {
11591  if (*parser->current.end >= 0x80) {
11592  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11593  }
11594 
11595  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11596  lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11597  LEX(PM_TOKEN_STRING_BEGIN);
11598  }
11599 
11600  // Delimiters for %-literals cannot be alphanumeric. We
11601  // validate that here.
11602  uint8_t delimiter = peek_offset(parser, 1);
11603  if (delimiter >= 0x80 || parser->encoding->alnum_char(&delimiter, 1)) {
11604  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11605  goto lex_next_token;
11606  }
11607 
11608  switch (peek(parser)) {
11609  case 'i': {
11610  parser->current.end++;
11611 
11612  if (parser->current.end < parser->end) {
11613  lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11614  } else {
11615  lex_mode_push_list_eof(parser);
11616  }
11617 
11619  }
11620  case 'I': {
11621  parser->current.end++;
11622 
11623  if (parser->current.end < parser->end) {
11624  lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11625  } else {
11626  lex_mode_push_list_eof(parser);
11627  }
11628 
11630  }
11631  case 'r': {
11632  parser->current.end++;
11633 
11634  if (parser->current.end < parser->end) {
11635  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11636  lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11637  } else {
11638  lex_mode_push_regexp(parser, '\0', '\0');
11639  }
11640 
11641  LEX(PM_TOKEN_REGEXP_BEGIN);
11642  }
11643  case 'q': {
11644  parser->current.end++;
11645 
11646  if (parser->current.end < parser->end) {
11647  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11648  lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11649  } else {
11650  lex_mode_push_string_eof(parser);
11651  }
11652 
11653  LEX(PM_TOKEN_STRING_BEGIN);
11654  }
11655  case 'Q': {
11656  parser->current.end++;
11657 
11658  if (parser->current.end < parser->end) {
11659  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11660  lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11661  } else {
11662  lex_mode_push_string_eof(parser);
11663  }
11664 
11665  LEX(PM_TOKEN_STRING_BEGIN);
11666  }
11667  case 's': {
11668  parser->current.end++;
11669 
11670  if (parser->current.end < parser->end) {
11671  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11672  lex_mode_push_string(parser, false, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11673  lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
11674  } else {
11675  lex_mode_push_string_eof(parser);
11676  }
11677 
11678  LEX(PM_TOKEN_SYMBOL_BEGIN);
11679  }
11680  case 'w': {
11681  parser->current.end++;
11682 
11683  if (parser->current.end < parser->end) {
11684  lex_mode_push_list(parser, false, pm_lex_percent_delimiter(parser));
11685  } else {
11686  lex_mode_push_list_eof(parser);
11687  }
11688 
11690  }
11691  case 'W': {
11692  parser->current.end++;
11693 
11694  if (parser->current.end < parser->end) {
11695  lex_mode_push_list(parser, true, pm_lex_percent_delimiter(parser));
11696  } else {
11697  lex_mode_push_list_eof(parser);
11698  }
11699 
11701  }
11702  case 'x': {
11703  parser->current.end++;
11704 
11705  if (parser->current.end < parser->end) {
11706  const uint8_t delimiter = pm_lex_percent_delimiter(parser);
11707  lex_mode_push_string(parser, true, false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
11708  } else {
11709  lex_mode_push_string_eof(parser);
11710  }
11711 
11713  }
11714  default:
11715  // If we get to this point, then we have a % that is completely
11716  // unparsable. In this case we'll just drop it from the parser
11717  // and skip past it and hope that the next token is something
11718  // that we can parse.
11719  pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
11720  goto lex_next_token;
11721  }
11722  }
11723 
11724  if (ambiguous_operator_p(parser, space_seen)) {
11725  PM_PARSER_WARN_TOKEN_FORMAT(parser, parser->current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR, "%", "string literal");
11726  }
11727 
11728  lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
11729  LEX(PM_TOKEN_PERCENT);
11730  }
11731 
11732  // global variable
11733  case '$': {
11734  pm_token_type_t type = lex_global_variable(parser);
11735 
11736  // If we're lexing an embedded variable, then we need to pop back into
11737  // the parent lex context.
11738  if (parser->lex_modes.current->mode == PM_LEX_EMBVAR) {
11739  lex_mode_pop(parser);
11740  }
11741 
11742  lex_state_set(parser, PM_LEX_STATE_END);
11743  LEX(type);
11744  }
11745 
11746  // instance variable, class variable
11747  case '@':
11748  lex_state_set(parser, parser->lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
11749  LEX(lex_at_variable(parser));
11750 
11751  default: {
11752  if (*parser->current.start != '_') {
11753  size_t width = char_is_identifier_start(parser, parser->current.start);
11754 
11755  // If this isn't the beginning of an identifier, then
11756  // it's an invalid token as we've exhausted all of the
11757  // other options. We'll skip past it and return the next
11758  // token after adding an appropriate error message.
11759  if (!width) {
11760  if (*parser->current.start >= 0x80) {
11761  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->current.start);
11762  } else if (*parser->current.start == '\\') {
11763  switch (peek_at(parser, parser->current.start + 1)) {
11764  case ' ':
11765  parser->current.end++;
11766  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped space");
11767  break;
11768  case '\f':
11769  parser->current.end++;
11770  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped form feed");
11771  break;
11772  case '\t':
11773  parser->current.end++;
11774  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped horizontal tab");
11775  break;
11776  case '\v':
11777  parser->current.end++;
11778  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped vertical tab");
11779  break;
11780  case '\r':
11781  if (peek_at(parser, parser->current.start + 2) != '\n') {
11782  parser->current.end++;
11783  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11784  break;
11785  }
11786  /* fallthrough */
11787  default:
11788  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11789  break;
11790  }
11791  } else if (char_is_ascii_printable(*parser->current.start)) {
11792  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->current.start);
11793  } else {
11794  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_INVALID_CHARACTER, *parser->current.start);
11795  }
11796 
11797  goto lex_next_token;
11798  }
11799 
11800  parser->current.end = parser->current.start + width;
11801  }
11802 
11803  pm_token_type_t type = lex_identifier(parser, previous_command_start);
11804 
11805  // If we've hit a __END__ and it was at the start of the
11806  // line or the start of the file and it is followed by
11807  // either a \n or a \r\n, then this is the last token of the
11808  // file.
11809  if (
11810  ((parser->current.end - parser->current.start) == 7) &&
11811  current_token_starts_line(parser) &&
11812  (memcmp(parser->current.start, "__END__", 7) == 0) &&
11813  (parser->current.end == parser->end || match_eol(parser))
11814  ) {
11815  // Since we know we're about to add an __END__ comment,
11816  // we know we need to add all of the newlines to get the
11817  // correct column information for it.
11818  const uint8_t *cursor = parser->current.end;
11819  while ((cursor = next_newline(cursor, parser->end - cursor)) != NULL) {
11820  pm_newline_list_append(&parser->newline_list, cursor++);
11821  }
11822 
11823  parser->current.end = parser->end;
11824  parser->current.type = PM_TOKEN___END__;
11825  parser_lex_callback(parser);
11826 
11827  parser->data_loc.start = parser->current.start;
11828  parser->data_loc.end = parser->current.end;
11829 
11830  LEX(PM_TOKEN_EOF);
11831  }
11832 
11833  pm_lex_state_t last_state = parser->lex_state;
11834 
11836  if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11837  if (previous_command_start) {
11838  lex_state_set(parser, PM_LEX_STATE_CMDARG);
11839  } else {
11840  lex_state_set(parser, PM_LEX_STATE_ARG);
11841  }
11842  } else if (parser->lex_state == PM_LEX_STATE_FNAME) {
11843  lex_state_set(parser, PM_LEX_STATE_ENDFN);
11844  } else {
11845  lex_state_set(parser, PM_LEX_STATE_END);
11846  }
11847  }
11848 
11849  if (
11850  !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11851  (type == PM_TOKEN_IDENTIFIER) &&
11852  ((pm_parser_local_depth(parser, &parser->current) != -1) ||
11853  pm_token_is_numbered_parameter(parser->current.start, parser->current.end))
11854  ) {
11855  lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11856  }
11857 
11858  LEX(type);
11859  }
11860  }
11861  }
11862  case PM_LEX_LIST: {
11863  if (parser->next_start != NULL) {
11864  parser->current.end = parser->next_start;
11865  parser->next_start = NULL;
11866  }
11867 
11868  // First we'll set the beginning of the token.
11869  parser->current.start = parser->current.end;
11870 
11871  // If there's any whitespace at the start of the list, then we're
11872  // going to trim it off the beginning and create a new token.
11873  size_t whitespace;
11874 
11875  if (parser->heredoc_end) {
11876  whitespace = pm_strspn_inline_whitespace(parser->current.end, parser->end - parser->current.end);
11877  if (peek_offset(parser, (ptrdiff_t)whitespace) == '\n') {
11878  whitespace += 1;
11879  }
11880  } else {
11881  whitespace = pm_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list);
11882  }
11883 
11884  if (whitespace > 0) {
11885  parser->current.end += whitespace;
11886  if (peek_offset(parser, -1) == '\n') {
11887  // mutates next_start
11888  parser_flush_heredoc_end(parser);
11889  }
11890  LEX(PM_TOKEN_WORDS_SEP);
11891  }
11892 
11893  // We'll check if we're at the end of the file. If we are, then we
11894  // need to return the EOF token.
11895  if (parser->current.end >= parser->end) {
11896  LEX(PM_TOKEN_EOF);
11897  }
11898 
11899  // Here we'll get a list of the places where strpbrk should break,
11900  // and then find the first one.
11901  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
11902  const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
11903  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11904 
11905  // If we haven't found an escape yet, then this buffer will be
11906  // unallocated since we can refer directly to the source string.
11907  pm_token_buffer_t token_buffer = { 0 };
11908 
11909  while (breakpoint != NULL) {
11910  // If we hit whitespace, then we must have received content by
11911  // now, so we can return an element of the list.
11912  if (pm_char_is_whitespace(*breakpoint)) {
11913  parser->current.end = breakpoint;
11914  pm_token_buffer_flush(parser, &token_buffer);
11916  }
11917 
11918  // If we hit the terminator, we need to check which token to
11919  // return.
11920  if (*breakpoint == lex_mode->as.list.terminator) {
11921  // If this terminator doesn't actually close the list, then
11922  // we need to continue on past it.
11923  if (lex_mode->as.list.nesting > 0) {
11924  parser->current.end = breakpoint + 1;
11925  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
11926  lex_mode->as.list.nesting--;
11927  continue;
11928  }
11929 
11930  // If we've hit the terminator and we've already skipped
11931  // past content, then we can return a list node.
11932  if (breakpoint > parser->current.start) {
11933  parser->current.end = breakpoint;
11934  pm_token_buffer_flush(parser, &token_buffer);
11936  }
11937 
11938  // Otherwise, switch back to the default state and return
11939  // the end of the list.
11940  parser->current.end = breakpoint + 1;
11941  lex_mode_pop(parser);
11942  lex_state_set(parser, PM_LEX_STATE_END);
11943  LEX(PM_TOKEN_STRING_END);
11944  }
11945 
11946  // If we hit a null byte, skip directly past it.
11947  if (*breakpoint == '\0') {
11948  breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
11949  continue;
11950  }
11951 
11952  // If we hit escapes, then we need to treat the next token
11953  // literally. In this case we'll skip past the next character
11954  // and find the next breakpoint.
11955  if (*breakpoint == '\\') {
11956  parser->current.end = breakpoint + 1;
11957 
11958  // If we've hit the end of the file, then break out of the
11959  // loop by setting the breakpoint to NULL.
11960  if (parser->current.end == parser->end) {
11961  breakpoint = NULL;
11962  continue;
11963  }
11964 
11965  pm_token_buffer_escape(parser, &token_buffer);
11966  uint8_t peeked = peek(parser);
11967 
11968  switch (peeked) {
11969  case ' ':
11970  case '\f':
11971  case '\t':
11972  case '\v':
11973  case '\\':
11974  pm_token_buffer_push_byte(&token_buffer, peeked);
11975  parser->current.end++;
11976  break;
11977  case '\r':
11978  parser->current.end++;
11979  if (peek(parser) != '\n') {
11980  pm_token_buffer_push_byte(&token_buffer, '\r');
11981  break;
11982  }
11983  /* fallthrough */
11984  case '\n':
11985  pm_token_buffer_push_byte(&token_buffer, '\n');
11986 
11987  if (parser->heredoc_end) {
11988  // ... if we are on the same line as a heredoc,
11989  // flush the heredoc and continue parsing after
11990  // heredoc_end.
11991  parser_flush_heredoc_end(parser);
11992  pm_token_buffer_copy(parser, &token_buffer);
11994  } else {
11995  // ... else track the newline.
11996  pm_newline_list_append(&parser->newline_list, parser->current.end);
11997  }
11998 
11999  parser->current.end++;
12000  break;
12001  default:
12002  if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
12003  pm_token_buffer_push_byte(&token_buffer, peeked);
12004  parser->current.end++;
12005  } else if (lex_mode->as.list.interpolation) {
12006  escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12007  } else {
12008  pm_token_buffer_push_byte(&token_buffer, '\\');
12009  pm_token_buffer_push_escaped(&token_buffer, parser);
12010  }
12011 
12012  break;
12013  }
12014 
12015  token_buffer.cursor = parser->current.end;
12016  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12017  continue;
12018  }
12019 
12020  // If we hit a #, then we will attempt to lex interpolation.
12021  if (*breakpoint == '#') {
12022  pm_token_type_t type = lex_interpolation(parser, breakpoint);
12023 
12024  if (type == PM_TOKEN_NOT_PROVIDED) {
12025  // If we haven't returned at this point then we had something
12026  // that looked like an interpolated class or instance variable
12027  // like "#@" but wasn't actually. In this case we'll just skip
12028  // to the next breakpoint.
12029  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12030  continue;
12031  }
12032 
12033  if (type == PM_TOKEN_STRING_CONTENT) {
12034  pm_token_buffer_flush(parser, &token_buffer);
12035  }
12036 
12037  LEX(type);
12038  }
12039 
12040  // If we've hit the incrementor, then we need to skip past it
12041  // and find the next breakpoint.
12042  assert(*breakpoint == lex_mode->as.list.incrementor);
12043  parser->current.end = breakpoint + 1;
12044  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12045  lex_mode->as.list.nesting++;
12046  continue;
12047  }
12048 
12049  if (parser->current.end > parser->current.start) {
12050  pm_token_buffer_flush(parser, &token_buffer);
12052  }
12053 
12054  // If we were unable to find a breakpoint, then this token hits the
12055  // end of the file.
12056  parser->current.end = parser->end;
12057  pm_token_buffer_flush(parser, &token_buffer);
12059  }
12060  case PM_LEX_REGEXP: {
12061  // First, we'll set the start of this token to be the current end.
12062  if (parser->next_start == NULL) {
12063  parser->current.start = parser->current.end;
12064  } else {
12065  parser->current.start = parser->next_start;
12066  parser->current.end = parser->next_start;
12067  parser->next_start = NULL;
12068  }
12069 
12070  // We'll check if we're at the end of the file. If we are, then we
12071  // need to return the EOF token.
12072  if (parser->current.end >= parser->end) {
12073  LEX(PM_TOKEN_EOF);
12074  }
12075 
12076  // Get a reference to the current mode.
12077  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12078 
12079  // These are the places where we need to split up the content of the
12080  // regular expression. We'll use strpbrk to find the first of these
12081  // characters.
12082  const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
12083  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12084  pm_regexp_token_buffer_t token_buffer = { 0 };
12085 
12086  while (breakpoint != NULL) {
12087  // If we hit the terminator, we need to determine what kind of
12088  // token to return.
12089  if (*breakpoint == lex_mode->as.regexp.terminator) {
12090  if (lex_mode->as.regexp.nesting > 0) {
12091  parser->current.end = breakpoint + 1;
12092  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12093  lex_mode->as.regexp.nesting--;
12094  continue;
12095  }
12096 
12097  // Here we've hit the terminator. If we have already consumed
12098  // content then we need to return that content as string content
12099  // first.
12100  if (breakpoint > parser->current.start) {
12101  parser->current.end = breakpoint;
12102  pm_regexp_token_buffer_flush(parser, &token_buffer);
12104  }
12105 
12106  // Check here if we need to track the newline.
12107  size_t eol_length = match_eol_at(parser, breakpoint);
12108  if (eol_length) {
12109  parser->current.end = breakpoint + eol_length;
12110  pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12111  } else {
12112  parser->current.end = breakpoint + 1;
12113  }
12114 
12115  // Since we've hit the terminator of the regular expression,
12116  // we now need to parse the options.
12117  parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
12118 
12119  lex_mode_pop(parser);
12120  lex_state_set(parser, PM_LEX_STATE_END);
12121  LEX(PM_TOKEN_REGEXP_END);
12122  }
12123 
12124  // If we've hit the incrementor, then we need to skip past it
12125  // and find the next breakpoint.
12126  if (*breakpoint && *breakpoint == lex_mode->as.regexp.incrementor) {
12127  parser->current.end = breakpoint + 1;
12128  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12129  lex_mode->as.regexp.nesting++;
12130  continue;
12131  }
12132 
12133  switch (*breakpoint) {
12134  case '\0':
12135  // If we hit a null byte, skip directly past it.
12136  parser->current.end = breakpoint + 1;
12137  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12138  break;
12139  case '\r':
12140  if (peek_at(parser, breakpoint + 1) != '\n') {
12141  parser->current.end = breakpoint + 1;
12142  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12143  break;
12144  }
12145 
12146  breakpoint++;
12147  parser->current.end = breakpoint;
12148  pm_regexp_token_buffer_escape(parser, &token_buffer);
12149  token_buffer.base.cursor = breakpoint;
12150 
12151  /* fallthrough */
12152  case '\n':
12153  // If we've hit a newline, then we need to track that in
12154  // the list of newlines.
12155  if (parser->heredoc_end == NULL) {
12156  pm_newline_list_append(&parser->newline_list, breakpoint);
12157  parser->current.end = breakpoint + 1;
12158  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12159  break;
12160  }
12161 
12162  parser->current.end = breakpoint + 1;
12163  parser_flush_heredoc_end(parser);
12164  pm_regexp_token_buffer_flush(parser, &token_buffer);
12166  case '\\': {
12167  // If we hit escapes, then we need to treat the next
12168  // token literally. In this case we'll skip past the
12169  // next character and find the next breakpoint.
12170  parser->current.end = breakpoint + 1;
12171 
12172  // If we've hit the end of the file, then break out of
12173  // the loop by setting the breakpoint to NULL.
12174  if (parser->current.end == parser->end) {
12175  breakpoint = NULL;
12176  break;
12177  }
12178 
12179  pm_regexp_token_buffer_escape(parser, &token_buffer);
12180  uint8_t peeked = peek(parser);
12181 
12182  switch (peeked) {
12183  case '\r':
12184  parser->current.end++;
12185  if (peek(parser) != '\n') {
12186  if (lex_mode->as.regexp.terminator != '\r') {
12187  pm_token_buffer_push_byte(&token_buffer.base, '\\');
12188  }
12189  pm_regexp_token_buffer_push_byte(&token_buffer, '\r');
12190  pm_token_buffer_push_byte(&token_buffer.base, '\r');
12191  break;
12192  }
12193  /* fallthrough */
12194  case '\n':
12195  if (parser->heredoc_end) {
12196  // ... if we are on the same line as a heredoc,
12197  // flush the heredoc and continue parsing after
12198  // heredoc_end.
12199  parser_flush_heredoc_end(parser);
12200  pm_regexp_token_buffer_copy(parser, &token_buffer);
12202  } else {
12203  // ... else track the newline.
12204  pm_newline_list_append(&parser->newline_list, parser->current.end);
12205  }
12206 
12207  parser->current.end++;
12208  break;
12209  case 'c':
12210  case 'C':
12211  case 'M':
12212  case 'u':
12213  case 'x':
12214  escape_read(parser, &token_buffer.regexp_buffer, &token_buffer.base.buffer, PM_ESCAPE_FLAG_REGEXP);
12215  break;
12216  default:
12217  if (lex_mode->as.regexp.terminator == peeked) {
12218  // Some characters when they are used as the
12219  // terminator also receive an escape. They are
12220  // enumerated here.
12221  switch (peeked) {
12222  case '$': case ')': case '*': case '+':
12223  case '.': case '>': case '?': case ']':
12224  case '^': case '|': case '}':
12225  pm_token_buffer_push_byte(&token_buffer.base, '\\');
12226  break;
12227  default:
12228  break;
12229  }
12230 
12231  pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
12232  pm_token_buffer_push_byte(&token_buffer.base, peeked);
12233  parser->current.end++;
12234  break;
12235  }
12236 
12237  if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.base, '\\');
12238  pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
12239  break;
12240  }
12241 
12242  token_buffer.base.cursor = parser->current.end;
12243  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12244  break;
12245  }
12246  case '#': {
12247  // If we hit a #, then we will attempt to lex
12248  // interpolation.
12249  pm_token_type_t type = lex_interpolation(parser, breakpoint);
12250 
12251  if (type == PM_TOKEN_NOT_PROVIDED) {
12252  // If we haven't returned at this point then we had
12253  // something that looked like an interpolated class or
12254  // instance variable like "#@" but wasn't actually. In
12255  // this case we'll just skip to the next breakpoint.
12256  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
12257  break;
12258  }
12259 
12260  if (type == PM_TOKEN_STRING_CONTENT) {
12261  pm_regexp_token_buffer_flush(parser, &token_buffer);
12262  }
12263 
12264  LEX(type);
12265  }
12266  default:
12267  assert(false && "unreachable");
12268  break;
12269  }
12270  }
12271 
12272  if (parser->current.end > parser->current.start) {
12273  pm_regexp_token_buffer_flush(parser, &token_buffer);
12275  }
12276 
12277  // If we were unable to find a breakpoint, then this token hits the
12278  // end of the file.
12279  parser->current.end = parser->end;
12280  pm_regexp_token_buffer_flush(parser, &token_buffer);
12282  }
12283  case PM_LEX_STRING: {
12284  // First, we'll set the start of this token to be the current end.
12285  if (parser->next_start == NULL) {
12286  parser->current.start = parser->current.end;
12287  } else {
12288  parser->current.start = parser->next_start;
12289  parser->current.end = parser->next_start;
12290  parser->next_start = NULL;
12291  }
12292 
12293  // We'll check if we're at the end of the file. If we are, then we need to
12294  // return the EOF token.
12295  if (parser->current.end >= parser->end) {
12296  LEX(PM_TOKEN_EOF);
12297  }
12298 
12299  // These are the places where we need to split up the content of the
12300  // string. We'll use strpbrk to find the first of these characters.
12301  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12302  const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
12303  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12304 
12305  // If we haven't found an escape yet, then this buffer will be
12306  // unallocated since we can refer directly to the source string.
12307  pm_token_buffer_t token_buffer = { 0 };
12308 
12309  while (breakpoint != NULL) {
12310  // If we hit the incrementor, then we'll increment the nesting and
12311  // continue lexing.
12312  if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
12313  lex_mode->as.string.nesting++;
12314  parser->current.end = breakpoint + 1;
12315  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12316  continue;
12317  }
12318 
12319  // Note that we have to check the terminator here first because we could
12320  // potentially be parsing a % string that has a # character as the
12321  // terminator.
12322  if (*breakpoint == lex_mode->as.string.terminator) {
12323  // If this terminator doesn't actually close the string, then we need
12324  // to continue on past it.
12325  if (lex_mode->as.string.nesting > 0) {
12326  parser->current.end = breakpoint + 1;
12327  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12328  lex_mode->as.string.nesting--;
12329  continue;
12330  }
12331 
12332  // Here we've hit the terminator. If we have already consumed content
12333  // then we need to return that content as string content first.
12334  if (breakpoint > parser->current.start) {
12335  parser->current.end = breakpoint;
12336  pm_token_buffer_flush(parser, &token_buffer);
12338  }
12339 
12340  // Otherwise we need to switch back to the parent lex mode and
12341  // return the end of the string.
12342  size_t eol_length = match_eol_at(parser, breakpoint);
12343  if (eol_length) {
12344  parser->current.end = breakpoint + eol_length;
12345  pm_newline_list_append(&parser->newline_list, parser->current.end - 1);
12346  } else {
12347  parser->current.end = breakpoint + 1;
12348  }
12349 
12350  if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
12351  parser->current.end++;
12352  lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
12353  lex_mode_pop(parser);
12354  LEX(PM_TOKEN_LABEL_END);
12355  }
12356 
12357  lex_state_set(parser, PM_LEX_STATE_END);
12358  lex_mode_pop(parser);
12359  LEX(PM_TOKEN_STRING_END);
12360  }
12361 
12362  switch (*breakpoint) {
12363  case '\0':
12364  // Skip directly past the null character.
12365  parser->current.end = breakpoint + 1;
12366  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12367  break;
12368  case '\r':
12369  if (peek_at(parser, breakpoint + 1) != '\n') {
12370  parser->current.end = breakpoint + 1;
12371  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12372  break;
12373  }
12374 
12375  // If we hit a \r\n sequence, then we need to treat it
12376  // as a newline.
12377  breakpoint++;
12378  parser->current.end = breakpoint;
12379  pm_token_buffer_escape(parser, &token_buffer);
12380  token_buffer.cursor = breakpoint;
12381 
12382  /* fallthrough */
12383  case '\n':
12384  // When we hit a newline, we need to flush any potential
12385  // heredocs. Note that this has to happen after we check
12386  // for the terminator in case the terminator is a
12387  // newline character.
12388  if (parser->heredoc_end == NULL) {
12389  pm_newline_list_append(&parser->newline_list, breakpoint);
12390  parser->current.end = breakpoint + 1;
12391  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12392  break;
12393  }
12394 
12395  parser->current.end = breakpoint + 1;
12396  parser_flush_heredoc_end(parser);
12397  pm_token_buffer_flush(parser, &token_buffer);
12399  case '\\': {
12400  // Here we hit escapes.
12401  parser->current.end = breakpoint + 1;
12402 
12403  // If we've hit the end of the file, then break out of
12404  // the loop by setting the breakpoint to NULL.
12405  if (parser->current.end == parser->end) {
12406  breakpoint = NULL;
12407  continue;
12408  }
12409 
12410  pm_token_buffer_escape(parser, &token_buffer);
12411  uint8_t peeked = peek(parser);
12412 
12413  switch (peeked) {
12414  case '\\':
12415  pm_token_buffer_push_byte(&token_buffer, '\\');
12416  parser->current.end++;
12417  break;
12418  case '\r':
12419  parser->current.end++;
12420  if (peek(parser) != '\n') {
12421  if (!lex_mode->as.string.interpolation) {
12422  pm_token_buffer_push_byte(&token_buffer, '\\');
12423  }
12424  pm_token_buffer_push_byte(&token_buffer, '\r');
12425  break;
12426  }
12427  /* fallthrough */
12428  case '\n':
12429  if (!lex_mode->as.string.interpolation) {
12430  pm_token_buffer_push_byte(&token_buffer, '\\');
12431  pm_token_buffer_push_byte(&token_buffer, '\n');
12432  }
12433 
12434  if (parser->heredoc_end) {
12435  // ... if we are on the same line as a heredoc,
12436  // flush the heredoc and continue parsing after
12437  // heredoc_end.
12438  parser_flush_heredoc_end(parser);
12439  pm_token_buffer_copy(parser, &token_buffer);
12441  } else {
12442  // ... else track the newline.
12443  pm_newline_list_append(&parser->newline_list, parser->current.end);
12444  }
12445 
12446  parser->current.end++;
12447  break;
12448  default:
12449  if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
12450  pm_token_buffer_push_byte(&token_buffer, peeked);
12451  parser->current.end++;
12452  } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
12453  pm_token_buffer_push_byte(&token_buffer, peeked);
12454  parser->current.end++;
12455  } else if (lex_mode->as.string.interpolation) {
12456  escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12457  } else {
12458  pm_token_buffer_push_byte(&token_buffer, '\\');
12459  pm_token_buffer_push_escaped(&token_buffer, parser);
12460  }
12461 
12462  break;
12463  }
12464 
12465  token_buffer.cursor = parser->current.end;
12466  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12467  break;
12468  }
12469  case '#': {
12470  pm_token_type_t type = lex_interpolation(parser, breakpoint);
12471 
12472  if (type == PM_TOKEN_NOT_PROVIDED) {
12473  // If we haven't returned at this point then we had something that
12474  // looked like an interpolated class or instance variable like "#@"
12475  // but wasn't actually. In this case we'll just skip to the next
12476  // breakpoint.
12477  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12478  break;
12479  }
12480 
12481  if (type == PM_TOKEN_STRING_CONTENT) {
12482  pm_token_buffer_flush(parser, &token_buffer);
12483  }
12484 
12485  LEX(type);
12486  }
12487  default:
12488  assert(false && "unreachable");
12489  }
12490  }
12491 
12492  if (parser->current.end > parser->current.start) {
12493  pm_token_buffer_flush(parser, &token_buffer);
12495  }
12496 
12497  // If we've hit the end of the string, then this is an unterminated
12498  // string. In that case we'll return a string content token.
12499  parser->current.end = parser->end;
12500  pm_token_buffer_flush(parser, &token_buffer);
12502  }
12503  case PM_LEX_HEREDOC: {
12504  // First, we'll set to start of this token.
12505  if (parser->next_start == NULL) {
12506  parser->current.start = parser->current.end;
12507  } else {
12508  parser->current.start = parser->next_start;
12509  parser->current.end = parser->next_start;
12510  parser->heredoc_end = NULL;
12511  parser->next_start = NULL;
12512  }
12513 
12514  // Now let's grab the information about the identifier off of the
12515  // current lex mode.
12516  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
12517  pm_heredoc_lex_mode_t *heredoc_lex_mode = &lex_mode->as.heredoc.base;
12518 
12519  bool line_continuation = lex_mode->as.heredoc.line_continuation;
12520  lex_mode->as.heredoc.line_continuation = false;
12521 
12522  // We'll check if we're at the end of the file. If we are, then we
12523  // will add an error (because we weren't able to find the
12524  // terminator) but still continue parsing so that content after the
12525  // declaration of the heredoc can be parsed.
12526  if (parser->current.end >= parser->end) {
12527  pm_parser_err_heredoc_term(parser, heredoc_lex_mode->ident_start, heredoc_lex_mode->ident_length);
12528  parser->next_start = lex_mode->as.heredoc.next_start;
12529  parser->heredoc_end = parser->current.end;
12530  lex_state_set(parser, PM_LEX_STATE_END);
12531  lex_mode_pop(parser);
12532  LEX(PM_TOKEN_HEREDOC_END);
12533  }
12534 
12535  const uint8_t *ident_start = heredoc_lex_mode->ident_start;
12536  size_t ident_length = heredoc_lex_mode->ident_length;
12537 
12538  // If we are immediately following a newline and we have hit the
12539  // terminator, then we need to return the ending of the heredoc.
12540  if (current_token_starts_line(parser)) {
12541  const uint8_t *start = parser->current.start;
12542 
12543  if (!line_continuation && (start + ident_length <= parser->end)) {
12544  const uint8_t *newline = next_newline(start, parser->end - start);
12545  const uint8_t *ident_end = newline;
12546  const uint8_t *terminator_end = newline;
12547 
12548  if (newline == NULL) {
12549  terminator_end = parser->end;
12550  ident_end = parser->end;
12551  } else {
12552  terminator_end++;
12553  if (newline[-1] == '\r') {
12554  ident_end--; // Remove \r
12555  }
12556  }
12557 
12558  const uint8_t *terminator_start = ident_end - ident_length;
12559  const uint8_t *cursor = start;
12560 
12561  if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12562  while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12563  cursor++;
12564  }
12565  }
12566 
12567  if (
12568  (cursor == terminator_start) &&
12569  (memcmp(terminator_start, ident_start, ident_length) == 0)
12570  ) {
12571  if (newline != NULL) {
12572  pm_newline_list_append(&parser->newline_list, newline);
12573  }
12574 
12575  parser->current.end = terminator_end;
12576  if (*lex_mode->as.heredoc.next_start == '\\') {
12577  parser->next_start = NULL;
12578  } else {
12579  parser->next_start = lex_mode->as.heredoc.next_start;
12580  parser->heredoc_end = parser->current.end;
12581  }
12582 
12583  lex_state_set(parser, PM_LEX_STATE_END);
12584  lex_mode_pop(parser);
12585  LEX(PM_TOKEN_HEREDOC_END);
12586  }
12587  }
12588 
12589  size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->indent);
12590  if (
12591  heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE &&
12592  lex_mode->as.heredoc.common_whitespace != NULL &&
12593  (*lex_mode->as.heredoc.common_whitespace > whitespace) &&
12594  peek_at(parser, start) != '\n'
12595  ) {
12596  *lex_mode->as.heredoc.common_whitespace = whitespace;
12597  }
12598  }
12599 
12600  // Otherwise we'll be parsing string content. These are the places
12601  // where we need to split up the content of the heredoc. We'll use
12602  // strpbrk to find the first of these characters.
12603  uint8_t breakpoints[] = "\r\n\\#";
12604 
12605  pm_heredoc_quote_t quote = heredoc_lex_mode->quote;
12606  if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12607  breakpoints[3] = '\0';
12608  }
12609 
12610  const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12611  pm_token_buffer_t token_buffer = { 0 };
12612  bool was_line_continuation = false;
12613 
12614  while (breakpoint != NULL) {
12615  switch (*breakpoint) {
12616  case '\0':
12617  // Skip directly past the null character.
12618  parser->current.end = breakpoint + 1;
12619  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12620  break;
12621  case '\r':
12622  parser->current.end = breakpoint + 1;
12623 
12624  if (peek_at(parser, breakpoint + 1) != '\n') {
12625  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12626  break;
12627  }
12628 
12629  // If we hit a \r\n sequence, then we want to replace it
12630  // with a single \n character in the final string.
12631  breakpoint++;
12632  pm_token_buffer_escape(parser, &token_buffer);
12633  token_buffer.cursor = breakpoint;
12634 
12635  /* fallthrough */
12636  case '\n': {
12637  if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12638  parser_flush_heredoc_end(parser);
12639  parser->current.end = breakpoint + 1;
12640  pm_token_buffer_flush(parser, &token_buffer);
12642  }
12643 
12644  pm_newline_list_append(&parser->newline_list, breakpoint);
12645 
12646  // If we have a - or ~ heredoc, then we can match after
12647  // some leading whitespace.
12648  const uint8_t *start = breakpoint + 1;
12649 
12650  if (!was_line_continuation && (start + ident_length <= parser->end)) {
12651  // We want to match the terminator starting from the end of the line in case
12652  // there is whitespace in the ident such as <<-' DOC' or <<~' DOC'.
12653  const uint8_t *newline = next_newline(start, parser->end - start);
12654 
12655  if (newline == NULL) {
12656  newline = parser->end;
12657  } else if (newline[-1] == '\r') {
12658  newline--; // Remove \r
12659  }
12660 
12661  // Start of a possible terminator.
12662  const uint8_t *terminator_start = newline - ident_length;
12663 
12664  // Cursor to check for the leading whitespace. We skip the
12665  // leading whitespace if we have a - or ~ heredoc.
12666  const uint8_t *cursor = start;
12667 
12668  if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12669  while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
12670  cursor++;
12671  }
12672  }
12673 
12674  if (
12675  cursor == terminator_start &&
12676  (memcmp(terminator_start, ident_start, ident_length) == 0)
12677  ) {
12678  parser->current.end = breakpoint + 1;
12679  pm_token_buffer_flush(parser, &token_buffer);
12681  }
12682  }
12683 
12684  size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.base.indent);
12685 
12686  // If we have hit a newline that is followed by a valid
12687  // terminator, then we need to return the content of the
12688  // heredoc here as string content. Then, the next time a
12689  // token is lexed, it will match again and return the
12690  // end of the heredoc.
12691  if (lex_mode->as.heredoc.base.indent == PM_HEREDOC_INDENT_TILDE) {
12692  if ((lex_mode->as.heredoc.common_whitespace != NULL) && (*lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
12693  *lex_mode->as.heredoc.common_whitespace = whitespace;
12694  }
12695 
12696  parser->current.end = breakpoint + 1;
12697  pm_token_buffer_flush(parser, &token_buffer);
12699  }
12700 
12701  // Otherwise we hit a newline and it wasn't followed by
12702  // a terminator, so we can continue parsing.
12703  parser->current.end = breakpoint + 1;
12704  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12705  break;
12706  }
12707  case '\\': {
12708  // If we hit an escape, then we need to skip past
12709  // however many characters the escape takes up. However
12710  // it's important that if \n or \r\n are escaped, we
12711  // stop looping before the newline and not after the
12712  // newline so that we can still potentially find the
12713  // terminator of the heredoc.
12714  parser->current.end = breakpoint + 1;
12715 
12716  // If we've hit the end of the file, then break out of
12717  // the loop by setting the breakpoint to NULL.
12718  if (parser->current.end == parser->end) {
12719  breakpoint = NULL;
12720  continue;
12721  }
12722 
12723  pm_token_buffer_escape(parser, &token_buffer);
12724  uint8_t peeked = peek(parser);
12725 
12726  if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12727  switch (peeked) {
12728  case '\r':
12729  parser->current.end++;
12730  if (peek(parser) != '\n') {
12731  pm_token_buffer_push_byte(&token_buffer, '\\');
12732  pm_token_buffer_push_byte(&token_buffer, '\r');
12733  break;
12734  }
12735  /* fallthrough */
12736  case '\n':
12737  pm_token_buffer_push_byte(&token_buffer, '\\');
12738  pm_token_buffer_push_byte(&token_buffer, '\n');
12739  token_buffer.cursor = parser->current.end + 1;
12740  breakpoint = parser->current.end;
12741  continue;
12742  default:
12743  pm_token_buffer_push_byte(&token_buffer, '\\');
12744  pm_token_buffer_push_escaped(&token_buffer, parser);
12745  break;
12746  }
12747  } else {
12748  switch (peeked) {
12749  case '\r':
12750  parser->current.end++;
12751  if (peek(parser) != '\n') {
12752  pm_token_buffer_push_byte(&token_buffer, '\r');
12753  break;
12754  }
12755  /* fallthrough */
12756  case '\n':
12757  // If we are in a tilde here, we should
12758  // break out of the loop and return the
12759  // string content.
12760  if (heredoc_lex_mode->indent == PM_HEREDOC_INDENT_TILDE) {
12761  const uint8_t *end = parser->current.end;
12762  pm_newline_list_append(&parser->newline_list, end);
12763 
12764  // Here we want the buffer to only
12765  // include up to the backslash.
12766  parser->current.end = breakpoint;
12767  pm_token_buffer_flush(parser, &token_buffer);
12768 
12769  // Now we can advance the end of the
12770  // token past the newline.
12771  parser->current.end = end + 1;
12772  lex_mode->as.heredoc.line_continuation = true;
12774  }
12775 
12776  was_line_continuation = true;
12777  token_buffer.cursor = parser->current.end + 1;
12778  breakpoint = parser->current.end;
12779  continue;
12780  default:
12781  escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
12782  break;
12783  }
12784  }
12785 
12786  token_buffer.cursor = parser->current.end;
12787  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12788  break;
12789  }
12790  case '#': {
12791  pm_token_type_t type = lex_interpolation(parser, breakpoint);
12792 
12793  if (type == PM_TOKEN_NOT_PROVIDED) {
12794  // If we haven't returned at this point then we had
12795  // something that looked like an interpolated class
12796  // or instance variable like "#@" but wasn't
12797  // actually. In this case we'll just skip to the
12798  // next breakpoint.
12799  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
12800  break;
12801  }
12802 
12803  if (type == PM_TOKEN_STRING_CONTENT) {
12804  pm_token_buffer_flush(parser, &token_buffer);
12805  }
12806 
12807  LEX(type);
12808  }
12809  default:
12810  assert(false && "unreachable");
12811  }
12812 
12813  was_line_continuation = false;
12814  }
12815 
12816  if (parser->current.end > parser->current.start) {
12817  parser->current.end = parser->end;
12818  pm_token_buffer_flush(parser, &token_buffer);
12820  }
12821 
12822  // If we've hit the end of the string, then this is an unterminated
12823  // heredoc. In that case we'll return a string content token.
12824  parser->current.end = parser->end;
12825  pm_token_buffer_flush(parser, &token_buffer);
12827  }
12828  }
12829 
12830  assert(false && "unreachable");
12831 }
12832 
12833 #undef LEX
12834 
12835 /******************************************************************************/
12836 /* Parse functions */
12837 /******************************************************************************/
12838 
/**
 * Binding power levels for the tokens handled by the expression parser.
 *
 * The levels are listed in ascending numeric order and every level is an even
 * number, which leaves the odd value in between free for the
 * `precedence + 1` right binding power produced by the associativity macros
 * defined after this enum.
 */
typedef enum {
    // Used to indicate this token cannot be used as an infix operator.
    PM_BINDING_POWER_UNSET = 0,

    PM_BINDING_POWER_STATEMENT = 2,

    // rescue
    PM_BINDING_POWER_MODIFIER_RESCUE = 4,

    // if unless until while
    PM_BINDING_POWER_MODIFIER = 6,

    // and or
    PM_BINDING_POWER_COMPOSITION = 8,

    // not
    PM_BINDING_POWER_NOT = 10,

    // => in
    PM_BINDING_POWER_MATCH = 12,

    // defined?
    PM_BINDING_POWER_DEFINED = 14,

    // =
    PM_BINDING_POWER_MULTI_ASSIGNMENT = 16,

    // = += -= *= /= %= &= |= ^= &&= ||= <<= >>= **=
    PM_BINDING_POWER_ASSIGNMENT = 18,

    // ?:
    PM_BINDING_POWER_TERNARY = 20,

    // .. ...
    PM_BINDING_POWER_RANGE = 22,

    // ||
    PM_BINDING_POWER_LOGICAL_OR = 24,

    // &&
    PM_BINDING_POWER_LOGICAL_AND = 26,

    // <=> == === != =~ !~
    PM_BINDING_POWER_EQUALITY = 28,

    // > >= < <=
    PM_BINDING_POWER_COMPARISON = 30,

    // | ^
    PM_BINDING_POWER_BITWISE_OR = 32,

    // &
    PM_BINDING_POWER_BITWISE_AND = 34,

    // << >>
    PM_BINDING_POWER_SHIFT = 36,

    // + -
    PM_BINDING_POWER_TERM = 38,

    // * / %
    PM_BINDING_POWER_FACTOR = 40,

    // -@
    PM_BINDING_POWER_UMINUS = 42,

    // **
    PM_BINDING_POWER_EXPONENT = 44,

    // ! ~ +@
    PM_BINDING_POWER_UNARY = 46,

    // [] []=
    PM_BINDING_POWER_INDEX = 48,

    // :: .
    PM_BINDING_POWER_CALL = 50,

    // Upper bound: one level above every operator level listed here.
    PM_BINDING_POWER_MAX = 52
} pm_binding_power_t;
12876 
12881 typedef struct {
12883  pm_binding_power_t left;
12884 
12886  pm_binding_power_t right;
12887 
12889  bool binary;
12890 
12895  bool nonassoc;
12897 
// Initializer shorthands for entries of the binding power table. Each one
// expands to a brace-enclosed initializer whose values are given positionally
// in the order { left, right, binary, nonassoc }.
//
// The `precedence` argument is parenthesized in every expansion so that an
// arbitrary constant expression (not just a bare enumerator) can be passed
// without `precedence + 1` associating with part of the argument.

// Assignment operators: left power PM_BINDING_POWER_UNARY, right power
// PM_BINDING_POWER_ASSIGNMENT, binary, not marked non-associative.
#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }

// Binary operator whose right power is one above its left power.
#define LEFT_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, false }

// Binary operator whose left and right powers are equal.
#define RIGHT_ASSOCIATIVE(precedence) { (precedence), (precedence), true, false }

// Same powers as LEFT_ASSOCIATIVE, additionally flagged as non-associative.
#define NON_ASSOCIATIVE(precedence) { (precedence), (precedence) + 1, true, true }

// Equal left and right powers, with the binary flag cleared.
#define RIGHT_ASSOCIATIVE_UNARY(precedence) { (precedence), (precedence), false, false }
12903 
/**
 * Table of binding powers, indexed by token type through designated
 * initializers. Token types that are not listed here receive no explicit
 * initializer and are therefore zero-initialized.
 *
 * Most entries are built with the LEFT_ASSOCIATIVE / RIGHT_ASSOCIATIVE /
 * NON_ASSOCIATIVE / RIGHT_ASSOCIATIVE_UNARY / BINDING_POWER_ASSIGNMENT helpers,
 * which are defined before this table (presumably as macros expanding to a
 * brace initializer -- confirm against their definitions). The entries are
 * grouped by operator family; the existing comments name the operators in each
 * group.
 */
12904 pm_binding_powers_t pm_binding_powers[PM_TOKEN_MAXIMUM] = {
12905  // rescue
12906  [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER_RESCUE),
12907 
12908  // if unless until while
12909  [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12910  [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12911  [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12912  [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12913 
12914  // and or
12915  [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12916  [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12917 
12918  // => in
12919  [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12920  [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12921 
12922  // &&= &= ^= = >>= <<= -= %= |= ||= += /= *= **=
12923  [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12924  [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12925  [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12926  [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12927  [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12928  [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12929  [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12930  [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12931  [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12932  [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12933  [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12934  [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12935  [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12936  [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12937 
12938  // ?:
12939  [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
12940 
12941  // .. ...
12942  [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12943  [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
// Note that the unary (prefix) range tokens use the logical-or power rather
// than the range power used by the binary forms above.
12944  [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12945  [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12946 
12947  // ||
12948  [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
12949 
12950  // &&
12951  [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
12952 
12953  // != !~ == === =~ <=>
12954  [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12955  [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12956  [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12957  [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12958  [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12959  [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12960 
12961  // > >= < <=
12962  [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12963  [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12964  [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12965  [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12966 
12967  // ^ |
12968  [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12969  [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12970 
12971  // &
12972  [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
12973 
12974  // >> <<
12975  [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12976  [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12977 
12978  // - +
12979  [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12980  [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12981 
12982  // % / *
12983  [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12984  [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12985  [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
// The unary star shares the factor power with its binary counterpart.
12986  [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
12987 
12988  // -@
12989  [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
// This is the only entry spelled out by hand: it pairs the unary-minus power
// with PM_BINDING_POWER_MAX instead of going through one of the helpers.
12990  [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX, false, false },
12991 
12992  // **
12993  [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
// The unary double star uses the unary power, not the exponent power.
12994  [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12995 
12996  // ! ~ +@
12997  [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12998  [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12999  [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
13000 
13001  // [
13002  [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
13003 
13004  // :: . &.
13005  [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13006  [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
13007  [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
13008 };
13009 
// The table-building helpers are only needed for the initializer above.
// NOTE(review): NON_ASSOCIATIVE is used in the table but is not #undef'd here
// along with the others -- confirm whether that is intentional.
13010 #undef BINDING_POWER_ASSIGNMENT
13011 #undef LEFT_ASSOCIATIVE
13012 #undef RIGHT_ASSOCIATIVE
13013 #undef RIGHT_ASSOCIATIVE_UNARY
13014 
13018 static inline bool
13019 match1(const pm_parser_t *parser, pm_token_type_t type) {
13020  return parser->current.type == type;
13021 }
13022 
13026 static inline bool
13027 match2(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13028  return match1(parser, type1) || match1(parser, type2);
13029 }
13030 
13034 static inline bool
13035 match3(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13036  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
13037 }
13038 
13042 static inline bool
13043 match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
13044  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13045 }
13046 
13050 static inline bool
13051 match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
13052  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
13053 }
13054 
13058 static inline bool
13059 match7(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
13060  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
13061 }
13062 
13066 static inline bool
13067 match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
13068  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13069 }
13070 
13077 static bool
13078 accept1(pm_parser_t *parser, pm_token_type_t type) {
13079  if (match1(parser, type)) {
13080  parser_lex(parser);
13081  return true;
13082  }
13083  return false;
13084 }
13085 
13090 static inline bool
13091 accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13092  if (match2(parser, type1, type2)) {
13093  parser_lex(parser);
13094  return true;
13095  }
13096  return false;
13097 }
13098 
13103 static inline bool
13104 accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13105  if (match3(parser, type1, type2, type3)) {
13106  parser_lex(parser);
13107  return true;
13108  }
13109  return false;
13110 }
13111 
13123 static void
13124 expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
13125  if (accept1(parser, type)) return;
13126 
13127  const uint8_t *location = parser->previous.end;
13128  pm_parser_err(parser, location, location, diag_id);
13129 
13130  parser->previous.start = location;
13131  parser->previous.type = PM_TOKEN_MISSING;
13132 }
13133 
13138 static void
13139 expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_diagnostic_id_t diag_id) {
13140  if (accept2(parser, type1, type2)) return;
13141 
13142  const uint8_t *location = parser->previous.end;
13143  pm_parser_err(parser, location, location, diag_id);
13144 
13145  parser->previous.start = location;
13146  parser->previous.type = PM_TOKEN_MISSING;
13147 }
13148 
13152 static void
13153 expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
13154  if (accept3(parser, type1, type2, type3)) return;
13155 
13156  const uint8_t *location = parser->previous.end;
13157  pm_parser_err(parser, location, location, diag_id);
13158 
13159  parser->previous.start = location;
13160  parser->previous.type = PM_TOKEN_MISSING;
13161 }
13162 
13167 static void
13168 expect1_heredoc_term(pm_parser_t *parser, const uint8_t *ident_start, size_t ident_length) {
13169  if (match1(parser, PM_TOKEN_HEREDOC_END)) {
13170  parser_lex(parser);
13171  } else {
13172  pm_parser_err_heredoc_term(parser, ident_start, ident_length);
13173  parser->previous.start = parser->previous.end;
13174  parser->previous.type = PM_TOKEN_MISSING;
13175  }
13176 }
13177 
/**
 * Forward declaration of parse_expression. The helpers that follow (for
 * example parse_value_expression and parse_targets) call it before its
 * definition appears in the file.
 */
13178 static pm_node_t *
13179 parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth);
13180 
13185 static pm_node_t *
13186 parse_value_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
13187  pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
13188  pm_assert_value_expression(parser, node);
13189  return node;
13190 }
13191 
13210 static inline bool
13211 token_begins_expression_p(pm_token_type_t type) {
13212  switch (type) {
13214  case PM_TOKEN_KEYWORD_IN:
13215  // We need to special case this because it is a binary operator that
13216  // should not be marked as beginning an expression.
13217  return false;
13218  case PM_TOKEN_BRACE_RIGHT:
13220  case PM_TOKEN_COLON:
13221  case PM_TOKEN_COMMA:
13222  case PM_TOKEN_EMBEXPR_END:
13223  case PM_TOKEN_EOF:
13224  case PM_TOKEN_LAMBDA_BEGIN:
13225  case PM_TOKEN_KEYWORD_DO:
13227  case PM_TOKEN_KEYWORD_END:
13228  case PM_TOKEN_KEYWORD_ELSE:
13231  case PM_TOKEN_KEYWORD_THEN:
13233  case PM_TOKEN_KEYWORD_WHEN:
13234  case PM_TOKEN_NEWLINE:
13236  case PM_TOKEN_SEMICOLON:
13237  // The reason we need this short-circuit is because we're using the
13238  // binding powers table to tell us if the subsequent token could
13239  // potentially be the start of an expression. If there _is_ a binding
13240  // power for one of these tokens, then we should remove it from this list
13241  // and let it be handled by the default case below.
13242  assert(pm_binding_powers[type].left == PM_BINDING_POWER_UNSET);
13243  return false;
13244  case PM_TOKEN_UAMPERSAND:
13245  // This is a special case because this unary operator cannot appear
13246  // as a general operator, it only appears in certain circumstances.
13247  return false;
13248  case PM_TOKEN_UCOLON_COLON:
13249  case PM_TOKEN_UMINUS:
13250  case PM_TOKEN_UMINUS_NUM:
13251  case PM_TOKEN_UPLUS:
13252  case PM_TOKEN_BANG:
13253  case PM_TOKEN_TILDE:
13254  case PM_TOKEN_UDOT_DOT:
13255  case PM_TOKEN_UDOT_DOT_DOT:
13256  // These unary tokens actually do have binding power associated with them
13257  // so that we can correctly place them into the precedence order. But we
13258  // want them to be marked as beginning an expression, so we need to
13259  // special case them here.
13260  return true;
13261  default:
13262  return pm_binding_powers[type].left == PM_BINDING_POWER_UNSET;
13263  }
13264 }
13265 
13270 static pm_node_t *
13271 parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
13272  if (accept1(parser, PM_TOKEN_USTAR)) {
13273  pm_token_t operator = parser->previous;
13274  pm_node_t *expression = parse_value_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13275  return (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
13276  }
13277 
13278  return parse_value_expression(parser, binding_power, accepts_command_call, false, diag_id, depth);
13279 }
13280 
13285 static void
13286 parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
13287  // The method name needs to change. If we previously had
13288  // foo, we now need foo=. In this case we'll allocate a new
13289  // owned string, copy the previous method name in, and
13290  // append an =.
13291  pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
13292  size_t length = constant->length;
13293  uint8_t *name = xcalloc(length + 1, sizeof(uint8_t));
13294  if (name == NULL) return;
13295 
13296  memcpy(name, constant->start, length);
13297  name[length] = '=';
13298 
13299  // Now switch the name to the new string.
13300  // This silences clang analyzer warning about leak of memory pointed by `name`.
13301  // NOLINTNEXTLINE(clang-analyzer-*)
13302  *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
13303 }
13304 
13311 static pm_node_t *
13312 parse_unwriteable_target(pm_parser_t *parser, pm_node_t *target) {
13313  switch (PM_NODE_TYPE(target)) {
13314  case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13315  case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13316  case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13317  case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13318  case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13319  case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13320  case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13321  default: break;
13322  }
13323 
13324  pm_constant_id_t name = pm_parser_constant_id_location(parser, target->location.start, target->location.end);
13325  pm_local_variable_target_node_t *result = pm_local_variable_target_node_create(parser, &target->location, name, 0);
13326 
13327  pm_node_destroy(parser, target);
13328  return (pm_node_t *) result;
13329 }
13330 
13336 static void
13337 parse_target_implicit_parameter(pm_parser_t *parser, pm_node_t *node) {
13338  pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
13339 
13340  for (size_t index = 0; index < implicit_parameters->size; index++) {
13341  if (implicit_parameters->nodes[index] == node) {
13342  // If the node is not the last one in the list, we need to shift the
13343  // remaining nodes down to fill the gap. This is extremely unlikely
13344  // to happen.
13345  if (index != implicit_parameters->size - 1) {
13346  memcpy(&implicit_parameters->nodes[index], &implicit_parameters->nodes[index + 1], (implicit_parameters->size - index - 1) * sizeof(pm_node_t *));
13347  }
13348 
13349  implicit_parameters->size--;
13350  break;
13351  }
13352  }
13353 }
13354 
/**
 * Convert a node that was parsed as an ordinary expression into a node that
 * can be used as the target of a write. Depending on the node type the node is
 * either returned as is, retyped in place, or destroyed and replaced by a
 * newly created target node. Nodes that cannot be converted get a
 * PM_ERR_WRITE_TARGET_UNEXPECTED error and are returned unchanged.
 *
 * `multiple` is forwarded to recursive calls and makes a safe-navigation call
 * target an error. `splat_parent` is true when the target is the operand of a
 * splat; in that position a multi target node is reported as an error.
 *
 * NOTE(review): as it stands, this switch contains several statements that are
 * not preceded by any case label, and at least one closing brace without a
 * matching opening one. Lines appear to be missing from this copy of the
 * function; restore them from the authoritative source before relying on it.
 */
13363 static pm_node_t *
13364 parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_parent) {
13365  switch (PM_NODE_TYPE(target)) {
13366  case PM_MISSING_NODE:
13367  return target;
13369  case PM_FALSE_NODE:
13370  case PM_SOURCE_FILE_NODE:
13371  case PM_SOURCE_LINE_NODE:
13372  case PM_NIL_NODE:
13373  case PM_SELF_NODE:
13374  case PM_TRUE_NODE: {
13375  // In these special cases, we have specific error messages and we
13376  // will replace them with local variable writes.
13377  return parse_unwriteable_target(parser, target);
13378  }
// NOTE(review): no case label is visible for the following return.
13382  return target;
13383  case PM_CONSTANT_PATH_NODE:
13384  if (context_def_p(parser)) {
13385  pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13386  }
13387 
// The size check documents that a read node and its target node are expected
// to be interchangeable in memory.
// NOTE(review): unlike the constant read case below, no assignment to
// target->type is visible after this assertion -- confirm.
13388  assert(sizeof(pm_constant_path_target_node_t) == sizeof(pm_constant_path_node_t));
13390 
13391  return target;
13392  case PM_CONSTANT_READ_NODE:
13393  if (context_def_p(parser)) {
13394  pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
13395  }
13396 
// The node is retyped in place rather than reallocated; the assertion checks
// that both node structs have the same size.
13397  assert(sizeof(pm_constant_target_node_t) == sizeof(pm_constant_read_node_t));
13398  target->type = PM_CONSTANT_TARGET_NODE;
13399 
13400  return target;
// NOTE(review): no case label is visible for the read-only error below.
13403  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13404  return target;
// NOTE(review): no case label is visible for the following return.
13408  return target;
// NOTE(review): no case label or opening brace is visible here. Judging by the
// cast below, this is presumably the local variable read case -- confirm.
// Writing to a name that looks like a numbered parameter is an error, and the
// node is dropped from the scope's implicit parameters.
13410  if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13411  PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED, target->location.start);
13412  parse_target_implicit_parameter(parser, target);
13413  }
13414 
// The local was recorded as read when it was parsed as an expression; undo
// that in the scope `depth` levels up, since it is being written instead.
13415  const pm_local_variable_read_node_t *cast = (const pm_local_variable_read_node_t *) target;
13416  uint32_t name = cast->name;
13417  uint32_t depth = cast->depth;
13418  pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
13419 
13422 
13423  return target;
13424  }
// NOTE(review): no case label or opening brace is visible here either. The
// code registers a local named "it", creates a local variable target node at
// the same location, removes the original node from the implicit parameters
// and destroys it.
13426  pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13427  pm_node_t *node = (pm_node_t *) pm_local_variable_target_node_create(parser, &target->location, name, 0);
13428 
13429  parse_target_implicit_parameter(parser, target);
13430  pm_node_destroy(parser, target);
13431 
13432  return node;
13433  }
// NOTE(review): no case label is visible for the following return.
13437  return target;
13438  case PM_MULTI_TARGET_NODE:
13439  if (splat_parent) {
13440  // Multi target is not accepted in all positions. If this is one
13441  // of them, then we need to add an error.
13442  pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13443  }
13444 
13445  return target;
13446  case PM_SPLAT_NODE: {
13447  pm_splat_node_t *splat = (pm_splat_node_t *) target;
13448 
// An anonymous splat has no expression; otherwise the operand is converted
// recursively with splat_parent set.
13449  if (splat->expression != NULL) {
13450  splat->expression = parse_target(parser, splat->expression, multiple, true);
13451  }
13452 
13453  return (pm_node_t *) splat;
13454  }
13455  case PM_CALL_NODE: {
13456  pm_call_node_t *call = (pm_call_node_t *) target;
13457 
13458  // If we have no arguments to the call node and we need this to be a
13459  // target then this is either a method call or a local variable
13460  // write.
13461  if (
13462  (call->message_loc.start != NULL) &&
13463  (call->message_loc.end[-1] != '!') &&
13464  (call->message_loc.end[-1] != '?') &&
13465  (call->opening_loc.start == NULL) &&
13466  (call->arguments == NULL) &&
13467  (call->block == NULL)
13468  ) {
13469  if (call->receiver == NULL) {
13470  // When we get here, we have a local variable write, because it
13471  // was previously marked as a method call but now we have an =.
13472  // This looks like:
13473  //
13474  // foo = 1
13475  //
13476  // When it was parsed in the prefix position, foo was seen as a
13477  // method call with no receiver and no arguments. Now we have an
13478  // =, so we know it's a local variable write.
// The location is copied out first because the call node is destroyed before
// the replacement node is created.
13479  const pm_location_t message_loc = call->message_loc;
13480 
13481  pm_constant_id_t name = pm_parser_local_add_location(parser, message_loc.start, message_loc.end, 0);
13482  pm_node_destroy(parser, target);
13483 
13484  return (pm_node_t *) pm_local_variable_target_node_create(parser, &message_loc, name, 0);
13485  }
13486 
// A call with a receiver whose message starts with an underscore or an
// alphanumeric character becomes an attribute write (foo.bar becomes foo.bar=).
13487  if (*call->message_loc.start == '_' || parser->encoding->alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
13488  if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
13489  pm_parser_err_node(parser, (const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
13490  }
13491 
13492  parse_write_name(parser, &call->name);
13493  return (pm_node_t *) pm_call_target_node_create(parser, call);
13494  }
13495  }
13496 
13497  // If there is no call operator and the message is "[]" then this is
13498  // an aref expression, and we can transform it into an aset
13499  // expression.
13500  if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13501  return (pm_node_t *) pm_index_target_node_create(parser, call);
13502  }
13503  }
13504  /* fallthrough */
13505  default:
13506  // In this case we have a node that we don't know how to convert
13507  // into a target. We need to treat it as an error. For now, we'll
13508  // mark it as an error and just skip right past it.
13509  pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
13510  return target;
13511  }
13512 }
13513 
13518 static pm_node_t *
13519 parse_target_validate(pm_parser_t *parser, pm_node_t *target, bool multiple) {
13520  pm_node_t *result = parse_target(parser, target, multiple, false);
13521 
13522  // Ensure that we have one of an =, an 'in' in for indexes, and a ')' in
13523  // parens after the targets.
13524  if (
13525  !match1(parser, PM_TOKEN_EQUAL) &&
13526  !(context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) &&
13527  !(context_p(parser, PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
13528  ) {
13529  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13530  }
13531 
13532  return result;
13533 }
13534 
13539 static pm_node_t *
13540 parse_shareable_constant_write(pm_parser_t *parser, pm_node_t *write) {
13541  pm_shareable_constant_value_t shareable_constant = pm_parser_scope_shareable_constant_get(parser);
13542 
13543  if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
13544  return (pm_node_t *) pm_shareable_constant_node_create(parser, write, shareable_constant);
13545  }
13546 
13547  return write;
13548 }
13549 
/**
 * Convert a node that was parsed as an ordinary expression into a write node
 * that assigns `value` to it, using `operator` as the assignment operator
 * token. Depending on the node type a new write node is created (and the
 * original target destroyed), or the target itself is modified in place and
 * returned. Targets that cannot be written to get a
 * PM_ERR_WRITE_TARGET_UNEXPECTED error on the operator token and are returned
 * unchanged.
 *
 * NOTE(review): as it stands, this switch contains several statements that are
 * not preceded by any case label, and several closing braces without a
 * matching opening one. Lines appear to be missing from this copy of the
 * function; restore them from the authoritative source before relying on it.
 */
13553 static pm_node_t *
13554 parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_node_t *value) {
13555  switch (PM_NODE_TYPE(target)) {
13556  case PM_MISSING_NODE:
// The value cannot be attached to a missing target, so it is destroyed here.
13557  pm_node_destroy(parser, value);
13558  return target;
// NOTE(review): no case label or opening brace is visible here. Judging by the
// cast, this is presumably the class variable read case -- confirm.
13560  pm_class_variable_write_node_t *node = pm_class_variable_write_node_create(parser, (pm_class_variable_read_node_t *) target, operator, value);
13561  pm_node_destroy(parser, target);
13562  return (pm_node_t *) node;
13563  }
13564  case PM_CONSTANT_PATH_NODE: {
13565  pm_node_t *node = (pm_node_t *) pm_constant_path_write_node_create(parser, (pm_constant_path_node_t *) target, operator, value);
13566 
13567  if (context_def_p(parser)) {
13568  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13569  }
13570 
// Unlike the constant read case below, the target is not destroyed here.
13571  return parse_shareable_constant_write(parser, node);
13572  }
13573  case PM_CONSTANT_READ_NODE: {
13574  pm_node_t *node = (pm_node_t *) pm_constant_write_node_create(parser, (pm_constant_read_node_t *) target, operator, value);
13575 
13576  if (context_def_p(parser)) {
13577  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
13578  }
13579 
13580  pm_node_destroy(parser, target);
13581  return parse_shareable_constant_write(parser, node);
13582  }
// NOTE(review): no case label is visible for the read-only error below. After
// reporting it, control falls through to the global variable write.
13585  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13586  /* fallthrough */
// NOTE(review): no case label or opening brace is visible here.
13588  pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13589  pm_node_destroy(parser, target);
13590  return (pm_node_t *) node;
13591  }
// NOTE(review): no case label is visible here, and the declaration of
// `local_read` used below is not visible either.
13594 
13595  pm_constant_id_t name = local_read->name;
13596  pm_location_t name_loc = target->location;
13597 
13598  uint32_t depth = local_read->depth;
13599  pm_scope_t *scope = pm_parser_scope_find(parser, depth);
13600 
// Writing to a name that looks like a numbered parameter is an error. Which
// diagnostic is used depends on whether the scope has its
// PM_SCOPE_PARAMETERS_NUMBERED_FOUND flag set.
13601  if (pm_token_is_numbered_parameter(target->location.start, target->location.end)) {
13602  pm_diagnostic_id_t diag_id = (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
13603  PM_PARSER_ERR_FORMAT(parser, target->location.start, target->location.end, diag_id, target->location.start);
13604  parse_target_implicit_parameter(parser, target);
13605  }
13606 
// Everything needed from the target was copied into locals above, so it can be
// destroyed before the write node is created.
13607  pm_locals_unread(&scope->locals, name);
13608  pm_node_destroy(parser, target);
13609 
13610  return (pm_node_t *) pm_local_variable_write_node_create(parser, name, depth, value, &name_loc, operator);
13611  }
// NOTE(review): no case label or opening brace is visible here. The code
// registers a local named "it", creates a local variable write node at the
// target's location, removes the target from the implicit parameters and
// destroys it.
13613  pm_constant_id_t name = pm_parser_local_add_constant(parser, "it", 2);
13614  pm_node_t *node = (pm_node_t *) pm_local_variable_write_node_create(parser, name, 0, value, &target->location, operator);
13615 
13616  parse_target_implicit_parameter(parser, target);
13617  pm_node_destroy(parser, target);
13618 
13619  return node;
13620  }
// NOTE(review): no case label or opening brace is visible here. Judging by the
// cast, this is presumably the instance variable read case -- confirm.
13622  pm_node_t *write_node = (pm_node_t *) pm_instance_variable_write_node_create(parser, (pm_instance_variable_read_node_t *) target, operator, value);
13623  pm_node_destroy(parser, target);
13624  return write_node;
13625  }
13626  case PM_MULTI_TARGET_NODE:
13627  return (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) target, operator, value);
13628  case PM_SPLAT_NODE: {
13629  pm_splat_node_t *splat = (pm_splat_node_t *) target;
13630 
// NOTE(review): `value` is handed both to the recursive call here and to the
// multi write node created below -- confirm that this sharing is intended.
13631  if (splat->expression != NULL) {
13632  splat->expression = parse_write(parser, splat->expression, operator, value);
13633  }
13634 
// A lone splat target is wrapped in a fresh multi target node so that the
// result is a multi write.
13635  pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
13636  pm_multi_target_node_targets_append(parser, multi_target, (pm_node_t *) splat);
13637 
13638  return (pm_node_t *) pm_multi_write_node_create(parser, multi_target, operator, value);
13639  }
13640  case PM_CALL_NODE: {
13641  pm_call_node_t *call = (pm_call_node_t *) target;
13642 
13643  // If we have no arguments to the call node and we need this to be a
13644  // target then this is either a method call or a local variable
13645  // write.
13646  if (
13647  (call->message_loc.start != NULL) &&
13648  (call->message_loc.end[-1] != '!') &&
13649  (call->message_loc.end[-1] != '?') &&
13650  (call->opening_loc.start == NULL) &&
13651  (call->arguments == NULL) &&
13652  (call->block == NULL)
13653  ) {
13654  if (call->receiver == NULL) {
13655  // When we get here, we have a local variable write, because it
13656  // was previously marked as a method call but now we have an =.
13657  // This looks like:
13658  //
13659  // foo = 1
13660  //
13661  // When it was parsed in the prefix position, foo was seen as a
13662  // method call with no receiver and no arguments. Now we have an
13663  // =, so we know it's a local variable write.
// The location is copied out first because the call node is destroyed before
// the write node is created.
13664  const pm_location_t message = call->message_loc;
13665 
13666  pm_parser_local_add_location(parser, message.start, message.end, 0);
13667  pm_node_destroy(parser, target);
13668 
13669  pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, message.start, message.end);
13670  target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
13671 
13672  pm_refute_numbered_parameter(parser, message.start, message.end);
13673  return target;
13674  }
13675 
13676  if (char_is_identifier_start(parser, call->message_loc.start)) {
13677  // When we get here, we have a method call, because it was
13678  // previously marked as a method call but now we have an =. This
13679  // looks like:
13680  //
13681  // foo.bar = 1
13682  //
13683  // When it was parsed in the prefix position, foo.bar was seen as a
13684  // method call with no arguments. Now we have an =, so we know it's
13685  // a method call with an argument. In this case we will create the
13686  // arguments node, parse the argument, and add it to the list.
13687  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
13688  call->arguments = arguments;
13689 
// The call's end is extended to the end of the arguments node after the value
// has been appended to it.
13690  pm_arguments_node_arguments_append(arguments, value);
13691  call->base.location.end = arguments->base.location.end;
13692 
13693  parse_write_name(parser, &call->name);
13694  pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13695 
13696  return (pm_node_t *) call;
13697  }
13698  }
13699 
13700  // If there is no call operator and the message is "[]" then this is
13701  // an aref expression, and we can transform it into an aset
13702  // expression.
13703  if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13704  if (call->arguments == NULL) {
13705  call->arguments = pm_arguments_node_create(parser);
13706  }
13707 
// The value becomes the last argument of the index call, and the call's end is
// extended to the end of the value.
13708  pm_arguments_node_arguments_append(call->arguments, value);
13709  target->location.end = value->location.end;
13710 
13711  // Replace the name with "[]=".
13712  call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13713  pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13714 
13715  return target;
13716  }
13717 
13718  // If there are arguments on the call node, then it can't be a method
13719  // call ending with = or a local variable write, so it must be a
13720  // syntax error. In this case we'll fall through to our default
13721  // handling. We need to free the value that we parsed because there
13722  // is no way for us to attach it to the tree at this point.
13723  pm_node_destroy(parser, value);
13724  }
13725  /* fallthrough */
13726  default:
13727  // In this case we have a node that we don't know how to convert into a
13728  // target. We need to treat it as an error. For now, we'll mark it as an
13729  // error and just skip right past it.
13730  pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
13731  return target;
13732  }
13733 }
13734 
13741 static pm_node_t *
13742 parse_unwriteable_write(pm_parser_t *parser, pm_node_t *target, const pm_token_t *equals, pm_node_t *value) {
13743  switch (PM_NODE_TYPE(target)) {
13744  case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING); break;
13745  case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE); break;
13746  case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE); break;
13747  case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE); break;
13748  case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL); break;
13749  case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF); break;
13750  case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE); break;
13751  default: break;
13752  }
13753 
13754  pm_constant_id_t name = pm_parser_local_add_location(parser, target->location.start, target->location.end, 1);
13755  pm_local_variable_write_node_t *result = pm_local_variable_write_node_create(parser, name, 0, value, &target->location, equals);
13756 
13757  pm_node_destroy(parser, target);
13758  return (pm_node_t *) result;
13759 }
13760 
13771 static pm_node_t *
13772 parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13773  bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13774 
13775  pm_multi_target_node_t *result = pm_multi_target_node_create(parser);
13776  pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target, true, false));
13777 
13778  while (accept1(parser, PM_TOKEN_COMMA)) {
13779  if (accept1(parser, PM_TOKEN_USTAR)) {
13780  // Here we have a splat operator. It can have a name or be
13781  // anonymous. It can be the final target or be in the middle if
13782  // there haven't been any others yet.
13783  if (has_rest) {
13784  pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
13785  }
13786 
13787  pm_token_t star_operator = parser->previous;
13788  pm_node_t *name = NULL;
13789 
13790  if (token_begins_expression_p(parser->current.type)) {
13791  name = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13792  name = parse_target(parser, name, true, true);
13793  }
13794 
13795  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
13796  pm_multi_target_node_targets_append(parser, result, splat);
13797  has_rest = true;
13798  } else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13799  context_push(parser, PM_CONTEXT_MULTI_TARGET);
13800  pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13801  target = parse_target(parser, target, true, false);
13802 
13803  pm_multi_target_node_targets_append(parser, result, target);
13804  context_pop(parser);
13805  } else if (token_begins_expression_p(parser->current.type)) {
13806  pm_node_t *target = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13807  target = parse_target(parser, target, true, false);
13808 
13809  pm_multi_target_node_targets_append(parser, result, target);
13810  } else if (!match1(parser, PM_TOKEN_EOF)) {
13811  // If we get here, then we have a trailing , in a multi target node.
13812  // We'll add an implicit rest node to represent this.
13813  pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
13814  pm_multi_target_node_targets_append(parser, result, rest);
13815  break;
13816  }
13817  }
13818 
13819  return (pm_node_t *) result;
13820 }
13821 
13826 static pm_node_t *
13827 parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13828  pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
13829  accept1(parser, PM_TOKEN_NEWLINE);
13830 
13831  // Ensure that we have either an = or a ) after the targets.
13832  if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13833  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
13834  }
13835 
13836  return result;
13837 }
13838 
/**
 * Parse a list of statements for the given context. Statements are separated
 * by newline or semicolon tokens. Returns NULL when, after skipping any
 * leading terminators, the current token already terminates the context;
 * otherwise returns a statements node holding every expression that was
 * parsed. The context is pushed for the duration of the loop and popped
 * before the void-statement check runs.
 */
13842 static pm_statements_node_t *
13843 parse_statements(pm_parser_t *parser, pm_context_t context, uint16_t depth) {
13844  // First, skip past any optional terminators that might be at the beginning
13845  // of the statements.
13846  while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
13847 
13848  // If we have a terminator, then we can just return NULL.
13849  if (context_terminator(context, &parser->current)) return NULL;
13850 
13851  pm_statements_node_t *statements = pm_statements_node_create(parser);
13852 
13853  // At this point we know we have at least one statement, and that it
13854  // immediately follows the current token.
13855  context_push(parser, context);
13856 
13857  while (true) {
       // Each iteration parses one statement and appends it to the list, even
       // when the result is a missing node (that case is handled further down).
13858  pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13859  pm_statements_node_body_append(parser, statements, node, true);
13860 
13861  // If we're recovering from a syntax error, then we need to stop parsing
13862  // the statements now.
13863  if (parser->recovering) {
13864  // If this is the level of context where the recovery has happened,
13865  // then we can mark the parser as done recovering.
13866  if (context_terminator(context, &parser->current)) parser->recovering = false;
13867  break;
13868  }
13869 
13870  // If we have a terminator, then we will parse all consecutive
13871  // terminators and then continue parsing the statements list.
13872  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13873  // If we have a terminator, then we will continue parsing the
13874  // statements list.
13875  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13876  if (context_terminator(context, &parser->current)) break;
13877 
13878  // Now we can continue parsing the list of statements.
13879  continue;
13880  }
13881 
13882  // At this point we have a list of statements that are not terminated by
13883  // a newline or semicolon. At this point we need to check if we're at
13884  // the end of the statements list. If we are, then we should break out
13885  // of the loop.
13886  if (context_terminator(context, &parser->current)) break;
13887 
13888  // At this point, we have a syntax error, because the statement was not
13889  // terminated by a newline or semicolon, and we're not at the end of the
13890  // statements list. Ideally we should scan forward to determine if we
13891  // should insert a missing terminator or break out of parsing the
13892  // statements list at this point.
13893  //
13894  // We don't have that yet, so instead we'll do a more naive approach. If
13895  // we were unable to parse an expression, then we will skip past this
13896  // token and continue parsing the statements list. Otherwise we'll add
13897  // an error and continue parsing the statements list.
13898  if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
       // Advance by one token so that the loop makes progress, then skip any
       // terminators that follow it.
13899  parser_lex(parser);
13900 
13901  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13902  if (context_terminator(context, &parser->current)) break;
13903  } else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13904  // This is an inlined version of accept1 because the error that we
13905  // want to add has varargs. If this happens again, we should
13906  // probably extract a helper function.
13907  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
       // Turn the previous token into a zero-width PM_TOKEN_MISSING token
       // positioned at its own end.
13908  parser->previous.start = parser->previous.end;
13909  parser->previous.type = PM_TOKEN_MISSING;
13910  }
13911  }
13912 
13913  context_pop(parser);
       // The final argument to pm_void_statements_check is true by default and
       // false for the contexts listed in the switch below.
       // NOTE(review): the embedded numbering jumps from 13915 to 13917, so a
       // case label may be missing from this listing - confirm against the
       // upstream file.
13914  bool last_value = true;
13915  switch (context) {
13917  case PM_CONTEXT_DEF_ENSURE:
13918  last_value = false;
13919  break;
13920  default:
13921  break;
13922  }
13923  pm_void_statements_check(parser, statements, last_value);
13924 
13925  return statements;
13926 }
13927 
/**
 * Record a hash key node in the given set of static literals. When
 * pm_static_literals_add hands back a non-NULL node (the duplicate), the
 * duplicate is rendered into a temporary buffer and used to build a
 * PM_WARN_DUPLICATED_HASH_KEY diagnostic targeting the parser's warning list
 * and spanning the duplicate's location.
 */
13932 static void
13933 pm_hash_key_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13934  const pm_node_t *duplicated = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, true);
13935 
13936  if (duplicated != NULL) {
       // Render the duplicated literal as text for the warning message.
13937  pm_buffer_t buffer = { 0 };
13938  pm_static_literal_inspect(&buffer, &parser->newline_list, parser->start_line, parser->encoding->name, duplicated);
13939 
       // NOTE(review): the embedded numbering skips 13940 and 13947, so the
       // head of the call that receives these arguments (and possibly one
       // trailing argument) is missing from this listing - confirm against
       // the upstream file.
13941  &parser->warning_list,
13942  duplicated->location.start,
13943  duplicated->location.end,
13944  PM_WARN_DUPLICATED_HASH_KEY,
13945  (int) pm_buffer_length(&buffer),
13946  pm_buffer_value(&buffer),
13948  );
13949 
       // The buffer is only needed while building the diagnostic.
13950  pm_buffer_free(&buffer);
13951  }
13952 }
13953 
/**
 * Record a `when` clause condition node in the given set of static literals.
 * When pm_static_literals_add hands back a non-NULL previous node, a
 * PM_WARN_DUPLICATED_WHEN_CLAUSE diagnostic is built for the parser's warning
 * list, spanning the location of the node that was just added.
 */
13958 static void
13959 pm_when_clause_static_literals_add(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node) {
13960  pm_node_t *previous;
13961 
13962  if ((previous = pm_static_literals_add(&parser->newline_list, parser->start_line, literals, node, false)) != NULL) {
       // NOTE(review): the embedded numbering skips 13963 and 13968-13969, so
       // the head of the call that receives these arguments and its trailing
       // arguments are missing from this listing - confirm against the
       // upstream file.
13964  &parser->warning_list,
13965  node->location.start,
13966  node->location.end,
13967  PM_WARN_DUPLICATED_WHEN_CLAUSE,
13970  );
13971  }
13972 }
13973 
/**
 * Parse the elements of a hash or keyword hash and append each one to `node`.
 * Three element forms are handled: `**value` splats, `label:` keys (with or
 * without an explicit value), and arbitrary key expressions followed by a
 * value. Keys of the last two forms are recorded in `literals` through
 * pm_hash_key_static_literals_add. Parsing continues for as long as a comma
 * is followed by a token that can start another element.
 *
 * Returns true if at least one `**` element was parsed.
 */
13977 static bool
13978 parse_assocs(pm_parser_t *parser, pm_static_literals_t *literals, pm_node_t *node, uint16_t depth) {
       // NOTE(review): the embedded numbering skips 13979, so one line is
       // missing from this listing at this point - confirm against the
       // upstream file.
13980  bool contains_keyword_splat = false;
13981 
13982  while (true) {
13983  pm_node_t *element;
13984 
13985  switch (parser->current.type) {
13986  case PM_TOKEN_USTAR_STAR: {
13987  parser_lex(parser);
13988  pm_token_t operator = parser->previous;
13989  pm_node_t *value = NULL;
13990 
13991  if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13992  // If we're about to parse a nested hash that is being
13993  // pushed into this hash directly with **, then we want the
13994  // inner hash to share the static literals with the outer
13995  // hash.
13996  parser->current_hash_keys = literals;
13997  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13998  } else if (token_begins_expression_p(parser->current.type)) {
13999  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
14000  } else {
       // A bare ** with no operand: value stays NULL and the enclosing scope
       // is checked for keyword forwarding.
14001  pm_parser_scope_forwarding_keywords_check(parser, &operator);
14002  }
14003 
14004  element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
14005  contains_keyword_splat = true;
14006  break;
14007  }
14008  case PM_TOKEN_LABEL: {
14009  pm_token_t label = parser->current;
14010  parser_lex(parser);
14011 
14012  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &label);
14013  pm_hash_key_static_literals_add(parser, literals, key);
14014 
14015  pm_token_t operator = not_provided(parser);
14016  pm_node_t *value = NULL;
14017 
14018  if (token_begins_expression_p(parser->current.type)) {
14019  value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
14020  } else {
       // No value follows the label, so the value is synthesized from the
       // label text itself. The tokens built below end at label.end - 1,
       // i.e. they exclude the label's final byte.
14021  if (parser->encoding->isupper_char(label.start, (label.end - 1) - label.start)) {
14022  pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.start, .end = label.end - 1 };
14023  value = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
14024  } else {
       // This local `depth` shadows the uint16_t `depth` parameter for the
       // rest of this block. It stays -1 unless pm_parser_local_depth
       // returns something else; -1 selects the method-call form below.
14025  int depth = -1;
14026  pm_token_t identifier = { .type = PM_TOKEN_IDENTIFIER, .start = label.start, .end = label.end - 1 };
14027 
14028  if (identifier.end[-1] == '!' || identifier.end[-1] == '?') {
14029  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
14030  } else {
14031  depth = pm_parser_local_depth(parser, &identifier);
14032  }
14033 
14034  if (depth == -1) {
14035  value = (pm_node_t *) pm_call_node_variable_call_create(parser, &identifier);
14036  } else {
14037  value = (pm_node_t *) pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth);
14038  }
14039  }
14040 
       // Extend the synthesized value's location by one byte (the byte that
       // was excluded from the token above) before wrapping it in an implicit
       // node.
14041  value->location.end++;
14042  value = (pm_node_t *) pm_implicit_node_create(parser, value);
14043  }
14044 
14045  element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14046  break;
14047  }
14048  default: {
14049  pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
14050 
14051  // Hash keys that are strings are automatically frozen. We will
14052  // mark that here.
14053  if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
14054  pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
14055  }
14056 
14057  pm_hash_key_static_literals_add(parser, literals, key);
14058 
       // A key for which pm_symbol_node_label_p is true needs no operator;
       // every other key must be followed by =>.
14059  pm_token_t operator;
14060  if (pm_symbol_node_label_p(key)) {
14061  operator = not_provided(parser);
14062  } else {
14063  expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
14064  operator = parser->previous;
14065  }
14066 
14067  pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14068  element = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
14069  break;
14070  }
14071  }
14072 
       // `node` is treated as a hash node when its type is PM_HASH_NODE and
       // as a keyword hash node otherwise.
14073  if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
14074  pm_hash_node_elements_append((pm_hash_node_t *) node, element);
14075  } else {
14076  pm_keyword_hash_node_elements_append((pm_keyword_hash_node_t *) node, element);
14077  }
14078 
14079  // If there's no comma after the element, then we're done.
14080  if (!accept1(parser, PM_TOKEN_COMMA)) break;
14081 
14082  // If the next element starts with a label or a **, then we know we have
14083  // another element in the hash, so we'll continue parsing.
14084  if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)) continue;
14085 
14086  // Otherwise we need to check if the subsequent token begins an expression.
14087  // If it does, then we'll continue parsing.
14088  if (token_begins_expression_p(parser->current.type)) continue;
14089 
14090  // Otherwise by default we will exit out of this loop.
14091  break;
14092  }
14093 
14094  return contains_keyword_splat;
14095 }
14096 
14100 static inline void
14101 parse_arguments_append(pm_parser_t *parser, pm_arguments_t *arguments, pm_node_t *argument) {
14102  if (arguments->arguments == NULL) {
14103  arguments->arguments = pm_arguments_node_create(parser);
14104  }
14105 
14106  pm_arguments_node_arguments_append(arguments->arguments, argument);
14107 }
14108 
/**
 * Parse a comma-separated list of arguments into `arguments`.
 *
 * Nothing is parsed when the current token is `terminator` or EOF, when its
 * left binding power is set and lower than PM_BINDING_POWER_RANGE, or when it
 * terminates the current context. Otherwise arguments are parsed one at a
 * time: bare keyword hashes (`label:`, `**`, or `key =>`), block arguments
 * (`&`), splats (`*`), `...` (forwarding when `accepts_forwarding` is true and
 * no expression follows, otherwise parsed as an expression), and plain value
 * expressions. Parsing stops at a missing node, when the parser is
 * recovering, when no comma follows an argument, or when a trailing comma is
 * followed by `terminator`.
 */
14112 static void
14113 parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
14114  pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
14115 
14116  // First we need to check if the next token is one that could be the start
14117  // of an argument. If it's not, then we can just return.
14118  if (
14119  match2(parser, terminator, PM_TOKEN_EOF) ||
14120  (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
14121  context_terminator(parser->current_context->context, &parser->current)
14122  ) {
14123  return;
14124  }
14125 
       // State carried across iterations; it is used to report arguments that
       // appear in an invalid position relative to earlier ones.
14126  bool parsed_first_argument = false;
14127  bool parsed_bare_hash = false;
14128  bool parsed_block_argument = false;
14129  bool parsed_forwarding_arguments = false;
14130 
14131  while (!match1(parser, PM_TOKEN_EOF)) {
       // Reaching another argument after a block argument or after `...` is
       // reported as an error, but parsing still continues.
14132  if (parsed_block_argument) {
14133  pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
14134  }
14135  if (parsed_forwarding_arguments) {
14136  pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
14137  }
14138 
14139  pm_node_t *argument = NULL;
14140 
14141  switch (parser->current.type) {
14142  case PM_TOKEN_USTAR_STAR:
14143  case PM_TOKEN_LABEL: {
14144  if (parsed_bare_hash) {
14145  pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
14146  }
14147 
14148  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
14149  argument = (pm_node_t *) hash;
14150 
14151  pm_static_literals_t hash_keys = { 0 };
14152  bool contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) hash, (uint16_t) (depth + 1));
14153 
14154  parse_arguments_append(parser, arguments, argument);
14155 
       // NOTE(review): the embedded numbering skips 14156, which is
       // presumably where `flags` is declared and initialized - confirm
       // against the upstream file.
14157  if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14158  pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14159 
14160  pm_static_literals_free(&hash_keys);
14161  parsed_bare_hash = true;
14162 
14163  break;
14164  }
14165  case PM_TOKEN_UAMPERSAND: {
14166  parser_lex(parser);
14167  pm_token_t operator = parser->previous;
14168  pm_node_t *expression = NULL;
14169 
14170  if (token_begins_expression_p(parser->current.type)) {
14171  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14172  } else {
       // A bare & with no operand: the expression stays NULL and the
       // enclosing scope is checked for block forwarding.
14173  pm_parser_scope_forwarding_block_check(parser, &operator);
14174  }
14175 
14176  argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
       // Only the first block argument is stored in arguments->block; any
       // later one is appended to the regular argument list instead.
14177  if (parsed_block_argument) {
14178  parse_arguments_append(parser, arguments, argument);
14179  } else {
14180  arguments->block = argument;
14181  }
14182 
14183  parsed_block_argument = true;
14184  break;
14185  }
14186  case PM_TOKEN_USTAR: {
14187  parser_lex(parser);
14188  pm_token_t operator = parser->previous;
14189 
       // NOTE(review): the embedded numbering skips 14190, which presumably
       // holds the `if (...) {` head matching the `} else {` below - confirm
       // against the upstream file. The first branch builds a splat with no
       // operand after checking the scope for positional forwarding.
14191  pm_parser_scope_forwarding_positionals_check(parser, &operator);
14192  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14193  } else {
14194  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14195 
14196  if (parsed_bare_hash) {
14197  pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14198  }
14199 
14200  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
14201  }
14202 
14203  parse_arguments_append(parser, arguments, argument);
14204  break;
14205  }
14206  case PM_TOKEN_UDOT_DOT_DOT: {
14207  if (accepts_forwarding) {
14208  parser_lex(parser);
14209 
14210  if (token_begins_expression_p(parser->current.type)) {
14211  // If the token begins an expression then this ... was
14212  // not actually argument forwarding but was instead a
14213  // range.
14214  pm_token_t operator = parser->previous;
14215  pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
14216 
14217  // If we parse a range, we need to validate that we
14218  // didn't accidentally violate the nonassoc rules of the
14219  // ... operator.
14220  if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
14221  pm_range_node_t *range = (pm_range_node_t *) right;
14222  pm_parser_err(parser, range->operator_loc.start, range->operator_loc.end, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
14223  }
14224 
       // `argument` is now non-NULL, so after falling through to the default
       // case below it is used as-is instead of being parsed again.
14225  argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
14226  } else {
14227  pm_parser_scope_forwarding_all_check(parser, &parser->previous);
14228  if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
14229  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
14230  }
14231 
14232  argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
14233  parse_arguments_append(parser, arguments, argument);
14234  pm_node_flag_set((pm_node_t *) arguments->arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
14235  arguments->has_forwarding = true;
14236  parsed_forwarding_arguments = true;
14237  break;
14238  }
14239  }
14240  }
14241  /* fallthrough */
14242  default: {
       // Reached directly for ordinary arguments, and by fallthrough from the
       // `...` case when forwarding is not accepted (argument is still NULL)
       // or when the `...` was parsed as a range (argument already set).
14243  if (argument == NULL) {
14244  argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
14245  }
14246 
14247  bool contains_keywords = false;
14248  bool contains_keyword_splat = false;
14249 
       // When pm_symbol_node_label_p is true for the argument, or a => follows
       // it, the argument is really the first key of a bare keyword hash.
14250  if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14251  if (parsed_bare_hash) {
14252  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
14253  }
14254 
14255  pm_token_t operator;
14256  if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
14257  operator = parser->previous;
14258  } else {
14259  operator = not_provided(parser);
14260  }
14261 
14262  pm_keyword_hash_node_t *bare_hash = pm_keyword_hash_node_create(parser);
14263  contains_keywords = true;
14264 
14265  // Create the set of static literals for this hash.
14266  pm_static_literals_t hash_keys = { 0 };
14267  pm_hash_key_static_literals_add(parser, &hash_keys, argument);
14268 
14269  // Finish parsing the one we are part way through.
14270  pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
14271  argument = (pm_node_t *) pm_assoc_node_create(parser, argument, &operator, value);
14272 
14273  pm_keyword_hash_node_elements_append(bare_hash, argument);
14274  argument = (pm_node_t *) bare_hash;
14275 
14276  // Then parse more if we have a comma
14277  if (accept1(parser, PM_TOKEN_COMMA) && (
14278  token_begins_expression_p(parser->current.type) ||
14279  match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
14280  )) {
14281  contains_keyword_splat = parse_assocs(parser, &hash_keys, (pm_node_t *) bare_hash, (uint16_t) (depth + 1));
14282  }
14283 
14284  pm_static_literals_free(&hash_keys);
14285  parsed_bare_hash = true;
14286  }
14287 
14288  parse_arguments_append(parser, arguments, argument);
14289 
14290  pm_node_flags_t flags = 0;
14291  if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
14292  if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
14293  pm_node_flag_set((pm_node_t *) arguments->arguments, flags);
14294 
14295  break;
14296  }
14297  }
14298 
14299  parsed_first_argument = true;
14300 
14301  // If parsing the argument failed, we need to stop parsing arguments.
14302  if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->recovering) break;
14303 
14304  // If the terminator of these arguments is not EOF, then we have a
14305  // specific token we're looking for. In that case we can accept a
14306  // newline here because it is not functioning as a statement terminator.
14307  bool accepted_newline = false;
14308  if (terminator != PM_TOKEN_EOF) {
14309  accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14310  }
14311 
14312  if (parser->previous.type == PM_TOKEN_COMMA && parsed_bare_hash) {
14313  // If we previously were on a comma and we just parsed a bare hash,
14314  // then we want to continue parsing arguments. This is because the
14315  // comma was grabbed up by the hash parser.
14316  } else if (accept1(parser, PM_TOKEN_COMMA)) {
14317  // If there was a comma, then we need to check if we also accepted a
14318  // newline. If we did, then this is a syntax error.
14319  if (accepted_newline) {
14320  pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14321  }
14322  } else {
14323  // If there is no comma at the end of the argument list then we're
14324  // done parsing arguments and can break out of this loop.
14325  break;
14326  }
14327 
14328  // If we hit the terminator, then that means we have a trailing comma so
14329  // we can accept that output as well.
14330  if (match1(parser, terminator)) break;
14331  }
14332 }
14333 
14344 static pm_multi_target_node_t *
14345 parse_required_destructured_parameter(pm_parser_t *parser) {
14346  expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
14347 
14348  pm_multi_target_node_t *node = pm_multi_target_node_create(parser);
14349  pm_multi_target_node_opening_set(node, &parser->previous);
14350 
14351  do {
14352  pm_node_t *param;
14353 
14354  // If we get here then we have a trailing comma, which isn't allowed in
14355  // the grammar. In other places, multi targets _do_ allow trailing
14356  // commas, so here we'll assume this is a mistake of the user not
14357  // knowing it's not allowed here.
14358  if (node->lefts.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14359  param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14360  pm_multi_target_node_targets_append(parser, node, param);
14361  pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14362  break;
14363  }
14364 
14365  if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14366  param = (pm_node_t *) parse_required_destructured_parameter(parser);
14367  } else if (accept1(parser, PM_TOKEN_USTAR)) {
14368  pm_token_t star = parser->previous;
14369  pm_node_t *value = NULL;
14370 
14371  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14372  pm_token_t name = parser->previous;
14373  value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14374  if (pm_parser_parameter_name_check(parser, &name)) {
14375  pm_node_flag_set_repeated_parameter(value);
14376  }
14377  pm_parser_local_add_token(parser, &name, 1);
14378  }
14379 
14380  param = (pm_node_t *) pm_splat_node_create(parser, &star, value);
14381  } else {
14382  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
14383  pm_token_t name = parser->previous;
14384 
14385  param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
14386  if (pm_parser_parameter_name_check(parser, &name)) {
14387  pm_node_flag_set_repeated_parameter(param);
14388  }
14389  pm_parser_local_add_token(parser, &name, 1);
14390  }
14391 
14392  pm_multi_target_node_targets_append(parser, node, param);
14393  } while (accept1(parser, PM_TOKEN_COMMA));
14394 
14395  accept1(parser, PM_TOKEN_NEWLINE);
14396  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
14397  pm_multi_target_node_closing_set(node, &parser->previous);
14398 
14399  return node;
14400 }
14401 
/**
 * The ordering states used while validating a parameter list. The numeric
 * values matter: update_parameter_state compares them directly, rejecting a
 * token whose state is greater than the current one and lowering the current
 * state when the token's state is smaller.
 */
typedef enum {
    /** Extra state for tokens that should not change the state. */
    PM_PARAMETERS_NO_CHANGE = 0,

    PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,
    PM_PARAMETERS_ORDER_KEYWORDS_REST = 2,
    PM_PARAMETERS_ORDER_KEYWORDS = 3,
    PM_PARAMETERS_ORDER_REST = 4,
    PM_PARAMETERS_ORDER_AFTER_OPTIONAL = 5,
    PM_PARAMETERS_ORDER_OPTIONAL = 6,
    PM_PARAMETERS_ORDER_NAMED = 7,
    PM_PARAMETERS_ORDER_NONE = 8
} pm_parameters_order_t;
14417 
14421 static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
14422  [0] = PM_PARAMETERS_NO_CHANGE,
14423  [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14424  [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14425  [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
14426  [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
14427  [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
14428  [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
14429  [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
14430  [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14431  [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
14432  [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
14433  [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
14434 };
14435 
14443 static bool
14444 update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_order_t *current) {
14445  pm_parameters_order_t state = parameters_ordering[token->type];
14446  if (state == PM_PARAMETERS_NO_CHANGE) return true;
14447 
14448  // If we see another ordered argument after a optional argument
14449  // we only continue parsing ordered arguments until we stop seeing ordered arguments.
14450  if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14451  *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
14452  return true;
14453  } else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
14454  return true;
14455  }
14456 
14457  if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14458  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
14459  return false;
14460  } else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
14461  pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
14462  return false;
14463  } else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
14464  // We know what transition we failed on, so we can provide a better error here.
14465  pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
14466  return false;
14467  }
14468 
14469  if (state < *current) *current = state;
14470  return true;
14471 }
14472 
14476 static pm_parameters_node_t *
14477 parse_parameters(
14478  pm_parser_t *parser,
14479  pm_binding_power_t binding_power,
14480  bool uses_parentheses,
14481  bool allows_trailing_comma,
14482  bool allows_forwarding_parameters,
14483  bool accepts_blocks_in_defaults,
14484  uint16_t depth
14485 ) {
14486  pm_do_loop_stack_push(parser, false);
14487 
14488  pm_parameters_node_t *params = pm_parameters_node_create(parser);
14489  pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
14490 
14491  while (true) {
14492  bool parsing = true;
14493 
14494  switch (parser->current.type) {
14496  update_parameter_state(parser, &parser->current, &order);
14497  pm_node_t *param = (pm_node_t *) parse_required_destructured_parameter(parser);
14498 
14499  if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14500  pm_parameters_node_requireds_append(params, param);
14501  } else {
14502  pm_parameters_node_posts_append(params, param);
14503  }
14504  break;
14505  }
14506  case PM_TOKEN_UAMPERSAND:
14507  case PM_TOKEN_AMPERSAND: {
14508  update_parameter_state(parser, &parser->current, &order);
14509  parser_lex(parser);
14510 
14511  pm_token_t operator = parser->previous;
14512  pm_token_t name;
14513 
14514  bool repeated = false;
14515  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14516  name = parser->previous;
14517  repeated = pm_parser_parameter_name_check(parser, &name);
14518  pm_parser_local_add_token(parser, &name, 1);
14519  } else {
14520  name = not_provided(parser);
14521  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_BLOCK;
14522  }
14523 
14524  pm_block_parameter_node_t *param = pm_block_parameter_node_create(parser, &name, &operator);
14525  if (repeated) {
14526  pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14527  }
14528  if (params->block == NULL) {
14529  pm_parameters_node_block_set(params, param);
14530  } else {
14531  pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
14532  pm_parameters_node_posts_append(params, (pm_node_t *) param);
14533  }
14534 
14535  break;
14536  }
14537  case PM_TOKEN_UDOT_DOT_DOT: {
14538  if (!allows_forwarding_parameters) {
14539  pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
14540  }
14541 
14542  bool succeeded = update_parameter_state(parser, &parser->current, &order);
14543  parser_lex(parser);
14544 
14545  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_ALL;
14546  pm_forwarding_parameter_node_t *param = pm_forwarding_parameter_node_create(parser, &parser->previous);
14547 
14548  if (params->keyword_rest != NULL) {
14549  // If we already have a keyword rest parameter, then we replace it with the
14550  // forwarding parameter and move the keyword rest parameter to the posts list.
14551  pm_node_t *keyword_rest = params->keyword_rest;
14552  pm_parameters_node_posts_append(params, keyword_rest);
14553  if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
14554  params->keyword_rest = NULL;
14555  }
14556 
14557  pm_parameters_node_keyword_rest_set(params, (pm_node_t *) param);
14558  break;
14559  }
14561  case PM_TOKEN_IDENTIFIER:
14562  case PM_TOKEN_CONSTANT:
14565  case PM_TOKEN_METHOD_NAME: {
14566  parser_lex(parser);
14567  switch (parser->previous.type) {
14568  case PM_TOKEN_CONSTANT:
14569  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14570  break;
14572  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14573  break;
14575  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14576  break;
14578  pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14579  break;
14580  case PM_TOKEN_METHOD_NAME:
14581  pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
14582  break;
14583  default: break;
14584  }
14585 
14586  if (parser->current.type == PM_TOKEN_EQUAL) {
14587  update_parameter_state(parser, &parser->current, &order);
14588  } else {
14589  update_parameter_state(parser, &parser->previous, &order);
14590  }
14591 
14592  pm_token_t name = parser->previous;
14593  bool repeated = pm_parser_parameter_name_check(parser, &name);
14594  pm_parser_local_add_token(parser, &name, 1);
14595 
14596  if (match1(parser, PM_TOKEN_EQUAL)) {
14597  pm_token_t operator = parser->current;
14598  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14599  parser_lex(parser);
14600 
14601  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &name);
14602  uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14603 
14604  if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14605  pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14606  if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14607 
14608  pm_optional_parameter_node_t *param = pm_optional_parameter_node_create(parser, &name, &operator, value);
14609 
14610  if (repeated) {
14611  pm_node_flag_set_repeated_parameter((pm_node_t *) param);
14612  }
14613  pm_parameters_node_optionals_append(params, param);
14614 
14615  // If the value of the parameter increased the number of
14616  // reads of that parameter, then we need to warn that we
14617  // have a circular definition.
14618  if ((parser->version == PM_OPTIONS_VERSION_CRUBY_3_3) && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14619  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, name, PM_ERR_PARAMETER_CIRCULAR);
14620  }
14621 
14622  context_pop(parser);
14623 
14624  // If parsing the value of the parameter resulted in error recovery,
14625  // then we can put a missing node in its place and stop parsing the
14626  // parameters entirely now.
14627  if (parser->recovering) {
14628  parsing = false;
14629  break;
14630  }
14631  } else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
14632  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14633  if (repeated) {
14634  pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14635  }
14636  pm_parameters_node_requireds_append(params, (pm_node_t *) param);
14637  } else {
14638  pm_required_parameter_node_t *param = pm_required_parameter_node_create(parser, &name);
14639  if (repeated) {
14640  pm_node_flag_set_repeated_parameter((pm_node_t *)param);
14641  }
14642  pm_parameters_node_posts_append(params, (pm_node_t *) param);
14643  }
14644 
14645  break;
14646  }
14647  case PM_TOKEN_LABEL: {
14648  if (!uses_parentheses) parser->in_keyword_arg = true;
14649  update_parameter_state(parser, &parser->current, &order);
14650 
14651  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
14652  parser_lex(parser);
14653 
14654  pm_token_t name = parser->previous;
14655  pm_token_t local = name;
14656  local.end -= 1;
14657 
14658  if (parser->encoding_changed ? parser->encoding->isupper_char(local.start, local.end - local.start) : pm_encoding_utf_8_isupper_char(local.start, local.end - local.start)) {
14659  pm_parser_err(parser, local.start, local.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14660  } else if (local.end[-1] == '!' || local.end[-1] == '?') {
14661  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14662  }
14663 
14664  bool repeated = pm_parser_parameter_name_check(parser, &local);
14665  pm_parser_local_add_token(parser, &local, 1);
14666 
14667  switch (parser->current.type) {
14668  case PM_TOKEN_COMMA:
14670  case PM_TOKEN_PIPE: {
14671  context_pop(parser);
14672 
14673  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14674  if (repeated) {
14675  pm_node_flag_set_repeated_parameter(param);
14676  }
14677 
14678  pm_parameters_node_keywords_append(params, param);
14679  break;
14680  }
14681  case PM_TOKEN_SEMICOLON:
14682  case PM_TOKEN_NEWLINE: {
14683  context_pop(parser);
14684 
14685  if (uses_parentheses) {
14686  parsing = false;
14687  break;
14688  }
14689 
14690  pm_node_t *param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14691  if (repeated) {
14692  pm_node_flag_set_repeated_parameter(param);
14693  }
14694 
14695  pm_parameters_node_keywords_append(params, param);
14696  break;
14697  }
14698  default: {
14699  pm_node_t *param;
14700 
14701  if (token_begins_expression_p(parser->current.type)) {
14702  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &local);
14703  uint32_t reads = parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 ? pm_locals_reads(&parser->current_scope->locals, name_id) : 0;
14704 
14705  if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser, true);
14706  pm_node_t *value = parse_value_expression(parser, binding_power, false, false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14707  if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14708 
14709  if (parser->version == PM_OPTIONS_VERSION_CRUBY_3_3 && (pm_locals_reads(&parser->current_scope->locals, name_id) != reads)) {
14710  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, local, PM_ERR_PARAMETER_CIRCULAR);
14711  }
14712 
14713  param = (pm_node_t *) pm_optional_keyword_parameter_node_create(parser, &name, value);
14714  }
14715  else {
14716  param = (pm_node_t *) pm_required_keyword_parameter_node_create(parser, &name);
14717  }
14718 
14719  if (repeated) {
14720  pm_node_flag_set_repeated_parameter(param);
14721  }
14722 
14723  context_pop(parser);
14724  pm_parameters_node_keywords_append(params, param);
14725 
14726  // If parsing the value of the parameter resulted in error recovery,
14727  // then we can put a missing node in its place and stop parsing the
14728  // parameters entirely now.
14729  if (parser->recovering) {
14730  parsing = false;
14731  break;
14732  }
14733  }
14734  }
14735 
14736  parser->in_keyword_arg = false;
14737  break;
14738  }
14739  case PM_TOKEN_USTAR:
14740  case PM_TOKEN_STAR: {
14741  update_parameter_state(parser, &parser->current, &order);
14742  parser_lex(parser);
14743 
14744  pm_token_t operator = parser->previous;
14745  pm_token_t name;
14746  bool repeated = false;
14747 
14748  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14749  name = parser->previous;
14750  repeated = pm_parser_parameter_name_check(parser, &name);
14751  pm_parser_local_add_token(parser, &name, 1);
14752  } else {
14753  name = not_provided(parser);
14754  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS;
14755  }
14756 
14757  pm_node_t *param = (pm_node_t *) pm_rest_parameter_node_create(parser, &operator, &name);
14758  if (repeated) {
14759  pm_node_flag_set_repeated_parameter(param);
14760  }
14761 
14762  if (params->rest == NULL) {
14763  pm_parameters_node_rest_set(params, param);
14764  } else {
14765  pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14766  pm_parameters_node_posts_append(params, param);
14767  }
14768 
14769  break;
14770  }
14771  case PM_TOKEN_STAR_STAR:
14772  case PM_TOKEN_USTAR_STAR: {
14773  pm_parameters_order_t previous_order = order;
14774  update_parameter_state(parser, &parser->current, &order);
14775  parser_lex(parser);
14776 
14777  pm_token_t operator = parser->previous;
14778  pm_node_t *param;
14779 
14780  if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14781  if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14782  pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14783  }
14784 
14785  param = (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
14786  } else {
14787  pm_token_t name;
14788 
14789  bool repeated = false;
14790  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14791  name = parser->previous;
14792  repeated = pm_parser_parameter_name_check(parser, &name);
14793  pm_parser_local_add_token(parser, &name, 1);
14794  } else {
14795  name = not_provided(parser);
14796  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS;
14797  }
14798 
14799  param = (pm_node_t *) pm_keyword_rest_parameter_node_create(parser, &operator, &name);
14800  if (repeated) {
14801  pm_node_flag_set_repeated_parameter(param);
14802  }
14803  }
14804 
14805  if (params->keyword_rest == NULL) {
14806  pm_parameters_node_keyword_rest_set(params, param);
14807  } else {
14808  pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14809  pm_parameters_node_posts_append(params, param);
14810  }
14811 
14812  break;
14813  }
14814  default:
14815  if (parser->previous.type == PM_TOKEN_COMMA) {
14816  if (allows_trailing_comma && order >= PM_PARAMETERS_ORDER_NAMED) {
14817  // If we get here, then we have a trailing comma in a
14818  // block parameter list.
14819  pm_node_t *param = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
14820 
14821  if (params->rest == NULL) {
14822  pm_parameters_node_rest_set(params, param);
14823  } else {
14824  pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
14825  pm_parameters_node_posts_append(params, (pm_node_t *) param);
14826  }
14827  } else {
14828  pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
14829  }
14830  }
14831 
14832  parsing = false;
14833  break;
14834  }
14835 
14836  // If we hit some kind of issue while parsing the parameter, this would
14837  // have been set to false. In that case, we need to break out of the
14838  // loop.
14839  if (!parsing) break;
14840 
14841  bool accepted_newline = false;
14842  if (uses_parentheses) {
14843  accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14844  }
14845 
14846  if (accept1(parser, PM_TOKEN_COMMA)) {
14847  // If there was a comma, but we also accepted a newline, then this
14848  // is a syntax error.
14849  if (accepted_newline) {
14850  pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14851  }
14852  } else {
14853  // If there was no comma, then we're done parsing parameters.
14854  break;
14855  }
14856  }
14857 
14858  pm_do_loop_stack_pop(parser);
14859 
14860  // If we don't have any parameters, return `NULL` instead of an empty `ParametersNode`.
14861  if (params->base.location.start == params->base.location.end) {
14862  pm_node_destroy(parser, (pm_node_t *) params);
14863  return NULL;
14864  }
14865 
14866  return params;
14867 }
14868 
14873 static size_t
14874 token_newline_index(const pm_parser_t *parser) {
14875  if (parser->heredoc_end == NULL) {
14876  // This is the common case. In this case we can look at the previously
14877  // recorded newline in the newline list and subtract from the current
14878  // offset.
14879  return parser->newline_list.size - 1;
14880  } else {
14881  // This is unlikely. This is the case that we have already parsed the
14882  // start of a heredoc, so we cannot rely on looking at the previous
14883  // offset of the newline list, and instead must go through the whole
14884  // process of a binary search for the line number.
14885  return (size_t) pm_newline_list_line(&parser->newline_list, parser->current.start, 0);
14886  }
14887 }
14888 
14893 static int64_t
14894 token_column(const pm_parser_t *parser, size_t newline_index, const pm_token_t *token, bool break_on_non_space) {
14895  const uint8_t *cursor = parser->start + parser->newline_list.offsets[newline_index];
14896  const uint8_t *end = token->start;
14897 
14898  // Skip over the BOM if it is present.
14899  if (
14900  newline_index == 0 &&
14901  parser->start[0] == 0xef &&
14902  parser->start[1] == 0xbb &&
14903  parser->start[2] == 0xbf
14904  ) cursor += 3;
14905 
14906  int64_t column = 0;
14907  for (; cursor < end; cursor++) {
14908  switch (*cursor) {
14909  case '\t':
14910  column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14911  break;
14912  case ' ':
14913  column++;
14914  break;
14915  default:
14916  column++;
14917  if (break_on_non_space) return -1;
14918  break;
14919  }
14920  }
14921 
14922  return column;
14923 }
14924 
14929 static void
14930 parser_warn_indentation_mismatch(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening_token, bool if_after_else, bool allow_indent) {
14931  // If these warnings are disabled (unlikely), then we can just return.
14932  if (!parser->warn_mismatched_indentation) return;
14933 
14934  // If the tokens are on the same line, we do not warn.
14935  size_t closing_newline_index = token_newline_index(parser);
14936  if (opening_newline_index == closing_newline_index) return;
14937 
14938  // If the opening token has anything other than spaces or tabs before it,
14939  // then we do not warn. This is unless we are matching up an `if`/`end` pair
14940  // and the `if` immediately follows an `else` keyword.
14941  int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14942  if (!if_after_else && (opening_column == -1)) return;
14943 
14944  // Get a reference to the closing token off the current parser. This assumes
14945  // that the caller has placed this in the correct position.
14946  pm_token_t *closing_token = &parser->current;
14947 
14948  // If the tokens are at the same indentation, we do not warn.
14949  int64_t closing_column = token_column(parser, closing_newline_index, closing_token, true);
14950  if ((closing_column == -1) || (opening_column == closing_column)) return;
14951 
14952  // If the closing column is greater than the opening column and we are
14953  // allowing indentation, then we do not warn.
14954  if (allow_indent && (closing_column > opening_column)) return;
14955 
14956  // Otherwise, add a warning.
14957  PM_PARSER_WARN_FORMAT(
14958  parser,
14959  closing_token->start,
14960  closing_token->end,
14961  PM_WARN_INDENTATION_MISMATCH,
14962  (int) (closing_token->end - closing_token->start),
14963  (const char *) closing_token->start,
14964  (int) (opening_token->end - opening_token->start),
14965  (const char *) opening_token->start,
14966  ((int32_t) opening_newline_index) + parser->start_line
14967  );
14968 }
14969 
/**
 * The kind of construct whose rescue/else/ensure clauses are being parsed.
 * It selects which parsing context is pushed for each clause body.
 */
typedef enum {
    PM_RESCUES_BEGIN  = 1,
    PM_RESCUES_BLOCK  = 2,
    PM_RESCUES_CLASS  = 3,
    PM_RESCUES_DEF    = 4,
    PM_RESCUES_LAMBDA = 5,
    PM_RESCUES_MODULE = 6,
    PM_RESCUES_SCLASS = 7
} pm_rescues_type_t;
14979 
14984 static inline void
14985 parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, pm_begin_node_t *parent_node, pm_rescues_type_t type, uint16_t depth) {
14986  pm_rescue_node_t *current = NULL;
14987 
14988  while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
14989  if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
14990  parser_lex(parser);
14991 
14992  pm_rescue_node_t *rescue = pm_rescue_node_create(parser, &parser->previous);
14993 
14994  switch (parser->current.type) {
14995  case PM_TOKEN_EQUAL_GREATER: {
14996  // Here we have an immediate => after the rescue keyword, in which case
14997  // we're going to have an empty list of exceptions to rescue (which
14998  // implies StandardError).
14999  parser_lex(parser);
15000  pm_rescue_node_operator_set(rescue, &parser->previous);
15001 
15002  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15003  reference = parse_target(parser, reference, false, false);
15004 
15005  pm_rescue_node_reference_set(rescue, reference);
15006  break;
15007  }
15008  case PM_TOKEN_NEWLINE:
15009  case PM_TOKEN_SEMICOLON:
15010  case PM_TOKEN_KEYWORD_THEN:
15011  // Here we have a terminator for the rescue keyword, in which case we're
15012  // going to just continue on.
15013  break;
15014  default: {
15015  if (token_begins_expression_p(parser->current.type) || match1(parser, PM_TOKEN_USTAR)) {
15016  // Here we have something that could be an exception expression, so
15017  // we'll attempt to parse it here and any others delimited by commas.
15018 
15019  do {
15020  pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED, false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
15021  pm_rescue_node_exceptions_append(rescue, expression);
15022 
15023  // If we hit a newline, then this is the end of the rescue expression. We
15024  // can continue on to parse the statements.
15025  if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN)) break;
15026 
15027  // If we hit a `=>` then we're going to parse the exception variable. Once
15028  // we've done that, we'll break out of the loop and parse the statements.
15029  if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
15030  pm_rescue_node_operator_set(rescue, &parser->previous);
15031 
15032  pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
15033  reference = parse_target(parser, reference, false, false);
15034 
15035  pm_rescue_node_reference_set(rescue, reference);
15036  break;
15037  }
15038  } while (accept1(parser, PM_TOKEN_COMMA));
15039  }
15040  }
15041  }
15042 
15043  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
15044  accept1(parser, PM_TOKEN_KEYWORD_THEN);
15045  } else {
15046  expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
15047  }
15048 
15050  pm_accepts_block_stack_push(parser, true);
15051  pm_context_t context;
15052 
15053  switch (type) {
15054  case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_RESCUE; break;
15055  case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_RESCUE; break;
15056  case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_RESCUE; break;
15057  case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_RESCUE; break;
15058  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_RESCUE; break;
15059  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_RESCUE; break;
15060  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_RESCUE; break;
15061  default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15062  }
15063 
15064  pm_statements_node_t *statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15065  if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
15066 
15067  pm_accepts_block_stack_pop(parser);
15068  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15069  }
15070 
15071  if (current == NULL) {
15072  pm_begin_node_rescue_clause_set(parent_node, rescue);
15073  } else {
15074  pm_rescue_node_subsequent_set(current, rescue);
15075  }
15076 
15077  current = rescue;
15078  }
15079 
15080  // The end node locations on rescue nodes will not be set correctly
15081  // since we won't know the end until we've found all subsequent
15082  // clauses. This sets the end location on all rescues once we know it.
15083  if (current != NULL) {
15084  const uint8_t *end_to_set = current->base.location.end;
15085  pm_rescue_node_t *clause = parent_node->rescue_clause;
15086 
15087  while (clause != NULL) {
15088  clause->base.location.end = end_to_set;
15089  clause = clause->subsequent;
15090  }
15091  }
15092 
15093  pm_token_t else_keyword;
15094  if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15095  if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15096  opening_newline_index = token_newline_index(parser);
15097 
15098  else_keyword = parser->current;
15099  opening = &else_keyword;
15100 
15101  parser_lex(parser);
15102  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15103 
15104  pm_statements_node_t *else_statements = NULL;
15105  if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
15106  pm_accepts_block_stack_push(parser, true);
15107  pm_context_t context;
15108 
15109  switch (type) {
15110  case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ELSE; break;
15111  case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ELSE; break;
15112  case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ELSE; break;
15113  case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ELSE; break;
15114  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15115  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15116  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15117  default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15118  }
15119 
15120  else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15121  pm_accepts_block_stack_pop(parser);
15122 
15123  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15124  }
15125 
15126  pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->current);
15127  pm_begin_node_else_clause_set(parent_node, else_clause);
15128 
15129  // If we don't have a `current` rescue node, then this is a dangling
15130  // else, and it's an error.
15131  if (current == NULL) pm_parser_err_node(parser, (pm_node_t *) else_clause, PM_ERR_BEGIN_LONELY_ELSE);
15132  }
15133 
15134  if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
15135  if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15136  pm_token_t ensure_keyword = parser->current;
15137 
15138  parser_lex(parser);
15139  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15140 
15141  pm_statements_node_t *ensure_statements = NULL;
15142  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15143  pm_accepts_block_stack_push(parser, true);
15144  pm_context_t context;
15145 
15146  switch (type) {
15147  case PM_RESCUES_BEGIN: context = PM_CONTEXT_BEGIN_ENSURE; break;
15148  case PM_RESCUES_BLOCK: context = PM_CONTEXT_BLOCK_ENSURE; break;
15149  case PM_RESCUES_CLASS: context = PM_CONTEXT_CLASS_ENSURE; break;
15150  case PM_RESCUES_DEF: context = PM_CONTEXT_DEF_ENSURE; break;
15151  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ENSURE; break;
15152  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ENSURE; break;
15153  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ENSURE; break;
15154  default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15155  }
15156 
15157  ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15158  pm_accepts_block_stack_pop(parser);
15159 
15160  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15161  }
15162 
15163  pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->current);
15164  pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
15165  }
15166 
15167  if (match1(parser, PM_TOKEN_KEYWORD_END)) {
15168  if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening, false, false);
15169  pm_begin_node_end_keyword_set(parent_node, &parser->current);
15170  } else {
15171  pm_token_t end_keyword = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
15172  pm_begin_node_end_keyword_set(parent_node, &end_keyword);
15173  }
15174 }
15175 
15180 static pm_begin_node_t *
15181 parse_rescues_implicit_begin(pm_parser_t *parser, size_t opening_newline_index, const pm_token_t *opening, const uint8_t *start, pm_statements_node_t *statements, pm_rescues_type_t type, uint16_t depth) {
15182  pm_token_t begin_keyword = not_provided(parser);
15183  pm_begin_node_t *node = pm_begin_node_create(parser, &begin_keyword, statements);
15184 
15185  parse_rescues(parser, opening_newline_index, opening, node, type, (uint16_t) (depth + 1));
15186  node->base.location.start = start;
15187 
15188  return node;
15189 }
15190 
15195 parse_block_parameters(
15196  pm_parser_t *parser,
15197  bool allows_trailing_comma,
15198  const pm_token_t *opening,
15199  bool is_lambda_literal,
15200  bool accepts_blocks_in_defaults,
15201  uint16_t depth
15202 ) {
15203  pm_parameters_node_t *parameters = NULL;
15204  if (!match1(parser, PM_TOKEN_SEMICOLON)) {
15205  parameters = parse_parameters(
15206  parser,
15207  is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
15208  false,
15209  allows_trailing_comma,
15210  false,
15211  accepts_blocks_in_defaults,
15212  (uint16_t) (depth + 1)
15213  );
15214  }
15215 
15216  pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
15217  if ((opening->type != PM_TOKEN_NOT_PROVIDED)) {
15218  accept1(parser, PM_TOKEN_NEWLINE);
15219 
15220  if (accept1(parser, PM_TOKEN_SEMICOLON)) {
15221  do {
15222  switch (parser->current.type) {
15223  case PM_TOKEN_CONSTANT:
15224  pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
15225  parser_lex(parser);
15226  break;
15228  pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
15229  parser_lex(parser);
15230  break;
15232  pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
15233  parser_lex(parser);
15234  break;
15236  pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
15237  parser_lex(parser);
15238  break;
15239  default:
15240  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
15241  break;
15242  }
15243 
15244  bool repeated = pm_parser_parameter_name_check(parser, &parser->previous);
15245  pm_parser_local_add_token(parser, &parser->previous, 1);
15246 
15247  pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
15248  if (repeated) pm_node_flag_set_repeated_parameter((pm_node_t *) local);
15249 
15250  pm_block_parameters_node_append_local(block_parameters, local);
15251  } while (accept1(parser, PM_TOKEN_COMMA));
15252  }
15253  }
15254 
15255  return block_parameters;
15256 }
15257 
15262 static bool
15263 outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
15264  for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15265  if (scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) return true;
15266  }
15267 
15268  return false;
15269 }
15270 
/**
 * The names of the numbered parameters, in order from `_1` through `_9`.
 */
static const char * const pm_numbered_parameter_names[] = {
    "_1",
    "_2",
    "_3",
    "_4",
    "_5",
    "_6",
    "_7",
    "_8",
    "_9"
};
15279 
15285 static pm_node_t *
15286 parse_blocklike_parameters(pm_parser_t *parser, pm_node_t *parameters, const pm_token_t *opening, const pm_token_t *closing) {
15287  pm_node_list_t *implicit_parameters = &parser->current_scope->implicit_parameters;
15288 
15289  // If we have ordinary parameters, then we will return them as the set of
15290  // parameters.
15291  if (parameters != NULL) {
15292  // If we also have implicit parameters, then this is an error.
15293  if (implicit_parameters->size > 0) {
15294  pm_node_t *node = implicit_parameters->nodes[0];
15295 
15297  pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
15298  } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15299  pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
15300  } else {
15301  assert(false && "unreachable");
15302  }
15303  }
15304 
15305  return parameters;
15306  }
15307 
15308  // If we don't have any implicit parameters, then the set of parameters is
15309  // NULL.
15310  if (implicit_parameters->size == 0) {
15311  return NULL;
15312  }
15313 
15314  // If we don't have ordinary parameters, then we now must validate our set
15315  // of implicit parameters. We can only have numbered parameters or it, but
15316  // they cannot be mixed.
15317  uint8_t numbered_parameter = 0;
15318  bool it_parameter = false;
15319 
15320  for (size_t index = 0; index < implicit_parameters->size; index++) {
15321  pm_node_t *node = implicit_parameters->nodes[index];
15322 
15324  if (it_parameter) {
15325  pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
15326  } else if (outer_scope_using_numbered_parameters_p(parser)) {
15327  pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
15328  } else if (parser->current_scope->parameters & PM_SCOPE_PARAMETERS_NUMBERED_INNER) {
15329  pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
15330  } else if (pm_token_is_numbered_parameter(node->location.start, node->location.end)) {
15331  numbered_parameter = MAX(numbered_parameter, (uint8_t) (node->location.start[1] - '0'));
15332  } else {
15333  assert(false && "unreachable");
15334  }
15335  } else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
15336  if (numbered_parameter > 0) {
15337  pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
15338  } else {
15339  it_parameter = true;
15340  }
15341  }
15342  }
15343 
15344  if (numbered_parameter > 0) {
15345  // Go through the parent scopes and mark them as being disallowed from
15346  // using numbered parameters because this inner scope is using them.
15347  for (pm_scope_t *scope = parser->current_scope->previous; scope != NULL && !scope->closed; scope = scope->previous) {
15348  scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
15349  }
15350 
15351  const pm_location_t location = { .start = opening->start, .end = closing->end };
15352  return (pm_node_t *) pm_numbered_parameters_node_create(parser, &location, numbered_parameter);
15353  }
15354 
15355  if (it_parameter) {
15356  return (pm_node_t *) pm_it_parameters_node_create(parser, opening, closing);
15357  }
15358 
15359  return NULL;
15360 }
15361 
15365 static pm_block_node_t *
15366 parse_block(pm_parser_t *parser, uint16_t depth) {
15367  pm_token_t opening = parser->previous;
15368  accept1(parser, PM_TOKEN_NEWLINE);
15369 
15370  pm_accepts_block_stack_push(parser, true);
15371  pm_parser_scope_push(parser, false);
15372 
15373  pm_block_parameters_node_t *block_parameters = NULL;
15374 
15375  if (accept1(parser, PM_TOKEN_PIPE)) {
15376  pm_token_t block_parameters_opening = parser->previous;
15377  if (match1(parser, PM_TOKEN_PIPE)) {
15378  block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
15379  parser->command_start = true;
15380  parser_lex(parser);
15381  } else {
15382  block_parameters = parse_block_parameters(parser, true, &block_parameters_opening, false, true, (uint16_t) (depth + 1));
15383  accept1(parser, PM_TOKEN_NEWLINE);
15384  parser->command_start = true;
15385  expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
15386  }
15387 
15388  pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
15389  }
15390 
15391  accept1(parser, PM_TOKEN_NEWLINE);
15392  pm_node_t *statements = NULL;
15393 
15394  if (opening.type == PM_TOKEN_BRACE_LEFT) {
15395  if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
15396  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_BRACES, (uint16_t) (depth + 1));
15397  }
15398 
15399  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE);
15400  } else {
15401  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
15403  pm_accepts_block_stack_push(parser, true);
15404  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_BLOCK_KEYWORDS, (uint16_t) (depth + 1));
15405  pm_accepts_block_stack_pop(parser);
15406  }
15407 
15408  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
15409  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
15410  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, 0, NULL, opening.start, (pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1));
15411  }
15412  }
15413 
15414  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END);
15415  }
15416 
15417  pm_constant_id_list_t locals;
15418  pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
15419  pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &opening, &parser->previous);
15420 
15421  pm_parser_scope_pop(parser);
15422  pm_accepts_block_stack_pop(parser);
15423 
15424  return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->previous);
15425 }
15426 
15432 static bool
15433 parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_block, bool accepts_command_call, uint16_t depth) {
15434  bool found = false;
15435 
15436  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
15437  found |= true;
15438  arguments->opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15439 
15440  if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15441  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15442  } else {
15443  pm_accepts_block_stack_push(parser, true);
15444  parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
15445 
15446  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
15447  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARGUMENT_TERM_PAREN, pm_token_type_human(parser->current.type));
15448  parser->previous.start = parser->previous.end;
15449  parser->previous.type = PM_TOKEN_MISSING;
15450  }
15451 
15452  pm_accepts_block_stack_pop(parser);
15453  arguments->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
15454  }
15455  } else if (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
15456  found |= true;
15457  pm_accepts_block_stack_push(parser, false);
15458 
15459  // If we get here, then the subsequent token cannot be used as an infix
15460  // operator. In this case we assume the subsequent token is part of an
15461  // argument to this method call.
15462  parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
15463 
15464  // If we have done with the arguments and still not consumed the comma,
15465  // then we have a trailing comma where we need to check whether it is
15466  // allowed or not.
15467  if (parser->previous.type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
15468  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_ARGUMENT, pm_token_type_human(parser->current.type));
15469  }
15470 
15471  pm_accepts_block_stack_pop(parser);
15472  }
15473 
15474  // If we're at the end of the arguments, we can now check if there is a block
15475  // node that starts with a {. If there is, then we can parse it and add it to
15476  // the arguments.
15477  if (accepts_block) {
15478  pm_block_node_t *block = NULL;
15479 
15480  if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
15481  found |= true;
15482  block = parse_block(parser, (uint16_t) (depth + 1));
15483  pm_arguments_validate_block(parser, arguments, block);
15484  } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
15485  found |= true;
15486  block = parse_block(parser, (uint16_t) (depth + 1));
15487  }
15488 
15489  if (block != NULL) {
15490  if (arguments->block == NULL && !arguments->has_forwarding) {
15491  arguments->block = (pm_node_t *) block;
15492  } else {
15493  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
15494 
15495  if (arguments->block != NULL) {
15496  if (arguments->arguments == NULL) {
15497  arguments->arguments = pm_arguments_node_create(parser);
15498  }
15499  pm_arguments_node_arguments_append(arguments->arguments, arguments->block);
15500  }
15501  arguments->block = (pm_node_t *) block;
15502  }
15503  }
15504  }
15505 
15506  return found;
15507 }
15508 
/**
 * Check whether a `return` (represented by `node`) is allowed at the current
 * position by walking the context stack outward from parser->current_context.
 *
 * - Contexts in the first group are transparent: the walk simply continues.
 * - PM_CONTEXT_SCLASS is remembered in `in_sclass` and the walk continues.
 * - Class and module contexts report PM_ERR_RETURN_INVALID on `node` and stop.
 * - Block-else and def contexts accept the return and stop.
 *
 * If the walk runs off the top of the stack, PM_ERR_RETURN_INVALID is reported
 * only when an sclass context was seen on the way up.
 *
 * NOTE(review): the embedded line numbers skip values inside the switch
 * (e.g. 15518 -> 15521), so some case labels appear to be absent from this
 * listing; confirm the full set of contexts against the upstream file.
 *
 * @param parser The parser whose context stack is inspected.
 * @param node The node that any error is attached to.
 */
15513 static void
15514 parse_return(pm_parser_t *parser, pm_node_t *node) {
15515  bool in_sclass = false;
15516  for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15517  switch (context_node->context) {
15518  case PM_CONTEXT_BEGIN_ELSE:
15521  case PM_CONTEXT_BEGIN:
15522  case PM_CONTEXT_CASE_IN:
15523  case PM_CONTEXT_CASE_WHEN:
15525  case PM_CONTEXT_DEFINED:
15526  case PM_CONTEXT_ELSE:
15527  case PM_CONTEXT_ELSIF:
15528  case PM_CONTEXT_EMBEXPR:
15529  case PM_CONTEXT_FOR_INDEX:
15530  case PM_CONTEXT_FOR:
15531  case PM_CONTEXT_IF:
15533  case PM_CONTEXT_MAIN:
15535  case PM_CONTEXT_PARENS:
15536  case PM_CONTEXT_POSTEXE:
15537  case PM_CONTEXT_PREDICATE:
15538  case PM_CONTEXT_PREEXE:
15540  case PM_CONTEXT_TERNARY:
15541  case PM_CONTEXT_UNLESS:
15542  case PM_CONTEXT_UNTIL:
15543  case PM_CONTEXT_WHILE:
15544  // Keep iterating up the lists of contexts, because returns can
15545  // see through these.
15546  continue;
15550  case PM_CONTEXT_SCLASS:
       // Remember that a singleton class body was crossed; whether that is
       // an error is decided once the walk finishes.
15551  in_sclass = true;
15552  continue;
15553  case PM_CONTEXT_CLASS_ELSE:
15556  case PM_CONTEXT_CLASS:
15560  case PM_CONTEXT_MODULE:
15561  // These contexts are invalid for a return.
15562  pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15563  return;
15565  case PM_CONTEXT_BLOCK_ELSE:
15569  case PM_CONTEXT_DEF_ELSE:
15570  case PM_CONTEXT_DEF_ENSURE:
15571  case PM_CONTEXT_DEF_PARAMS:
15572  case PM_CONTEXT_DEF_RESCUE:
15573  case PM_CONTEXT_DEF:
15579  // These contexts are valid for a return, and we should not
15580  // continue to loop.
15581  return;
15582  case PM_CONTEXT_NONE:
15583  // This case should never happen.
15584  assert(false && "unreachable");
       // This `break` only leaves the switch; the loop then moves on to the
       // next context.
15585  break;
15586  }
15587  }
       // The whole stack was walked without finding an accepting or rejecting
       // context: the return is only invalid if an sclass was crossed.
15588  if (in_sclass) {
15589  pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15590  }
15591 }
15592 
/**
 * Decide whether a block exit is allowed at the current position by walking
 * the context stack outward from parser->current_context.
 *
 * - In the "good" contexts the function returns immediately without error.
 * - In the "bad" contexts (def, main, preexe, sclass, ...) the function also
 *   returns without reporting an error; per the comment below, validation is
 *   deferred via parser->current_block_exits.
 * - In all remaining contexts the walk continues to the enclosing context.
 *
 * NOTE(review): in the visible code `node` is never read, and the "bad" branch
 * only asserts that parser->current_block_exits is non-NULL before returning.
 * The embedded line numbers skip 15641 there, so the statement that records
 * `node` in the list appears to be absent from this listing. Other gaps
 * (e.g. 15600 -> 15603) suggest missing case labels as well. Confirm against
 * the upstream file.
 *
 * @param parser The parser whose context stack is inspected.
 * @param node The block exit node being validated.
 */
15597 static void
15598 parse_block_exit(pm_parser_t *parser, pm_node_t *node) {
15599  for (pm_context_node_t *context_node = parser->current_context; context_node != NULL; context_node = context_node->prev) {
15600  switch (context_node->context) {
15603  case PM_CONTEXT_BLOCK_ELSE:
15606  case PM_CONTEXT_DEFINED:
15607  case PM_CONTEXT_FOR:
15614  case PM_CONTEXT_POSTEXE:
15615  case PM_CONTEXT_UNTIL:
15616  case PM_CONTEXT_WHILE:
15617  // These are the good cases. We're allowed to have a block exit
15618  // in these contexts.
15619  return;
15620  case PM_CONTEXT_DEF:
15621  case PM_CONTEXT_DEF_PARAMS:
15622  case PM_CONTEXT_DEF_ELSE:
15623  case PM_CONTEXT_DEF_ENSURE:
15624  case PM_CONTEXT_DEF_RESCUE:
15625  case PM_CONTEXT_MAIN:
15626  case PM_CONTEXT_PREEXE:
15627  case PM_CONTEXT_SCLASS:
15631  // These are the bad cases. We're not allowed to have a block
15632  // exit in these contexts.
15633  //
15634  // If we get here, then we're about to mark this block exit
15635  // as invalid. However, it could later _become_ valid if we
15636  // find a trailing while/until on the expression. In this
15637  // case instead of adding the error here, we'll add the
15638  // block exit to the list of exits for the expression, and
15639  // the node parsing will handle validating it instead.
15640  assert(parser->current_block_exits != NULL);
15642  return;
15643  case PM_CONTEXT_BEGIN_ELSE:
15646  case PM_CONTEXT_BEGIN:
15647  case PM_CONTEXT_CASE_IN:
15648  case PM_CONTEXT_CASE_WHEN:
15649  case PM_CONTEXT_CLASS_ELSE:
15652  case PM_CONTEXT_CLASS:
15654  case PM_CONTEXT_ELSE:
15655  case PM_CONTEXT_ELSIF:
15656  case PM_CONTEXT_EMBEXPR:
15657  case PM_CONTEXT_FOR_INDEX:
15658  case PM_CONTEXT_IF:
15662  case PM_CONTEXT_MODULE:
15664  case PM_CONTEXT_PARENS:
15665  case PM_CONTEXT_PREDICATE:
15667  case PM_CONTEXT_TERNARY:
15668  case PM_CONTEXT_UNLESS:
15669  // In these contexts we should continue walking up the list of
15670  // contexts.
15671  break;
15672  case PM_CONTEXT_NONE:
15673  // This case should never happen.
15674  assert(false && "unreachable");
15675  break;
15676  }
15677  }
15678 }
15679 
15684 static pm_node_list_t *
15685 push_block_exits(pm_parser_t *parser, pm_node_list_t *current_block_exits) {
15686  pm_node_list_t *previous_block_exits = parser->current_block_exits;
15687  parser->current_block_exits = current_block_exits;
15688  return previous_block_exits;
15689 }
15690 
15696 static void
15697 flush_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15698  pm_node_t *block_exit;
15699  PM_NODE_LIST_FOREACH(parser->current_block_exits, index, block_exit) {
15700  const char *type;
15701 
15702  switch (PM_NODE_TYPE(block_exit)) {
15703  case PM_BREAK_NODE: type = "break"; break;
15704  case PM_NEXT_NODE: type = "next"; break;
15705  case PM_REDO_NODE: type = "redo"; break;
15706  default: assert(false && "unreachable"); type = ""; break;
15707  }
15708 
15709  PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT, type);
15710  }
15711 
15712  parser->current_block_exits = previous_block_exits;
15713 }
15714 
/**
 * Retire the parser's active block-exit list and make `previous_block_exits`
 * the active list again. Three outcomes are visible below:
 *
 * 1. The exits are accepted as valid and the previous list is restored.
 * 2. The exits are appended to the (non-NULL) previous list, which is then
 *    restored, so that an enclosing construct can still validate them.
 * 3. There is no previous list: flush_block_exits reports each exit as an
 *    error and restores the (NULL) previous list.
 *
 * NOTE(review): the embedded line numbers skip 15721, and the first branch
 * below has no visible opening `if`. Per the branch comments, the missing
 * condition tests whether a trailing while/until follows; confirm the exact
 * condition against the upstream file.
 *
 * @param parser The parser holding the active list.
 * @param previous_block_exits The list that was active before the matching
 *     push_block_exits call (may be NULL).
 */
15719 static void
15720 pop_block_exits(pm_parser_t *parser, pm_node_list_t *previous_block_exits) {
15722  // If we matched a trailing while/until, then all of the block exits in
15723  // the contained list are valid. In this case we do not need to do
15724  // anything.
15725  parser->current_block_exits = previous_block_exits;
15726  } else if (previous_block_exits != NULL) {
15727  // If we did not match a trailing while/until, then all of the block
15728  // exits contained in the list are invalid for this specific context.
15729  // However, they could still become valid in a higher level context if
15730  // there is another list above this one. In this case we'll push all of
15731  // the block exits up to the previous list.
15732  pm_node_list_concat(previous_block_exits, parser->current_block_exits);
15733  parser->current_block_exits = previous_block_exits;
15734  } else {
15735  // If we did not match a trailing while/until and this was the last
15736  // chance to do so, then all of the block exits in the list are invalid
15737  // and we need to add an error for each of them.
15738  flush_block_exits(parser, previous_block_exits);
15739  }
15740 }
15741 
15742 static inline pm_node_t *
15743 parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_context_t context, pm_token_t *then_keyword, uint16_t depth) {
15744  context_push(parser, PM_CONTEXT_PREDICATE);
15745  pm_diagnostic_id_t error_id = context == PM_CONTEXT_IF ? PM_ERR_CONDITIONAL_IF_PREDICATE : PM_ERR_CONDITIONAL_UNLESS_PREDICATE;
15746  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, error_id, (uint16_t) (depth + 1));
15747 
15748  // Predicates are closed by a term, a "then", or a term and then a "then".
15749  bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15750 
15751  if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15752  predicate_closed = true;
15753  *then_keyword = parser->previous;
15754  }
15755 
15756  if (!predicate_closed) {
15757  pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15758  }
15759 
15760  context_pop(parser);
15761  return predicate;
15762 }
15763 
/**
 * Parse an `if` or `unless` statement whose keyword has just been consumed
 * (it is read from parser->previous), including any `elsif` clauses, an
 * optional `else` clause, and the closing `end`.
 *
 * A fresh block-exit list is pushed for the duration of the parse and popped
 * (then freed) before returning.
 *
 * NOTE(review): the embedded line numbers skip 15775 and the `}` at 15780 has
 * no visible opener, so the condition guarding the first parse_statements
 * call appears to be absent from this listing; confirm against upstream.
 *
 * NOTE(review): `then_keyword` is shared by the leading clause and every
 * `elsif` clause, and parse_predicate only overwrites it when a `then` is
 * actually consumed. An `elsif` without its own `then` therefore looks like
 * it would be created with the previous clause's `then` token; confirm
 * whether that is intended.
 *
 * @param parser The parser.
 * @param context PM_CONTEXT_IF or PM_CONTEXT_UNLESS; any other value hits the
 *     "unreachable" assertions below.
 * @param opening_newline_index Forwarded to parser_warn_indentation_mismatch
 *     for the opening keyword.
 * @param if_after_else Forwarded to parser_warn_indentation_mismatch when the
 *     statement has no `else` clause.
 * @param depth The current recursion depth.
 * @return The outermost if/unless node.
 */
15764 static inline pm_node_t *
15765 parse_conditional(pm_parser_t *parser, pm_context_t context, size_t opening_newline_index, bool if_after_else, uint16_t depth) {
15766  pm_node_list_t current_block_exits = { 0 };
15767  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15768 
15769  pm_token_t keyword = parser->previous;
15770  pm_token_t then_keyword = not_provided(parser);
15771 
15772  pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
15773  pm_statements_node_t *statements = NULL;
15774 
15776  pm_accepts_block_stack_push(parser, true);
15777  statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15778  pm_accepts_block_stack_pop(parser);
15779  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15780  }
15781 
       // The end keyword is not known yet; it is filled in for every node of
       // the chain once `end` has been consumed (see the final switch).
15782  pm_token_t end_keyword = not_provided(parser);
15783  pm_node_t *parent = NULL;
15784 
15785  switch (context) {
15786  case PM_CONTEXT_IF:
15787  parent = (pm_node_t *) pm_if_node_create(parser, &keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15788  break;
15789  case PM_CONTEXT_UNLESS:
15790  parent = (pm_node_t *) pm_unless_node_create(parser, &keyword, predicate, &then_keyword, statements);
15791  break;
15792  default:
15793  assert(false && "unreachable");
15794  break;
15795  }
15796 
       // `current` tracks the tail of the if/elsif chain so that the next
       // clause can be attached as its `subsequent`.
15797  pm_node_t *current = parent;
15798 
15799  // Parse any number of elsif clauses. This will form a linked list of if
15800  // nodes pointing to each other from the top.
15801  if (context == PM_CONTEXT_IF) {
15802  while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15803  if (parser_end_of_line_p(parser)) {
15804  PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
15805  }
15806 
15807  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15808  pm_token_t elsif_keyword = parser->current;
15809  parser_lex(parser);
15810 
15811  pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15812  pm_accepts_block_stack_push(parser, true);
15813 
15814  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_ELSIF, (uint16_t) (depth + 1));
15815  pm_accepts_block_stack_pop(parser);
15816  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15817 
15818  pm_node_t *elsif = (pm_node_t *) pm_if_node_create(parser, &elsif_keyword, predicate, &then_keyword, statements, NULL, &end_keyword);
15819  ((pm_if_node_t *) current)->subsequent = elsif;
15820  current = elsif;
15821  }
15822  }
15823 
15824  if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15825  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
15826  opening_newline_index = token_newline_index(parser);
15827 
15828  parser_lex(parser);
15829  pm_token_t else_keyword = parser->previous;
15830 
15831  pm_accepts_block_stack_push(parser, true);
15832  pm_statements_node_t *else_statements = parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1));
15833  pm_accepts_block_stack_pop(parser);
15834 
15835  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15836  parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword, false, false);
15837  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE);
15838 
15839  pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->previous);
15840 
       // For `if`, the else clause hangs off the last if/elsif in the chain;
       // for `unless`, it is stored directly on the unless node.
15841  switch (context) {
15842  case PM_CONTEXT_IF:
15843  ((pm_if_node_t *) current)->subsequent = (pm_node_t *) else_node;
15844  break;
15845  case PM_CONTEXT_UNLESS:
15846  ((pm_unless_node_t *) parent)->else_clause = else_node;
15847  break;
15848  default:
15849  assert(false && "unreachable");
15850  break;
15851  }
15852  } else {
15853  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else, false);
15854  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM);
15855  }
15856 
15857  // Set the appropriate end location for all of the nodes in the subtree.
15858  switch (context) {
15859  case PM_CONTEXT_IF: {
15860  pm_node_t *current = parent;
15861  bool recursing = true;
15862 
       // Follow the `subsequent` links from the top, stopping after an else
       // node, at the end of the chain, or at any unexpected node type.
15863  while (recursing) {
15864  switch (PM_NODE_TYPE(current)) {
15865  case PM_IF_NODE:
15866  pm_if_node_end_keyword_loc_set((pm_if_node_t *) current, &parser->previous);
15867  current = ((pm_if_node_t *) current)->subsequent;
15868  recursing = current != NULL;
15869  break;
15870  case PM_ELSE_NODE:
15871  pm_else_node_end_keyword_loc_set((pm_else_node_t *) current, &parser->previous);
15872  recursing = false;
15873  break;
15874  default: {
15875  recursing = false;
15876  break;
15877  }
15878  }
15879  }
15880  break;
15881  }
15882  case PM_CONTEXT_UNLESS:
15883  pm_unless_node_end_keyword_loc_set((pm_unless_node_t *) parent, &parser->previous);
15884  break;
15885  default:
15886  assert(false && "unreachable");
15887  break;
15888  }
15889 
15890  pop_block_exits(parser, previous_block_exits);
15891  pm_node_list_free(&current_block_exits);
15892 
15893  return parent;
15894 }
15895 
// A list of case labels covering the keyword token types. The expansion omits
// the leading `case` and the trailing `:`, so it is written in a switch as
// `case PM_CASE_KEYWORD:`.
15900 #define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
15901  case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
15902  case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \
15903  case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
15904  case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
15905  case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
15906  case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
15907  case PM_TOKEN_KEYWORD_OR: case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
15908  case PM_TOKEN_KEYWORD_RETURN: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: case PM_TOKEN_KEYWORD_THEN: \
15909  case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_UNDEF: case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: \
15910  case PM_TOKEN_KEYWORD_WHEN: case PM_TOKEN_KEYWORD_WHILE: case PM_TOKEN_KEYWORD_YIELD
15911 
// A list of case labels covering the operator token types. The symbol, undef,
// and alias parsers in this file use it to recognise operator method names.
// The expansion omits the leading `case` and the trailing `:`, so it is
// written in a switch as `case PM_CASE_OPERATOR:`.
15916 #define PM_CASE_OPERATOR PM_TOKEN_AMPERSAND: case PM_TOKEN_BACKTICK: case PM_TOKEN_BANG_EQUAL: \
15917  case PM_TOKEN_BANG_TILDE: case PM_TOKEN_BANG: case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
15918  case PM_TOKEN_BRACKET_LEFT_RIGHT: case PM_TOKEN_CARET: case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: \
15919  case PM_TOKEN_EQUAL_TILDE: case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: case PM_TOKEN_GREATER: \
15920  case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: case PM_TOKEN_LESS_LESS: case PM_TOKEN_LESS: \
15921  case PM_TOKEN_MINUS: case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: case PM_TOKEN_PLUS: case PM_TOKEN_SLASH: \
15922  case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: case PM_TOKEN_TILDE: case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: \
15923  case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_UPLUS: case PM_TOKEN_USTAR: case PM_TOKEN_USTAR_STAR
15924 
// A list of case labels covering numeric literals, the opening tokens of
// symbols, regular expressions, strings, %-literals and heredocs, and a few
// literal-like keywords (nil, self, true, false, __FILE__, __LINE__,
// __ENCODING__), plus `->`, a unary minus before a number, and character
// literals. The expansion omits the leading `case` and the trailing `:`.
//
// NOTE(review): presumably these are the tokens that can start a "primitive"
// value (for example in pattern matching); its use sites are outside this
// section, so confirm there.
15930 #define PM_CASE_PRIMITIVE PM_TOKEN_INTEGER: case PM_TOKEN_INTEGER_IMAGINARY: case PM_TOKEN_INTEGER_RATIONAL: \
15931  case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: case PM_TOKEN_FLOAT: case PM_TOKEN_FLOAT_IMAGINARY: \
15932  case PM_TOKEN_FLOAT_RATIONAL: case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: case PM_TOKEN_SYMBOL_BEGIN: \
15933  case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: case PM_TOKEN_PERCENT_LOWER_X: case PM_TOKEN_PERCENT_LOWER_I: \
15934  case PM_TOKEN_PERCENT_LOWER_W: case PM_TOKEN_PERCENT_UPPER_I: case PM_TOKEN_PERCENT_UPPER_W: \
15935  case PM_TOKEN_STRING_BEGIN: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_TRUE: \
15936  case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
15937  case PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_MINUS_GREATER: case PM_TOKEN_HEREDOC_START: \
15938  case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_CHARACTER_LITERAL
15939 
// A list of case labels covering `&`, `...`, `*`, `**` (in both their unary
// and binary token forms where both exist), identifiers, labels, constants,
// and instance/global/class variables. The expansion omits the leading `case`
// and the trailing `:`.
//
// NOTE(review): presumably these are the tokens that can begin a parameter;
// its use sites are outside this section, so confirm there.
15944 #define PM_CASE_PARAMETER PM_TOKEN_UAMPERSAND: case PM_TOKEN_AMPERSAND: case PM_TOKEN_UDOT_DOT_DOT: \
15945  case PM_TOKEN_IDENTIFIER: case PM_TOKEN_LABEL: case PM_TOKEN_USTAR: case PM_TOKEN_STAR: case PM_TOKEN_STAR_STAR: \
15946  case PM_TOKEN_USTAR_STAR: case PM_TOKEN_CONSTANT: case PM_TOKEN_INSTANCE_VARIABLE: case PM_TOKEN_GLOBAL_VARIABLE: \
15947  case PM_TOKEN_CLASS_VARIABLE
15948 
// A list of case labels over node types (not token types): the variable,
// constant, and reference read nodes plus the multi target node. The
// expansion omits the leading `case` and the trailing `:`.
//
// NOTE(review): presumably these are the node types that can be turned into
// assignment targets; its use sites are outside this section, so confirm
// there.
15953 #define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
15954  case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
15955  case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
15956  case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
15957 
15958 // Assert here that the flags are the same so that we can safely switch the type
15959 // of the node without having to move the flags. Concretely, the forced-UTF-8
15960 // flag must have the same integer value for strings as for the generic
// encoding flags; the build fails with the message below if it does not.
15960 PM_STATIC_ASSERT(__LINE__, ((int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING), "Expected the flags to match.");
15961 
/**
 * Compute the encoding flags for a string whose content has been unescaped,
 * based on parser->explicit_encoding (set when an escape sequence pins the
 * encoding) and parser->encoding (the source encoding).
 *
 * NOTE(review): the embedded line numbers skip 15972 and 15978, so both
 * branch bodies below are empty in this listing and every visible path
 * returns 0. Per the comments, the branches are meant to mark the string as
 * UTF-8 or as ASCII-8BIT respectively; confirm the returned flags against
 * the upstream file.
 *
 * @param parser The parser whose encoding state is read.
 * @return The node flags to set on the string node (0 when nothing applies).
 */
15966 static inline pm_node_flags_t
15967 parse_unescaped_encoding(const pm_parser_t *parser) {
15968  if (parser->explicit_encoding != NULL) {
15969  if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
15970  // If there's an explicit encoding and it's using a UTF-8 escape
15971  // sequence, then mark the string as UTF-8.
15973  } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
15974  // If there's a non-UTF-8 escape sequence being used, then the
15975  // string uses the source encoding, unless the source is marked as
15976  // US-ASCII. In that case the string is forced as ASCII-8BIT in
15977  // order to keep the string valid.
15979  }
15980  }
15981  return 0;
15982 }
15983 
/**
 * Parse one part of a string-like literal, dispatching on the current token:
 *
 * - PM_TOKEN_STRING_CONTENT: plain content, returned as a string node with no
 *   opening or closing.
 * - PM_TOKEN_EMBEXPR_BEGIN: an embedded expression, returned as an embedded
 *   statements node.
 * - PM_TOKEN_EMBVAR: an embedded variable, returned as an embedded variable
 *   node.
 *
 * For any other token, the token is consumed,
 * PM_ERR_CANNOT_PARSE_STRING_PART is reported, and NULL is returned, so
 * callers must handle a NULL result.
 *
 * NOTE(review): inside the PM_TOKEN_EMBVAR switch the embedded line numbers
 * skip 16070, 16076, 16082, 16088 and 16094, so the case labels for the five
 * variable kinds are absent from this listing; confirm against upstream.
 *
 * @param parser The parser.
 * @param depth The current recursion depth.
 * @return The parsed part, or NULL if the current token cannot start a part.
 */
15988 static pm_node_t *
15989 parse_string_part(pm_parser_t *parser, uint16_t depth) {
15990  switch (parser->current.type) {
15991  // Here the lexer has returned to us plain string content. In this case
15992  // we'll create a string node that has no opening or closing and return that
15993  // as the part. These kinds of parts look like:
15994  //
15995  // "aaa #{bbb} #@ccc ddd"
15996  // ^^^^ ^ ^^^^
15997  case PM_TOKEN_STRING_CONTENT: {
15998  pm_token_t opening = not_provided(parser);
15999  pm_token_t closing = not_provided(parser);
16000 
16001  pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
16002  pm_node_flag_set(node, parse_unescaped_encoding(parser));
16003 
16004  parser_lex(parser);
16005  return node;
16006  }
16007  // Here the lexer has returned the beginning of an embedded expression. In
16008  // that case we'll parse the inner statements and return that as the part.
16009  // These kinds of parts look like:
16010  //
16011  // "aaa #{bbb} #@ccc ddd"
16012  // ^^^^^^
16013  case PM_TOKEN_EMBEXPR_BEGIN: {
16014  // Ruby disallows seeing encoding around interpolation in strings,
16015  // even though it is known at parse time.
16016  parser->explicit_encoding = NULL;
16017 
       // Save the lex state and brace nesting so that both can be restored
       // once the embedded statements have been parsed.
16018  pm_lex_state_t state = parser->lex_state;
16019  int brace_nesting = parser->brace_nesting;
16020 
16021  parser->brace_nesting = 0;
16022  lex_state_set(parser, PM_LEX_STATE_BEG);
16023  parser_lex(parser);
16024 
16025  pm_token_t opening = parser->previous;
16026  pm_statements_node_t *statements = NULL;
16027 
       // An empty interpolation leaves `statements` as NULL.
16028  if (!match1(parser, PM_TOKEN_EMBEXPR_END)) {
16029  pm_accepts_block_stack_push(parser, true);
16030  statements = parse_statements(parser, PM_CONTEXT_EMBEXPR, (uint16_t) (depth + 1));
16031  pm_accepts_block_stack_pop(parser);
16032  }
16033 
16034  parser->brace_nesting = brace_nesting;
16035  lex_state_set(parser, state);
16036 
16037  expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
16038  pm_token_t closing = parser->previous;
16039 
16040  // If this set of embedded statements only contains a single
16041  // statement, then Ruby does not consider it as a possible statement
16042  // that could emit a line event.
16043  if (statements != NULL && statements->body.size == 1) {
16044  pm_node_flag_unset(statements->body.nodes[0], PM_NODE_FLAG_NEWLINE);
16045  }
16046 
16047  return (pm_node_t *) pm_embedded_statements_node_create(parser, &opening, statements, &closing);
16048  }
16049 
16050  // Here the lexer has returned the beginning of an embedded variable.
16051  // In that case we'll parse the variable and create an appropriate node
16052  // for it and then return that node. These kinds of parts look like:
16053  //
16054  // "aaa #{bbb} #@ccc ddd"
16055  // ^^^^^
16056  case PM_TOKEN_EMBVAR: {
16057  // Ruby disallows seeing encoding around interpolation in strings,
16058  // even though it is known at parse time.
16059  parser->explicit_encoding = NULL;
16060 
16061  lex_state_set(parser, PM_LEX_STATE_BEG);
16062  parser_lex(parser);
16063 
16064  pm_token_t operator = parser->previous;
16065  pm_node_t *variable;
16066 
16067  switch (parser->current.type) {
16068  // In this case a back reference is being interpolated. We'll
16069  // create a back reference read node.
16071  parser_lex(parser);
16072  variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16073  break;
16074  // In this case an nth reference is being interpolated. We'll
16075  // create a numbered reference read node.
16077  parser_lex(parser);
16078  variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16079  break;
16080  // In this case a global variable is being interpolated. We'll
16081  // create a global variable read node.
16083  parser_lex(parser);
16084  variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16085  break;
16086  // In this case an instance variable is being interpolated.
16087  // We'll create an instance variable read node.
16089  parser_lex(parser);
16090  variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
16091  break;
16092  // In this case a class variable is being interpolated. We'll
16093  // create a class variable read node.
16095  parser_lex(parser);
16096  variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
16097  break;
16098  // We can hit here if we got an invalid token. In that case
16099  // we'll not attempt to lex this token and instead just return a
16100  // missing node.
16101  default:
16102  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
16103  variable = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16104  break;
16105  }
16106 
16107  return (pm_node_t *) pm_embedded_variable_node_create(parser, &operator, variable);
16108  }
16109  default:
       // Consume the offending token so that callers looping over parts make
       // progress, then report it.
16110  parser_lex(parser);
16111  pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
16112  return NULL;
16113  }
16114 }
16115 
16121 static const uint8_t *
16122 parse_operator_symbol_name(const pm_token_t *name) {
16123  switch (name->type) {
16124  case PM_TOKEN_TILDE:
16125  case PM_TOKEN_BANG:
16126  if (name->end[-1] == '@') return name->end - 1;
16127  /* fallthrough */
16128  default:
16129  return name->end;
16130  }
16131 }
16132 
16133 static pm_node_t *
16134 parse_operator_symbol(pm_parser_t *parser, const pm_token_t *opening, pm_lex_state_t next_state) {
16135  pm_token_t closing = not_provided(parser);
16136  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, opening, &parser->current, &closing);
16137 
16138  const uint8_t *end = parse_operator_symbol_name(&parser->current);
16139 
16140  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16141  parser_lex(parser);
16142 
16143  pm_string_shared_init(&symbol->unescaped, parser->previous.start, end);
16144  pm_node_flag_set((pm_node_t *) symbol, PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
16145 
16146  return (pm_node_t *) symbol;
16147 }
16148 
/**
 * Parse a symbol whose opening token has just been consumed (it is read from
 * parser->previous). Three shapes are handled, selected by `lex_mode`:
 *
 * 1. The lex mode is not PM_LEX_STRING: the symbol body is a single operator,
 *    identifier, constant, method name, or keyword token.
 * 2. String mode with interpolation: the body is parsed part by part and
 *    yields either a plain symbol node (empty body, or one plain string part)
 *    or an interpolated symbol node.
 * 3. String mode without interpolation: the body is at most one or two string
 *    content tokens followed by the closing token.
 *
 * NOTE(review): in the first switch the embedded line numbers skip 16166 and
 * 16168-16171, so some case labels appear to be absent from this listing;
 * confirm against upstream.
 *
 * @param parser The parser.
 * @param lex_mode The lex mode that describes the symbol being parsed.
 * @param next_state The lex state to install before the end of the symbol is
 *     consumed, or PM_LEX_STATE_NONE to leave the lex state alone.
 * @param depth The current recursion depth.
 * @return A symbol node or an interpolated symbol node.
 */
16154 static pm_node_t *
16155 parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_state, uint16_t depth) {
16156  const pm_token_t opening = parser->previous;
16157 
16158  if (lex_mode->mode != PM_LEX_STRING) {
16159  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16160 
16161  switch (parser->current.type) {
16162  case PM_CASE_OPERATOR:
16163  return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
16164  case PM_TOKEN_IDENTIFIER:
16165  case PM_TOKEN_CONSTANT:
16167  case PM_TOKEN_METHOD_NAME:
16172  case PM_CASE_KEYWORD:
16173  parser_lex(parser);
16174  break;
16175  default:
16176  expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
16177  break;
16178  }
16179 
       // Whichever branch ran, the symbol's name is taken from parser->previous.
16180  pm_token_t closing = not_provided(parser);
16181  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16182 
16183  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16184  pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16185 
16186  return (pm_node_t *) symbol;
16187  }
16188 
16189  if (lex_mode->as.string.interpolation) {
16190  // If we have the end of the symbol, then we can return an empty symbol.
16191  if (match1(parser, PM_TOKEN_STRING_END)) {
16192  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16193  parser_lex(parser);
16194 
16195  pm_token_t content = not_provided(parser);
16196  pm_token_t closing = parser->previous;
16197  return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
16198  }
16199 
16200  // Now we can parse the first part of the symbol.
16201  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
16202 
16203  // If we got a string part, then it's possible that we could transform
16204  // what looks like an interpolated symbol into a regular symbol.
16205  if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16206  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16207  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16208 
16209  return (pm_node_t *) pm_string_node_to_symbol_node(parser, (pm_string_node_t *) part, &opening, &parser->previous);
16210  }
16211 
       // The opening token is passed as a placeholder closing here; the real
       // closing location is set once the end of the symbol has been reached.
16212  pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16213  if (part) pm_interpolated_symbol_node_append(symbol, part);
16214 
       // parse_string_part returns NULL for tokens it cannot handle, so NULL
       // parts are skipped rather than appended.
16215  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16216  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16217  pm_interpolated_symbol_node_append(symbol, part);
16218  }
16219  }
16220 
16221  if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
16222  if (match1(parser, PM_TOKEN_EOF)) {
16223  pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16224  } else {
16225  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
16226  }
16227 
16228  pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16229  return (pm_node_t *) symbol;
16230  }
16231 
16232  pm_token_t content;
16233  pm_string_t unescaped;
16234 
16235  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16236  content = parser->current;
16237  unescaped = parser->current_string;
16238  parser_lex(parser);
16239 
16240  // If we have two string contents in a row, then the content of this
16241  // symbol is split because of heredoc contents. This looks like:
16242  //
16243  // <<A; :'a
16244  // A
16245  // b'
16246  //
16247  // In this case, the best way we have to represent this is as an
16248  // interpolated symbol node, so that's what we'll do here.
16249  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16250  pm_interpolated_symbol_node_t *symbol = pm_interpolated_symbol_node_create(parser, &opening, NULL, &opening);
16251  pm_token_t bounds = not_provided(parser);
16252 
16253  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &unescaped);
16254  pm_interpolated_symbol_node_append(symbol, part);
16255 
16256  part = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &parser->current, &bounds, &parser->current_string);
16257  pm_interpolated_symbol_node_append(symbol, part);
16258 
16259  if (next_state != PM_LEX_STATE_NONE) {
16260  lex_state_set(parser, next_state);
16261  }
16262 
16263  parser_lex(parser);
16264  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16265 
16266  pm_interpolated_symbol_node_closing_loc_set(symbol, &parser->previous);
16267  return (pm_node_t *) symbol;
16268  }
16269  } else {
       // No content token: synthesize an empty one positioned at the end of
       // the previous token.
16270  content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
16271  pm_string_shared_init(&unescaped, content.start, content.end);
16272  }
16273 
16274  if (next_state != PM_LEX_STATE_NONE) {
16275  lex_state_set(parser, next_state);
16276  }
16277 
16278  if (match1(parser, PM_TOKEN_EOF)) {
16279  pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
16280  } else {
16281  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
16282  }
16283 
16284  return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, false));
16285 }
16286 
16291 static inline pm_node_t *
16292 parse_undef_argument(pm_parser_t *parser, uint16_t depth) {
16293  switch (parser->current.type) {
16294  case PM_CASE_OPERATOR: {
16295  const pm_token_t opening = not_provided(parser);
16296  return parse_operator_symbol(parser, &opening, PM_LEX_STATE_NONE);
16297  }
16298  case PM_CASE_KEYWORD:
16299  case PM_TOKEN_CONSTANT:
16300  case PM_TOKEN_IDENTIFIER:
16301  case PM_TOKEN_METHOD_NAME: {
16302  parser_lex(parser);
16303 
16304  pm_token_t opening = not_provided(parser);
16305  pm_token_t closing = not_provided(parser);
16306  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16307 
16308  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16309  pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16310 
16311  return (pm_node_t *) symbol;
16312  }
16313  case PM_TOKEN_SYMBOL_BEGIN: {
16314  pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16315  parser_lex(parser);
16316 
16317  return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16318  }
16319  default:
16320  pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
16321  return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16322  }
16323 }
16324 
/**
 * Parse a single argument to `alias`, dispatching on the current token:
 *
 * - an operator becomes a symbol node via parse_operator_symbol;
 * - a keyword, constant, identifier, or method name is consumed and wrapped
 *   in a symbol node that has no opening or closing;
 * - a symbol opener is consumed and handed to parse_symbol;
 * - otherwise PM_ERR_ALIAS_ARGUMENT is reported and a missing node spanning
 *   the current token is returned.
 *
 * When `first` is true, the lex state is set to
 * PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM before the argument's final token
 * is consumed; otherwise the lex state is left alone.
 *
 * NOTE(review): the embedded line numbers skip 16360, 16363 and 16366, so the
 * case labels for the three statement groups that build back reference,
 * numbered reference, and global variable read nodes are absent from this
 * listing (as shown, those statements are unreachable); confirm against
 * upstream.
 *
 * @param parser The parser.
 * @param first Whether this is the first of the two alias arguments.
 * @param depth The current recursion depth.
 * @return The node for the argument, or a missing node on error.
 */
16331 static inline pm_node_t *
16332 parse_alias_argument(pm_parser_t *parser, bool first, uint16_t depth) {
16333  switch (parser->current.type) {
16334  case PM_CASE_OPERATOR: {
16335  const pm_token_t opening = not_provided(parser);
16336  return parse_operator_symbol(parser, &opening, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
16337  }
16338  case PM_CASE_KEYWORD:
16339  case PM_TOKEN_CONSTANT:
16340  case PM_TOKEN_IDENTIFIER:
16341  case PM_TOKEN_METHOD_NAME: {
16342  if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
16343  parser_lex(parser);
16344 
16345  pm_token_t opening = not_provided(parser);
16346  pm_token_t closing = not_provided(parser);
16347  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
16348 
16349  pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
16350  pm_node_flag_set((pm_node_t *) symbol, parse_symbol_encoding(parser, &parser->previous, &symbol->unescaped, false));
16351 
16352  return (pm_node_t *) symbol;
16353  }
16354  case PM_TOKEN_SYMBOL_BEGIN: {
       // Copy the lex mode before lexing past the opener.
16355  pm_lex_mode_t lex_mode = *parser->lex_modes.current;
16356  parser_lex(parser);
16357 
16358  return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
16359  }
16361  parser_lex(parser);
16362  return (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
16364  parser_lex(parser);
16365  return (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
16367  parser_lex(parser);
16368  return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
16369  default:
16370  pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
16371  return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
16372  }
16373 }
16374 
16379 static pm_node_t *
16380 parse_variable(pm_parser_t *parser) {
16381  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16382  int depth;
16383 
16384  if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
16385  return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16386  }
16387 
16388  pm_scope_t *current_scope = parser->current_scope;
16389  if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16390  if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
16391  // When you use a numbered parameter, it implies the existence of
16392  // all of the locals that exist before it. For example, referencing
16393  // _2 means that _1 must exist. Therefore here we loop through all
16394  // of the possibilities and add them into the constant pool.
16395  uint8_t maximum = (uint8_t) (parser->previous.start[1] - '0');
16396  for (uint8_t number = 1; number <= maximum; number++) {
16397  pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
16398  }
16399 
16400  if (!match1(parser, PM_TOKEN_EQUAL)) {
16401  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_NUMBERED_FOUND;
16402  }
16403 
16404  pm_node_t *node = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0, false);
16405  pm_node_list_append(&current_scope->implicit_parameters, node);
16406 
16407  return node;
16408  } else if ((parser->version != PM_OPTIONS_VERSION_CRUBY_3_3) && pm_token_is_it(parser->previous.start, parser->previous.end)) {
16409  pm_node_t *node = (pm_node_t *) pm_it_local_variable_read_node_create(parser, &parser->previous);
16410  pm_node_list_append(&current_scope->implicit_parameters, node);
16411 
16412  return node;
16413  }
16414  }
16415 
16416  return NULL;
16417 }
16418 
16422 static pm_node_t *
16423 parse_variable_call(pm_parser_t *parser) {
16424  pm_node_flags_t flags = 0;
16425 
16426  if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->previous.end[-1] != '!') && (parser->previous.end[-1] != '?')) {
16427  pm_node_t *node = parse_variable(parser);
16428  if (node != NULL) return node;
16430  }
16431 
16432  pm_call_node_t *node = pm_call_node_variable_call_create(parser, &parser->previous);
16433  pm_node_flag_set((pm_node_t *)node, flags);
16434 
16435  return (pm_node_t *) node;
16436 }
16437 
16443 static inline pm_token_t
16444 parse_method_definition_name(pm_parser_t *parser) {
16445  switch (parser->current.type) {
16446  case PM_CASE_KEYWORD:
16447  case PM_TOKEN_CONSTANT:
16448  case PM_TOKEN_METHOD_NAME:
16449  parser_lex(parser);
16450  return parser->previous;
16451  case PM_TOKEN_IDENTIFIER:
16452  pm_refute_numbered_parameter(parser, parser->current.start, parser->current.end);
16453  parser_lex(parser);
16454  return parser->previous;
16455  case PM_CASE_OPERATOR:
16456  lex_state_set(parser, PM_LEX_STATE_ENDFN);
16457  parser_lex(parser);
16458  return parser->previous;
16459  default:
16460  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_NAME, pm_token_type_human(parser->current.type));
16461  return (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->current.start, .end = parser->current.end };
16462  }
16463 }
16464 
16465 static void
16466 parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
16467  // Get a reference to the string struct that is being held by the string
16468  // node. This is the value we're going to actually manipulate.
16469  pm_string_ensure_owned(string);
16470 
16471  // Now get the bounds of the existing string. We'll use this as a
16472  // destination to move bytes into. We'll also use it for bounds checking
16473  // since we don't require that these strings be null terminated.
16474  size_t dest_length = pm_string_length(string);
16475  const uint8_t *source_cursor = (uint8_t *) string->source;
16476  const uint8_t *source_end = source_cursor + dest_length;
16477 
16478  // We're going to move bytes backward in the string when we get leading
16479  // whitespace, so we'll maintain a pointer to the current position in the
16480  // string that we're writing to.
16481  size_t trimmed_whitespace = 0;
16482 
16483  // While we haven't reached the amount of common whitespace that we need to
16484  // trim and we haven't reached the end of the string, we'll keep trimming
16485  // whitespace. Trimming in this context means skipping over these bytes such
16486  // that they aren't copied into the new string.
16487  while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
16488  if (*source_cursor == '\t') {
16489  trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
16490  if (trimmed_whitespace > common_whitespace) break;
16491  } else {
16492  trimmed_whitespace++;
16493  }
16494 
16495  source_cursor++;
16496  dest_length--;
16497  }
16498 
16499  memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
16500  string->length = dest_length;
16501 }
16502 
16506 static void
16507 parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
16508  // The next node should be dedented if it's the first node in the list or if
16509  // it follows a string node.
16510  bool dedent_next = true;
16511 
16512  // Iterate over all nodes, and trim whitespace accordingly. We're going to
16513  // keep around two indices: a read and a write. If we end up trimming all of
16514  // the whitespace from a node, then we'll drop it from the list entirely.
16515  size_t write_index = 0;
16516 
16517  pm_node_t *node;
16518  PM_NODE_LIST_FOREACH(nodes, read_index, node) {
16519  // We're not manipulating child nodes that aren't strings. In this case
16520  // we'll skip past it and indicate that the subsequent node should not
16521  // be dedented.
16522  if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
16523  nodes->nodes[write_index++] = node;
16524  dedent_next = false;
16525  continue;
16526  }
16527 
16528  pm_string_node_t *string_node = ((pm_string_node_t *) node);
16529  if (dedent_next) {
16530  parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
16531  }
16532 
16533  if (string_node->unescaped.length == 0) {
16534  pm_node_destroy(parser, node);
16535  } else {
16536  nodes->nodes[write_index++] = node;
16537  }
16538 
16539  // We always dedent the next node if it follows a string node.
16540  dedent_next = true;
16541  }
16542 
16543  nodes->size = write_index;
16544 }
16545 
16549 static pm_token_t
16550 parse_strings_empty_content(const uint8_t *location) {
16551  return (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
16552 }
16553 
16557 static inline pm_node_t *
16558 parse_strings(pm_parser_t *parser, pm_node_t *current, bool accepts_label, uint16_t depth) {
16559  assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
16560  bool concating = false;
16561 
16562  while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16563  pm_node_t *node = NULL;
16564 
16565  // Here we have found a string literal. We'll parse it and add it to
16566  // the list of strings.
16567  const pm_lex_mode_t *lex_mode = parser->lex_modes.current;
16568  assert(lex_mode->mode == PM_LEX_STRING);
16569  bool lex_interpolation = lex_mode->as.string.interpolation;
16570  bool label_allowed = lex_mode->as.string.label_allowed && accepts_label;
16571 
16572  pm_token_t opening = parser->current;
16573  parser_lex(parser);
16574 
16575  if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16576  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16577  // If we get here, then we have an end immediately after a
16578  // start. In that case we'll create an empty content token and
16579  // return an uninterpolated string.
16580  pm_token_t content = parse_strings_empty_content(parser->previous.start);
16581  pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
16582 
16583  pm_string_shared_init(&string->unescaped, content.start, content.end);
16584  node = (pm_node_t *) string;
16585  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16586  // If we get here, then we have an end of a label immediately
16587  // after a start. In that case we'll create an empty symbol
16588  // node.
16589  pm_token_t content = parse_strings_empty_content(parser->previous.start);
16590  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
16591 
16592  pm_string_shared_init(&symbol->unescaped, content.start, content.end);
16593  node = (pm_node_t *) symbol;
16594 
16595  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16596  } else if (!lex_interpolation) {
16597  // If we don't accept interpolation then we expect the string to
16598  // start with a single string content node.
16599  pm_string_t unescaped;
16600  pm_token_t content;
16601 
16602  if (match1(parser, PM_TOKEN_EOF)) {
16603  unescaped = PM_STRING_EMPTY;
16604  content = not_provided(parser);
16605  } else {
16606  unescaped = parser->current_string;
16607  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16608  content = parser->previous;
16609  }
16610 
16611  // It is unfortunately possible to have multiple string content
16612  // nodes in a row in the case that there's heredoc content in
16613  // the middle of the string, like this cursed example:
16614  //
16615  // <<-END+'b
16616  // a
16617  // END
16618  // c'+'d'
16619  //
16620  // In that case we need to switch to an interpolated string to
16621  // be able to contain all of the parts.
16622  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16623  pm_node_list_t parts = { 0 };
16624 
16625  pm_token_t delimiters = not_provided(parser);
16626  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
16627  pm_node_list_append(&parts, part);
16628 
16629  do {
16630  part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
16631  pm_node_list_append(&parts, part);
16632  parser_lex(parser);
16633  } while (match1(parser, PM_TOKEN_STRING_CONTENT));
16634 
16635  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16636  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16637 
16638  pm_node_list_free(&parts);
16639  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16640  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16641  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16642  } else if (match1(parser, PM_TOKEN_EOF)) {
16643  pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16644  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16645  } else if (accept1(parser, PM_TOKEN_STRING_END)) {
16646  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16647  } else {
16648  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
16649  parser->previous.start = parser->previous.end;
16650  parser->previous.type = PM_TOKEN_MISSING;
16651  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
16652  }
16653  } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16654  // In this case we've hit string content so we know the string
16655  // at least has something in it. We'll need to check if the
16656  // following token is the end (in which case we can return a
16657  // plain string) or if it's not then it has interpolation.
16658  pm_token_t content = parser->current;
16659  pm_string_t unescaped = parser->current_string;
16660  parser_lex(parser);
16661 
16662  if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16663  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
16664  pm_node_flag_set(node, parse_unescaped_encoding(parser));
16665 
16666  // Kind of odd behavior, but basically if we have an
16667  // unterminated string and it ends in a newline, we back up one
16668  // character so that the error message is on the last line of
16669  // content in the string.
16670  if (!accept1(parser, PM_TOKEN_STRING_END)) {
16671  const uint8_t *location = parser->previous.end;
16672  if (location > parser->start && location[-1] == '\n') location--;
16673  pm_parser_err(parser, location, location, PM_ERR_STRING_LITERAL_EOF);
16674 
16675  parser->previous.start = parser->previous.end;
16676  parser->previous.type = PM_TOKEN_MISSING;
16677  }
16678  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16679  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped, true));
16680  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16681  } else {
16682  // If we get here, then we have interpolation so we'll need
16683  // to create a string or symbol node with interpolation.
16684  pm_node_list_t parts = { 0 };
16685  pm_token_t string_opening = not_provided(parser);
16686  pm_token_t string_closing = not_provided(parser);
16687 
16688  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
16689  pm_node_flag_set(part, parse_unescaped_encoding(parser));
16690  pm_node_list_append(&parts, part);
16691 
16692  while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16693  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16694  pm_node_list_append(&parts, part);
16695  }
16696  }
16697 
16698  if (accept1(parser, PM_TOKEN_LABEL_END)) {
16699  node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16700  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16701  } else if (match1(parser, PM_TOKEN_EOF)) {
16702  pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16703  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16704  } else {
16705  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16706  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16707  }
16708 
16709  pm_node_list_free(&parts);
16710  }
16711  } else {
16712  // If we get here, then the first part of the string is not plain
16713  // string content, in which case we need to parse the string as an
16714  // interpolated string.
16715  pm_node_list_t parts = { 0 };
16716  pm_node_t *part;
16717 
16718  while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16719  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16720  pm_node_list_append(&parts, part);
16721  }
16722  }
16723 
16724  if (accept1(parser, PM_TOKEN_LABEL_END)) {
16725  node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
16726  if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16727  } else if (match1(parser, PM_TOKEN_EOF)) {
16728  pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16729  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->current);
16730  } else {
16731  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16732  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
16733  }
16734 
16735  pm_node_list_free(&parts);
16736  }
16737 
16738  if (current == NULL) {
16739  // If the node we just parsed is a symbol node, then we can't
16740  // concatenate it with anything else, so we can now return that
16741  // node.
16743  return node;
16744  }
16745 
16746  // If we don't already have a node, then it's fine and we can just
16747  // set the result to be the node we just parsed.
16748  current = node;
16749  } else {
16750  // Otherwise we need to check the type of the node we just parsed.
16751  // If it cannot be concatenated with the previous node, then we'll
16752  // need to add a syntax error.
16754  pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16755  }
16756 
16757  // If we haven't already created our container for concatenation,
16758  // we'll do that now.
16759  if (!concating) {
16760  concating = true;
16761  pm_token_t bounds = not_provided(parser);
16762 
16763  pm_interpolated_string_node_t *container = pm_interpolated_string_node_create(parser, &bounds, NULL, &bounds);
16764  pm_interpolated_string_node_append(container, current);
16765  current = (pm_node_t *) container;
16766  }
16767 
16768  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, node);
16769  }
16770  }
16771 
16772  return current;
16773 }
16774 
// Values for the `flags` argument of parse_pattern. Callers in this file pass
// PM_PARSE_PATTERN_SINGLE when parsing the value of a hash pattern key, and
// PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI when parsing the contents of
// a Const[...] or Const(...) pattern.
// NOTE(review): the exact meaning of each bit is decided inside parse_pattern,
// which is defined elsewhere -- confirm there before relying on the names.
16775 #define PM_PARSE_PATTERN_SINGLE 0
16776 #define PM_PARSE_PATTERN_TOP 1
16777 #define PM_PARSE_PATTERN_MULTI 2
16778 
// Forward declaration of parse_pattern, so that the pattern helpers that
// follow can call it (for example to parse the contents of brackets or
// parentheses, or the value of a hash pattern key). `flags` takes the
// PM_PARSE_PATTERN_* values and `diag_id` names a diagnostic.
// NOTE(review): presumably `diag_id` is reported when no pattern can be
// parsed -- confirm against the definition.
16779 static pm_node_t *
16780 parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth);
16781 
16787 static void
16788 parse_pattern_capture(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_constant_id_t capture, const pm_location_t *location) {
16789  // Skip this capture if it starts with an underscore.
16790  if (*location->start == '_') return;
16791 
16792  if (pm_constant_id_list_includes(captures, capture)) {
16793  pm_parser_err(parser, location->start, location->end, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16794  } else {
16795  pm_constant_id_list_append(captures, capture);
16796  }
16797 }
16798 
16802 static pm_node_t *
16803 parse_pattern_constant_path(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *node, uint16_t depth) {
16804  // Now, if there are any :: operators that follow, parse them as constant
16805  // path nodes.
16806  while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16807  pm_token_t delimiter = parser->previous;
16808  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16809  node = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
16810  }
16811 
16812  // If there is a [ or ( that follows, then this is part of a larger pattern
16813  // expression. We'll parse the inner pattern here, then modify the returned
16814  // inner pattern with our constant path attached.
16815  if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16816  return node;
16817  }
16818 
16819  pm_token_t opening;
16820  pm_token_t closing;
16821  pm_node_t *inner = NULL;
16822 
16823  if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16824  opening = parser->previous;
16825  accept1(parser, PM_TOKEN_NEWLINE);
16826 
16827  if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16828  inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16829  accept1(parser, PM_TOKEN_NEWLINE);
16830  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
16831  }
16832 
16833  closing = parser->previous;
16834  } else {
16835  parser_lex(parser);
16836  opening = parser->previous;
16837  accept1(parser, PM_TOKEN_NEWLINE);
16838 
16839  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16840  inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16841  accept1(parser, PM_TOKEN_NEWLINE);
16842  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
16843  }
16844 
16845  closing = parser->previous;
16846  }
16847 
16848  if (!inner) {
16849  // If there was no inner pattern, then we have something like Foo() or
16850  // Foo[]. In that case we'll create an array pattern with no requireds.
16851  return (pm_node_t *) pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16852  }
16853 
16854  // Now that we have the inner pattern, check to see if it's an array, find,
16855  // or hash pattern. If it is, then we'll attach our constant path to it if
16856  // it doesn't already have a constant. If it's not one of those node types
16857  // or it does have a constant, then we'll create an array pattern.
16858  switch (PM_NODE_TYPE(inner)) {
16859  case PM_ARRAY_PATTERN_NODE: {
16860  pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
16861 
16862  if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16863  pattern_node->base.location.start = node->location.start;
16864  pattern_node->base.location.end = closing.end;
16865 
16866  pattern_node->constant = node;
16867  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16868  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16869 
16870  return (pm_node_t *) pattern_node;
16871  }
16872 
16873  break;
16874  }
16875  case PM_FIND_PATTERN_NODE: {
16876  pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
16877 
16878  if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16879  pattern_node->base.location.start = node->location.start;
16880  pattern_node->base.location.end = closing.end;
16881 
16882  pattern_node->constant = node;
16883  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16884  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16885 
16886  return (pm_node_t *) pattern_node;
16887  }
16888 
16889  break;
16890  }
16891  case PM_HASH_PATTERN_NODE: {
16892  pm_hash_pattern_node_t *pattern_node = (pm_hash_pattern_node_t *) inner;
16893 
16894  if (pattern_node->constant == NULL && pattern_node->opening_loc.start == NULL) {
16895  pattern_node->base.location.start = node->location.start;
16896  pattern_node->base.location.end = closing.end;
16897 
16898  pattern_node->constant = node;
16899  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
16900  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
16901 
16902  return (pm_node_t *) pattern_node;
16903  }
16904 
16905  break;
16906  }
16907  default:
16908  break;
16909  }
16910 
16911  // If we got here, then we didn't return one of the inner patterns by
16912  // attaching its constant. In this case we'll create an array pattern and
16913  // attach our constant to it.
16914  pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16915  pm_array_pattern_node_requireds_append(pattern_node, inner);
16916  return (pm_node_t *) pattern_node;
16917 }
16918 
16922 static pm_splat_node_t *
16923 parse_pattern_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16924  assert(parser->previous.type == PM_TOKEN_USTAR);
16925  pm_token_t operator = parser->previous;
16926  pm_node_t *name = NULL;
16927 
16928  // Rest patterns don't necessarily have a name associated with them. So we
16929  // will check for that here. If they do, then we'll add it to the local
16930  // table since this pattern will cause it to become a local variable.
16931  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16932  pm_token_t identifier = parser->previous;
16933  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &identifier);
16934 
16935  int depth;
16936  if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16937  pm_parser_local_add(parser, constant_id, identifier.start, identifier.end, 0);
16938  }
16939 
16940  parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&identifier));
16941  name = (pm_node_t *) pm_local_variable_target_node_create(
16942  parser,
16943  &PM_LOCATION_TOKEN_VALUE(&identifier),
16944  constant_id,
16945  (uint32_t) (depth == -1 ? 0 : depth)
16946  );
16947  }
16948 
16949  // Finally we can return the created node.
16950  return pm_splat_node_create(parser, &operator, name);
16951 }
16952 
16956 static pm_node_t *
16957 parse_pattern_keyword_rest(pm_parser_t *parser, pm_constant_id_list_t *captures) {
16958  assert(parser->current.type == PM_TOKEN_USTAR_STAR);
16959  parser_lex(parser);
16960 
16961  pm_token_t operator = parser->previous;
16962  pm_node_t *value = NULL;
16963 
16964  if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16965  return (pm_node_t *) pm_no_keywords_parameter_node_create(parser, &operator, &parser->previous);
16966  }
16967 
16968  if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16969  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
16970 
16971  int depth;
16972  if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16973  pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
16974  }
16975 
16976  parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
16977  value = (pm_node_t *) pm_local_variable_target_node_create(
16978  parser,
16979  &PM_LOCATION_TOKEN_VALUE(&parser->previous),
16980  constant_id,
16981  (uint32_t) (depth == -1 ? 0 : depth)
16982  );
16983  }
16984 
16985  return (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
16986 }
16987 
16992 static bool
16993 pm_slice_is_valid_local(const pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
16994  ptrdiff_t length = end - start;
16995  if (length == 0) return false;
16996 
16997  // First ensure that it starts with a valid identifier starting character.
16998  size_t width = char_is_identifier_start(parser, start);
16999  if (width == 0) return false;
17000 
17001  // Next, ensure that it's not an uppercase character.
17002  if (parser->encoding_changed) {
17003  if (parser->encoding->isupper_char(start, length)) return false;
17004  } else {
17005  if (pm_encoding_utf_8_isupper_char(start, length)) return false;
17006  }
17007 
17008  // Next, iterate through all of the bytes of the string to ensure that they
17009  // are all valid identifier characters.
17010  const uint8_t *cursor = start + width;
17011  while ((cursor < end) && (width = char_is_identifier(parser, cursor))) cursor += width;
17012  return cursor == end;
17013 }
17014 
17019 static pm_node_t *
17020 parse_pattern_hash_implicit_value(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_symbol_node_t *key) {
17021  const pm_location_t *value_loc = &((pm_symbol_node_t *) key)->value_loc;
17022 
17023  pm_constant_id_t constant_id = pm_parser_constant_id_location(parser, value_loc->start, value_loc->end);
17024  int depth = -1;
17025 
17026  if (pm_slice_is_valid_local(parser, value_loc->start, value_loc->end)) {
17027  depth = pm_parser_local_depth_constant_id(parser, constant_id);
17028  } else {
17029  pm_parser_err(parser, key->base.location.start, key->base.location.end, PM_ERR_PATTERN_HASH_KEY_LOCALS);
17030 
17031  if ((value_loc->end > value_loc->start) && ((value_loc->end[-1] == '!') || (value_loc->end[-1] == '?'))) {
17032  PM_PARSER_ERR_LOCATION_FORMAT(parser, value_loc, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (int) (value_loc->end - value_loc->start), (const char *) value_loc->start);
17033  }
17034  }
17035 
17036  if (depth == -1) {
17037  pm_parser_local_add(parser, constant_id, value_loc->start, value_loc->end, 0);
17038  }
17039 
17040  parse_pattern_capture(parser, captures, constant_id, value_loc);
17041  pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17042  parser,
17043  value_loc,
17044  constant_id,
17045  (uint32_t) (depth == -1 ? 0 : depth)
17046  );
17047 
17048  return (pm_node_t *) pm_implicit_node_create(parser, (pm_node_t *) target);
17049 }
17050 
17055 static void
17056 parse_pattern_hash_key(pm_parser_t *parser, pm_static_literals_t *keys, pm_node_t *node) {
17057  if (pm_static_literals_add(&parser->newline_list, parser->start_line, keys, node, true) != NULL) {
17058  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
17059  }
17060 }
17061 
/**
 * Parse a hash pattern whose first element (first_node) has already been
 * parsed by the caller, then keep consuming comma-separated entries.
 *
 * An assoc splat first node becomes the rest element, a label symbol becomes
 * the key of the first assoc, and anything else is reported as an error and
 * paired with a missing node as its value. The collected assocs and the rest
 * element are handed to pm_hash_pattern_node_node_list_create; the temporary
 * list storage and the duplicate-key set are released before returning.
 *
 * NOTE(review): the listing numbers embedded in this block skip at several
 * points in the middle of code (see the notes below), and as shown the block
 * has `else` branches without a matching `if`. Restore the dropped lines from
 * the canonical file before building.
 */
17065 static pm_hash_pattern_node_t *
17066 parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, uint16_t depth) {
17067  pm_node_list_t assocs = { 0 };
17068  pm_static_literals_t keys = { 0 };
17069  pm_node_t *rest = NULL;
17070 
17071  switch (PM_NODE_TYPE(first_node)) {
17072  case PM_ASSOC_SPLAT_NODE:
  // NOTE(review): listing line 17073 is absent here; possibly another case
  // label sharing this body — confirm.
17074  rest = first_node;
17075  break;
17076  case PM_SYMBOL_NODE: {
17077  if (pm_symbol_node_label_p(first_node)) {
17078  parse_pattern_hash_key(parser, &keys, first_node);
17079  pm_node_t *value;
17080 
  // NOTE(review): listing line 17081 is absent. The `} else {` below needs an
  // `if` here that decides between an implicit value and an explicit pattern;
  // its condition is not visible in this copy.
17082  // Otherwise, we will create an implicit local variable
17083  // target for the value.
17084  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) first_node);
17085  } else {
17086  // Here we have a value for the first assoc in the list, so
17087  // we will parse it now.
17088  value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17089  }
17090 
  // The first assoc has no operator token, so a not-provided token is used.
17091  pm_token_t operator = not_provided(parser);
17092  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17093 
17094  pm_node_list_append(&assocs, assoc);
17095  break;
17096  }
17097  }
  // A symbol that is not a label is handled by the error path below.
17098  /* fallthrough */
17099  default: {
17100  // If we get anything else, then this is an error. For this we'll
17101  // create a missing node for the value and create an assoc node for
17102  // the first node in the list.
17103  pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
17104  pm_parser_err_node(parser, first_node, diag_id);
17105 
17106  pm_token_t operator = not_provided(parser);
17107  pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, first_node->location.start, first_node->location.end);
17108  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, first_node, &operator, value);
17109 
17110  pm_node_list_append(&assocs, assoc);
17111  break;
17112  }
17113  }
17114 
17115  // If there are any other assocs, then we'll parse them now.
17116  while (accept1(parser, PM_TOKEN_COMMA)) {
17117  // Here we need to break to support trailing commas.
  // NOTE(review): listing line 17118 is absent. The trailing-comma check that
  // should guard the block below is not visible, so as shown the loop always
  // breaks after the first comma.
17119  // Trailing commas are not allowed to follow a rest pattern.
17120  if (rest != NULL) {
17121  pm_parser_err_token(parser, &parser->current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17122  }
17123 
17124  break;
17125  }
17126 
17127  if (match1(parser, PM_TOKEN_USTAR_STAR)) {
17128  pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
17129 
  // The first keyword rest becomes the rest element; any further one is an
  // error and is kept in the assoc list.
17130  if (rest == NULL) {
17131  rest = assoc;
17132  } else {
17133  pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17134  pm_node_list_append(&assocs, assoc);
17135  }
17136  } else {
17137  pm_node_t *key;
17138 
17139  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17140  key = parse_strings(parser, NULL, true, (uint16_t) (depth + 1));
17141 
  // NOTE(review): listing line 17142 is absent. The `if` that selects the
  // interpolated-key error below is not visible in this copy.
17143  pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
17144  } else if (!pm_symbol_node_label_p(key)) {
17145  pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17146  }
17147  } else {
17148  expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
17149  key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17150  }
17151 
17152  parse_pattern_hash_key(parser, &keys, key);
17153  pm_node_t *value = NULL;
17154 
  // NOTE(review): listing line 17155 is absent. The `if` choosing the implicit
  // value over an explicit pattern is not visible in this copy.
17156  value = parse_pattern_hash_implicit_value(parser, captures, (pm_symbol_node_t *) key);
17157  } else {
17158  value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
17159  }
17160 
17161  pm_token_t operator = not_provided(parser);
17162  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, key, &operator, value);
17163 
  // A key/value pair that follows a rest element is an error, but it is still
  // appended to the list.
17164  if (rest != NULL) {
17165  pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
17166  }
17167 
17168  pm_node_list_append(&assocs, assoc);
17169  }
17170  }
17171 
17172  pm_hash_pattern_node_t *node = pm_hash_pattern_node_node_list_create(parser, &assocs, rest);
  // Only the temporary list storage is freed here, not the nodes themselves.
17173  xfree(assocs.nodes);
17174 
17175  pm_static_literals_free(&keys);
17176  return node;
17177 }
17178 
/**
 * Parse a single primitive pattern, dispatching on the type of the current
 * token: an identifier becomes a local variable target, a left brace starts
 * a hash pattern, a unary range operator starts a beginless range, a
 * primitive expression may be followed by a range operator, a caret starts a
 * pinned variable or expression, and a constant or leading `::` starts a
 * constant path. Any other token reports diag_id and yields a missing node.
 *
 * NOTE(review): the listing numbers embedded in this block skip at four
 * points where a case body starts without its `case ...: {` line (see the
 * notes below). The exact token names of the lost labels are not visible in
 * this copy; restore them from the canonical file before building.
 */
17182 static pm_node_t *
17183 parse_pattern_primitive(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_diagnostic_id_t diag_id, uint16_t depth) {
17184  switch (parser->current.type) {
17185  case PM_TOKEN_IDENTIFIER:
17186  case PM_TOKEN_METHOD_NAME: {
17187  parser_lex(parser);
17188  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
17189 
  // NOTE(review): this local shadows the uint16_t `depth` parameter for the
  // rest of this case block; here it holds the result of the local lookup.
17190  int depth;
17191  if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17192  pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17193  }
17194 
17195  parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17196  return (pm_node_t *) pm_local_variable_target_node_create(
17197  parser,
17198  &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17199  constant_id,
17200  (uint32_t) (depth == -1 ? 0 : depth)
17201  );
17202  }
  // NOTE(review): listing line 17203 is absent. The statements below parse a
  // bracketed array/find pattern and need their `case` label.
17204  pm_token_t opening = parser->current;
17205  parser_lex(parser);
17206 
17207  if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17208  // If we have an empty array pattern, then we'll just return a new
17209  // array pattern node.
17210  return (pm_node_t *) pm_array_pattern_node_empty_create(parser, &opening, &parser->previous);
17211  }
17212 
17213  // Otherwise, we'll parse the inner pattern, then deal with it depending
17214  // on the type it returns.
17215  pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
17216 
17217  accept1(parser, PM_TOKEN_NEWLINE);
17218  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET);
17219  pm_token_t closing = parser->previous;
17220 
  // An inner array or find pattern that has no opening location yet adopts
  // these brackets; otherwise the inner node is wrapped in a new array
  // pattern after the switch.
17221  switch (PM_NODE_TYPE(inner)) {
17222  case PM_ARRAY_PATTERN_NODE: {
17223  pm_array_pattern_node_t *pattern_node = (pm_array_pattern_node_t *) inner;
17224  if (pattern_node->opening_loc.start == NULL) {
17225  pattern_node->base.location.start = opening.start;
17226  pattern_node->base.location.end = closing.end;
17227 
17228  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17229  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17230 
17231  return (pm_node_t *) pattern_node;
17232  }
17233 
17234  break;
17235  }
17236  case PM_FIND_PATTERN_NODE: {
17237  pm_find_pattern_node_t *pattern_node = (pm_find_pattern_node_t *) inner;
17238  if (pattern_node->opening_loc.start == NULL) {
17239  pattern_node->base.location.start = opening.start;
17240  pattern_node->base.location.end = closing.end;
17241 
17242  pattern_node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17243  pattern_node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17244 
17245  return (pm_node_t *) pattern_node;
17246  }
17247 
17248  break;
17249  }
17250  default:
17251  break;
17252  }
17253 
17254  pm_array_pattern_node_t *node = pm_array_pattern_node_empty_create(parser, &opening, &closing);
17255  pm_array_pattern_node_requireds_append(node, inner);
17256  return (pm_node_t *) node;
17257  }
17258  case PM_TOKEN_BRACE_LEFT: {
  // pattern_matching_newlines is cleared while inside the braces and restored
  // before returning.
17259  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17260  parser->pattern_matching_newlines = false;
17261 
17262  pm_hash_pattern_node_t *node;
17263  pm_token_t opening = parser->current;
17264  parser_lex(parser);
17265 
17266  if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
17267  // If we have an empty hash pattern, then we'll just return a new hash
17268  // pattern node.
17269  node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->previous);
17270  } else {
17271  pm_node_t *first_node;
17272 
  // Parse the first element here so that parse_pattern_hash can take over
  // from the second element onwards.
17273  switch (parser->current.type) {
17274  case PM_TOKEN_LABEL:
17275  parser_lex(parser);
17276  first_node = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17277  break;
17278  case PM_TOKEN_USTAR_STAR:
17279  first_node = parse_pattern_keyword_rest(parser, captures);
17280  break;
17281  case PM_TOKEN_STRING_BEGIN:
17282  first_node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
17283  break;
17284  default: {
17285  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_PATTERN_HASH_KEY, pm_token_type_human(parser->current.type));
17286  parser_lex(parser);
17287 
17288  first_node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
17289  break;
17290  }
17291  }
17292 
17293  node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
17294 
17295  accept1(parser, PM_TOKEN_NEWLINE);
17296  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE);
17297  pm_token_t closing = parser->previous;
17298 
  // Widen the node to include the braces and record their locations.
17299  node->base.location.start = opening.start;
17300  node->base.location.end = closing.end;
17301 
17302  node->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
17303  node->closing_loc = PM_LOCATION_TOKEN_VALUE(&closing);
17304  }
17305 
17306  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17307  return (pm_node_t *) node;
17308  }
17309  case PM_TOKEN_UDOT_DOT:
17310  case PM_TOKEN_UDOT_DOT_DOT: {
17311  pm_token_t operator = parser->current;
17312  parser_lex(parser);
17313 
17314  // Since we have a unary range operator, we need to parse the subsequent
17315  // expression as the right side of the range.
17316  switch (parser->current.type) {
17317  case PM_CASE_PRIMITIVE: {
17318  pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17319  return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17320  }
17321  default: {
17322  pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
17323  pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17324  return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
17325  }
17326  }
17327  }
17328  case PM_CASE_PRIMITIVE: {
17329  pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX, false, true, diag_id, (uint16_t) (depth + 1));
17330 
17331  // If we found a label, we need to immediately return to the caller.
17332  if (pm_symbol_node_label_p(node)) return node;
17333 
17334  // Now that we have a primitive, we need to check if it's part of a range.
17335  if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17336  pm_token_t operator = parser->previous;
17337 
17338  // Now that we have the operator, we need to check if this is followed
17339  // by another expression. If it is, then we will create a full range
17340  // node. Otherwise, we'll create an endless range.
17341  switch (parser->current.type) {
17342  case PM_CASE_PRIMITIVE: {
17343  pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX, false, false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
17344  return (pm_node_t *) pm_range_node_create(parser, node, &operator, right);
17345  }
17346  default:
17347  return (pm_node_t *) pm_range_node_create(parser, node, &operator, NULL);
17348  }
17349  }
17350 
17351  return node;
17352  }
17353  case PM_TOKEN_CARET: {
17354  parser_lex(parser);
17355  pm_token_t operator = parser->previous;
17356 
17357  // At this point we have a pin operator. We need to check the subsequent
17358  // expression to determine if it's a variable or an expression.
17359  switch (parser->current.type) {
17360  case PM_TOKEN_IDENTIFIER: {
17361  parser_lex(parser);
17362  pm_node_t *variable = (pm_node_t *) parse_variable(parser);
17363 
  // When parse_variable returns NULL, report the error and pin a missing
  // local variable read instead.
17364  if (variable == NULL) {
17365  PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
17366  variable = (pm_node_t *) pm_local_variable_read_node_missing_create(parser, &parser->previous, 0);
17367  }
17368 
17369  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17370  }
  // NOTE(review): listing line 17371 is absent. The body below pins an
  // instance variable read and needs its `case` label.
17372  parser_lex(parser);
17373  pm_node_t *variable = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
17374 
17375  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17376  }
17377  case PM_TOKEN_CLASS_VARIABLE: {
17378  parser_lex(parser);
17379  pm_node_t *variable = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
17380 
17381  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17382  }
17383  case PM_TOKEN_GLOBAL_VARIABLE: {
17384  parser_lex(parser);
17385  pm_node_t *variable = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
17386 
17387  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17388  }
  // NOTE(review): listing line 17389 is absent. The body below pins a
  // numbered reference read and needs its `case` label.
17390  parser_lex(parser);
17391  pm_node_t *variable = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
17392 
17393  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17394  }
17395  case PM_TOKEN_BACK_REFERENCE: {
17396  parser_lex(parser);
17397  pm_node_t *variable = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
17398 
17399  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17400  }
  // NOTE(review): listing line 17401 is absent. The body below parses a
  // parenthesized pinned expression and needs its `case` label.
17402  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
17403  parser->pattern_matching_newlines = false;
17404 
17405  pm_token_t lparen = parser->current;
17406  parser_lex(parser);
17407 
17408  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
17409  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
17410 
17411  accept1(parser, PM_TOKEN_NEWLINE);
17412  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17413  return (pm_node_t *) pm_pinned_expression_node_create(parser, expression, &operator, &lparen, &parser->previous);
17414  }
17415  default: {
17416  // If we get here, then we have a pin operator followed by something
17417  // not understood. We'll create a missing node and return that.
17418  pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
17419  pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
17420  return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
17421  }
17422  }
17423  }
17424  case PM_TOKEN_UCOLON_COLON: {
17425  pm_token_t delimiter = parser->current;
17426  parser_lex(parser);
17427 
17428  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17429  pm_constant_path_node_t *node = pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
17430 
17431  return parse_pattern_constant_path(parser, captures, (pm_node_t *) node, (uint16_t) (depth + 1));
17432  }
17433  case PM_TOKEN_CONSTANT: {
17434  pm_token_t constant = parser->current;
17435  parser_lex(parser);
17436 
17437  pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &constant);
17438  return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
17439  }
17440  default:
  // Nothing recognizable: report the caller-supplied diagnostic and return a
  // missing node spanning the current token, which is not consumed.
17441  pm_parser_err_current(parser, diag_id);
17442  return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17443  }
17444 }
17445 
/**
 * Parse one or more primitive patterns joined by `|`, followed by any number
 * of `=> identifier` captures.
 *
 * first_node is an already-parsed leading pattern, or NULL when the first
 * primitive still has to be parsed here. Each additional alternative is
 * combined with the pattern so far into an alternation pattern node, and each
 * `=> identifier` wraps the pattern so far in a capture pattern node.
 *
 * NOTE(review): the listing numbers embedded in this block skip at two points
 * inside the switch (see the notes below); restore the dropped case labels
 * from the canonical file before building.
 */
17450 static pm_node_t *
17451 parse_pattern_primitives(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node_t *first_node, pm_diagnostic_id_t diag_id, uint16_t depth) {
17452  pm_node_t *node = first_node;
17453 
  // The first iteration runs without requiring a pipe when there is no
  // leading node yet.
17454  while ((node == NULL) || accept1(parser, PM_TOKEN_PIPE)) {
  // Only used below when node is not NULL, i.e. after a pipe was accepted.
17455  pm_token_t operator = parser->previous;
17456 
17457  switch (parser->current.type) {
17458  case PM_TOKEN_IDENTIFIER:
  // NOTE(review): listing line 17459 is absent here; presumably another case
  // label in this list — confirm.
17460  case PM_TOKEN_BRACE_LEFT:
17461  case PM_TOKEN_CARET:
17462  case PM_TOKEN_CONSTANT:
17463  case PM_TOKEN_UCOLON_COLON:
17464  case PM_TOKEN_UDOT_DOT:
17465  case PM_TOKEN_UDOT_DOT_DOT:
17466  case PM_CASE_PRIMITIVE: {
17467  if (node == NULL) {
17468  node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17469  } else {
17470  pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
17471  node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17472  }
17473 
17474  break;
17475  }
  // NOTE(review): listing lines 17476-17477 are absent. The body below parses
  // a parenthesized pattern and needs its `case` label(s).
17478  pm_token_t opening = parser->current;
17479  parser_lex(parser);
17480 
17481  pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
17482  accept1(parser, PM_TOKEN_NEWLINE);
17483  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN);
17484  pm_node_t *right = (pm_node_t *) pm_parentheses_node_create(parser, &opening, body, &parser->previous);
17485 
17486  if (node == NULL) {
17487  node = right;
17488  } else {
17489  node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17490  }
17491 
17492  break;
17493  }
17494  default: {
  // Unrecognized token: report the error and use a missing node as the
  // alternative, so node is never NULL after this iteration.
17495  pm_parser_err_current(parser, diag_id);
17496  pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
17497 
17498  if (node == NULL) {
17499  node = right;
17500  } else {
17501  node = (pm_node_t *) pm_alternation_pattern_node_create(parser, node, right, &operator);
17502  }
17503 
17504  break;
17505  }
17506  }
17507  }
17508 
17509  // If we have an =>, then we are assigning this pattern to a variable.
17510  // In this case we should create an assignment node.
17511  while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17512  pm_token_t operator = parser->previous;
17513  expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
17514 
17515  pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, &parser->previous);
  // NOTE(review): this local shadows the uint16_t `depth` parameter inside the
  // loop body; here it holds the result of the local lookup.
17516  int depth;
17517 
17518  if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
17519  pm_parser_local_add(parser, constant_id, parser->previous.start, parser->previous.end, 0);
17520  }
17521 
17522  parse_pattern_capture(parser, captures, constant_id, &PM_LOCATION_TOKEN_VALUE(&parser->previous));
17523  pm_local_variable_target_node_t *target = pm_local_variable_target_node_create(
17524  parser,
17525  &PM_LOCATION_TOKEN_VALUE(&parser->previous),
17526  constant_id,
17527  (uint32_t) (depth == -1 ? 0 : depth)
17528  );
17529 
17530  node = (pm_node_t *) pm_capture_pattern_node_create(parser, node, target, &operator);
17531  }
17532 
17533  return node;
17534 }
17535 
/**
 * Parse a pattern. The flags decide which forms are accepted at this
 * position: PM_PARSE_PATTERN_TOP permits a hash pattern without braces
 * (otherwise PM_ERR_PATTERN_HASH_IMPLICIT is reported), and
 * PM_PARSE_PATTERN_MULTI permits a comma-separated list, which becomes an
 * array pattern or a find pattern. Either flag permits a leading splat.
 * diag_id is the diagnostic used when no pattern can be parsed.
 *
 * NOTE(review): the listing numbers embedded in this block skip inside the
 * comma loop (see the note below); restore the dropped line from the
 * canonical file before building.
 */
17539 static pm_node_t *
17540 parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flags, pm_diagnostic_id_t diag_id, uint16_t depth) {
17541  pm_node_t *node = NULL;
17542 
17543  bool leading_rest = false;
17544  bool trailing_rest = false;
17545 
17546  switch (parser->current.type) {
17547  case PM_TOKEN_LABEL: {
  // A label starts a hash pattern written without braces.
17548  parser_lex(parser);
17549  pm_node_t *key = (pm_node_t *) pm_symbol_node_label_create(parser, &parser->previous);
17550  node = (pm_node_t *) parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1));
17551 
17552  if (!(flags & PM_PARSE_PATTERN_TOP)) {
17553  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17554  }
17555 
17556  return node;
17557  }
17558  case PM_TOKEN_USTAR_STAR: {
  // A keyword rest also starts a hash pattern written without braces.
17559  node = parse_pattern_keyword_rest(parser, captures);
17560  node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17561 
17562  if (!(flags & PM_PARSE_PATTERN_TOP)) {
17563  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17564  }
17565 
17566  return node;
17567  }
17568  case PM_TOKEN_STRING_BEGIN: {
17569  // We need special handling for string beginnings because they could
17570  // be dynamic symbols leading to hash patterns.
17571  node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17572 
17573  if (pm_symbol_node_label_p(node)) {
17574  node = (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17575 
17576  if (!(flags & PM_PARSE_PATTERN_TOP)) {
17577  pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17578  }
17579 
17580  return node;
17581  }
17582 
17583  node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17584  break;
17585  }
17586  case PM_TOKEN_USTAR: {
  // A leading splat is only taken here when the flags allow it; otherwise
  // control falls through to the default branch.
17587  if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17588  parser_lex(parser);
17589  node = (pm_node_t *) parse_pattern_rest(parser, captures);
17590  leading_rest = true;
17591  break;
17592  }
17593  }
17594  /* fallthrough */
17595  default:
17596  node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17597  break;
17598  }
17599 
17600  // If we got a dynamic label symbol, then we need to treat it like the
17601  // beginning of a hash pattern.
17602  if (pm_symbol_node_label_p(node)) {
17603  return (pm_node_t *) parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1));
17604  }
17605 
17606  if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17607  // If we have a comma, then we are now parsing either an array pattern
17608  // or a find pattern. We need to parse all of the patterns, put them
17609  // into a big list, and then determine which type of node we have.
17610  pm_node_list_t nodes = { 0 };
17611  pm_node_list_append(&nodes, node);
17612 
17613  // Gather up all of the patterns into the list.
17614  while (accept1(parser, PM_TOKEN_COMMA)) {
17615  // Break early here in case we have a trailing comma.
  // NOTE(review): listing line 17616 is absent. The trailing-comma check that
  // should guard the block below is not visible, so as shown the loop always
  // appends an implicit rest node and breaks.
17617  node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17618  pm_node_list_append(&nodes, node);
17619  trailing_rest = true;
17620  break;
17621  }
17622 
17623  if (accept1(parser, PM_TOKEN_USTAR)) {
17624  node = (pm_node_t *) parse_pattern_rest(parser, captures);
17625 
17626  // If we have already parsed a splat pattern, then this is an
17627  // error. We will continue to parse the rest of the patterns,
17628  // but we will indicate it as an error.
17629  if (trailing_rest) {
17630  pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17631  }
17632 
17633  trailing_rest = true;
17634  } else {
17635  node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17636  }
17637 
17638  pm_node_list_append(&nodes, node);
17639  }
17640 
17641  // If the first pattern and the last pattern are rest patterns, then we
17642  // will call this a find pattern, regardless of how many rest patterns
17643  // are in between because we know we already added the appropriate
17644  // errors. Otherwise we will create an array pattern.
17645  if (leading_rest && PM_NODE_TYPE_P(nodes.nodes[nodes.size - 1], PM_SPLAT_NODE)) {
17646  node = (pm_node_t *) pm_find_pattern_node_create(parser, &nodes);
17647 
  // Exactly two entries means the two splats have nothing between them.
17648  if (nodes.size == 2) {
17649  pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17650  }
17651  } else {
17652  node = (pm_node_t *) pm_array_pattern_node_node_list_create(parser, &nodes);
17653 
17654  if (leading_rest && trailing_rest) {
17655  pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17656  }
17657  }
17658 
  // Only the temporary list storage is freed here, not the nodes themselves.
17659  xfree(nodes.nodes);
17660  } else if (leading_rest) {
17661  // Otherwise, if we parsed a single splat pattern, then we know we have
17662  // an array pattern, so we can go ahead and create that node.
17663  node = (pm_node_t *) pm_array_pattern_node_rest_create(parser, node);
17664  }
17665 
17666  return node;
17667 }
17668 
17674 static inline void
17675 parse_negative_numeric(pm_node_t *node) {
17676  switch (PM_NODE_TYPE(node)) {
17677  case PM_INTEGER_NODE: {
17678  pm_integer_node_t *cast = (pm_integer_node_t *) node;
17679  cast->base.location.start--;
17680  cast->value.negative = true;
17681  break;
17682  }
17683  case PM_FLOAT_NODE: {
17684  pm_float_node_t *cast = (pm_float_node_t *) node;
17685  cast->base.location.start--;
17686  cast->value = -cast->value;
17687  break;
17688  }
17689  case PM_RATIONAL_NODE: {
17690  pm_rational_node_t *cast = (pm_rational_node_t *) node;
17691  cast->base.location.start--;
17692  cast->numerator.negative = true;
17693  break;
17694  }
17695  case PM_IMAGINARY_NODE:
17696  node->location.start--;
17697  parse_negative_numeric(((pm_imaginary_node_t *) node)->numeric);
17698  break;
17699  default:
17700  assert(false && "unreachable");
17701  break;
17702  }
17703 }
17704 
17710 static void
17711 pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
17712  switch (diag_id) {
17713  case PM_ERR_HASH_KEY: {
17714  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
17715  break;
17716  }
17717  case PM_ERR_HASH_VALUE:
17718  case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17719  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17720  break;
17721  }
17722  case PM_ERR_UNARY_RECEIVER: {
17723  const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
17724  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
17725  break;
17726  }
17727  case PM_ERR_UNARY_DISALLOWED:
17728  case PM_ERR_EXPECT_ARGUMENT: {
17729  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, pm_token_type_human(parser->current.type));
17730  break;
17731  }
17732  default:
17733  pm_parser_err_previous(parser, diag_id);
17734  break;
17735  }
17736 }
17737 
17741 static void
17742 parse_retry(pm_parser_t *parser, const pm_node_t *node) {
17743 #define CONTEXT_NONE 0
17744 #define CONTEXT_THROUGH_ENSURE 1
17745 #define CONTEXT_THROUGH_ELSE 2
17746 
17747  pm_context_node_t *context_node = parser->current_context;
17748  int context = CONTEXT_NONE;
17749 
17750  while (context_node != NULL) {
17751  switch (context_node->context) {
17755  case PM_CONTEXT_DEF_RESCUE:
17759  case PM_CONTEXT_DEFINED:
17761  // These are the good cases. We're allowed to have a retry here.
17762  return;
17763  case PM_CONTEXT_CLASS:
17764  case PM_CONTEXT_DEF:
17765  case PM_CONTEXT_DEF_PARAMS:
17766  case PM_CONTEXT_MAIN:
17767  case PM_CONTEXT_MODULE:
17768  case PM_CONTEXT_PREEXE:
17769  case PM_CONTEXT_SCLASS:
17770  // These are the bad cases. We're not allowed to have a retry in
17771  // these contexts.
17772  if (context == CONTEXT_NONE) {
17773  pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17774  } else if (context == CONTEXT_THROUGH_ENSURE) {
17775  pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17776  } else if (context == CONTEXT_THROUGH_ELSE) {
17777  pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17778  }
17779  return;
17780  case PM_CONTEXT_BEGIN_ELSE:
17781  case PM_CONTEXT_BLOCK_ELSE:
17782  case PM_CONTEXT_CLASS_ELSE:
17783  case PM_CONTEXT_DEF_ELSE:
17787  // These are also bad cases, but with a more specific error
17788  // message indicating the else.
17789  context = CONTEXT_THROUGH_ELSE;
17790  break;
17794  case PM_CONTEXT_DEF_ENSURE:
17798  // These are also bad cases, but with a more specific error
17799  // message indicating the ensure.
17800  context = CONTEXT_THROUGH_ENSURE;
17801  break;
17802  case PM_CONTEXT_NONE:
17803  // This case should never happen.
17804  assert(false && "unreachable");
17805  break;
17806  case PM_CONTEXT_BEGIN:
17809  case PM_CONTEXT_CASE_IN:
17810  case PM_CONTEXT_CASE_WHEN:
17812  case PM_CONTEXT_ELSE:
17813  case PM_CONTEXT_ELSIF:
17814  case PM_CONTEXT_EMBEXPR:
17815  case PM_CONTEXT_FOR_INDEX:
17816  case PM_CONTEXT_FOR:
17817  case PM_CONTEXT_IF:
17822  case PM_CONTEXT_PARENS:
17823  case PM_CONTEXT_POSTEXE:
17824  case PM_CONTEXT_PREDICATE:
17825  case PM_CONTEXT_TERNARY:
17826  case PM_CONTEXT_UNLESS:
17827  case PM_CONTEXT_UNTIL:
17828  case PM_CONTEXT_WHILE:
17829  // In these contexts we should continue walking up the list of
17830  // contexts.
17831  break;
17832  }
17833 
17834  context_node = context_node->prev;
17835  }
17836 
17837 #undef CONTEXT_NONE
17838 #undef CONTEXT_ENSURE
17839 #undef CONTEXT_ELSE
17840 }
17841 
17845 static void
17846 parse_yield(pm_parser_t *parser, const pm_node_t *node) {
17847  pm_context_node_t *context_node = parser->current_context;
17848 
17849  while (context_node != NULL) {
17850  switch (context_node->context) {
17851  case PM_CONTEXT_DEF:
17852  case PM_CONTEXT_DEF_PARAMS:
17853  case PM_CONTEXT_DEFINED:
17854  case PM_CONTEXT_DEF_ENSURE:
17855  case PM_CONTEXT_DEF_RESCUE:
17856  case PM_CONTEXT_DEF_ELSE:
17857  // These are the good cases. We're allowed to have a block exit
17858  // in these contexts.
17859  return;
17860  case PM_CONTEXT_CLASS:
17863  case PM_CONTEXT_CLASS_ELSE:
17864  case PM_CONTEXT_MAIN:
17865  case PM_CONTEXT_MODULE:
17869  case PM_CONTEXT_SCLASS:
17873  // These are the bad cases. We're not allowed to have a retry in
17874  // these contexts.
17875  pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17876  return;
17877  case PM_CONTEXT_NONE:
17878  // This case should never happen.
17879  assert(false && "unreachable");
17880  break;
17881  case PM_CONTEXT_BEGIN:
17882  case PM_CONTEXT_BEGIN_ELSE:
17887  case PM_CONTEXT_BLOCK_ELSE:
17890  case PM_CONTEXT_CASE_IN:
17891  case PM_CONTEXT_CASE_WHEN:
17893  case PM_CONTEXT_ELSE:
17894  case PM_CONTEXT_ELSIF:
17895  case PM_CONTEXT_EMBEXPR:
17896  case PM_CONTEXT_FOR_INDEX:
17897  case PM_CONTEXT_FOR:
17898  case PM_CONTEXT_IF:
17906  case PM_CONTEXT_PARENS:
17907  case PM_CONTEXT_POSTEXE:
17908  case PM_CONTEXT_PREDICATE:
17909  case PM_CONTEXT_PREEXE:
17911  case PM_CONTEXT_TERNARY:
17912  case PM_CONTEXT_UNLESS:
17913  case PM_CONTEXT_UNTIL:
17914  case PM_CONTEXT_WHILE:
17915  // In these contexts we should continue walking up the list of
17916  // contexts.
17917  break;
17918  }
17919 
17920  context_node = context_node->prev;
17921  }
17922 }
17923 
// Data handed to the regular expression error callback through its `data`
// pointer. parse_regular_expression_errors fills it in from the regular
// expression node before calling pm_regexp_parse.
//
// NOTE(review): the listing numbers embedded in this block skip. A `parser`
// member (initialized and read elsewhere in the file) and the closing
// `} <name>;` line of the typedef are not visible in this copy; restore them
// from the canonical file before building.
17928 typedef struct {
17931 
 // Start of the regular expression node's location.
17933  const uint8_t *start;
17934 
 // End of the regular expression node's location.
17936  const uint8_t *end;
17937 
 // True when the node's unescaped string has type PM_STRING_SHARED. The
 // callback then reports errors at the location it is given instead of
 // at start/end.
17944  bool shared;
17946 
/**
 * Error callback passed to pm_regexp_parse. Appends a
 * PM_ERR_REGEXP_PARSE_ERROR diagnostic carrying the given message.
 *
 * When the callback data is marked as shared, the error is placed at the
 * start/end range passed to this callback; otherwise it is placed at the
 * start/end stored in the callback data.
 *
 * NOTE(review): listing line 17953 is absent from this copy, so
 * `callback_data` is used without a visible declaration. Presumably it is
 * `data` cast to the callback data struct; restore the line from the
 * canonical file before building.
 */
17951 static void
17952 parse_regular_expression_error(const uint8_t *start, const uint8_t *end, const char *message, void *data) {
17954  pm_location_t location;
17955 
17956  if (callback_data->shared) {
17957  location = (pm_location_t) { .start = start, .end = end };
17958  } else {
17959  location = (pm_location_t) { .start = callback_data->start, .end = callback_data->end };
17960  }
17961 
17962  PM_PARSER_ERR_FORMAT(callback_data->parser, location.start, location.end, PM_ERR_REGEXP_PARSE_ERROR, message);
17963 }
17964 
/**
 * Run pm_regexp_parse over the unescaped source of a regular expression node
 * so that any problems it finds are reported through
 * parse_regular_expression_error. The callback data carries the parser, the
 * node's location, and whether the unescaped string is of type
 * PM_STRING_SHARED.
 *
 * NOTE(review): listing line 17971 is absent from this copy, so the
 * designated initializer below has no visible declaration. Presumably that
 * line declared `error_data`; restore it from the canonical file before
 * building.
 */
17968 static void
17969 parse_regular_expression_errors(pm_parser_t *parser, pm_regular_expression_node_t *node) {
17970  const pm_string_t *unescaped = &node->unescaped;
17972  .parser = parser,
17973  .start = node->base.location.start,
17974  .end = node->base.location.end,
17975  .shared = unescaped->type == PM_STRING_SHARED
17976  };
17977 
 // The two NULL arguments sit in the slots immediately before the error
 // callback and its data pointer.
17978  pm_regexp_parse(parser, pm_string_source(unescaped), pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
17979 }
17980 
17984 static inline pm_node_t *
17985 parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
17986  switch (parser->current.type) {
17988  parser_lex(parser);
17989 
17990  pm_array_node_t *array = pm_array_node_create(parser, &parser->previous);
17991  pm_accepts_block_stack_push(parser, true);
17992  bool parsed_bare_hash = false;
17993 
17994  while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
17995  bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
17996 
17997  // Handle the case where we don't have a comma and we have a
17998  // newline followed by a right bracket.
17999  if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18000  break;
18001  }
18002 
18003  // Ensure that we have a comma between elements in the array.
18004  if (array->elements.size > 0) {
18005  if (accept1(parser, PM_TOKEN_COMMA)) {
18006  // If there was a comma but we also accepts a newline,
18007  // then this is a syntax error.
18008  if (accepted_newline) {
18009  pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
18010  }
18011  } else {
18012  // If there was no comma, then we need to add a syntax
18013  // error.
18014  const uint8_t *location = parser->previous.end;
18015  PM_PARSER_ERR_FORMAT(parser, location, location, PM_ERR_ARRAY_SEPARATOR, pm_token_type_human(parser->current.type));
18016 
18017  parser->previous.start = location;
18018  parser->previous.type = PM_TOKEN_MISSING;
18019  }
18020  }
18021 
18022  // If we have a right bracket immediately following a comma,
18023  // this is allowed since it's a trailing comma. In this case we
18024  // can break out of the loop.
18025  if (match1(parser, PM_TOKEN_BRACKET_RIGHT)) break;
18026 
18027  pm_node_t *element;
18028 
18029  if (accept1(parser, PM_TOKEN_USTAR)) {
18030  pm_token_t operator = parser->previous;
18031  pm_node_t *expression = NULL;
18032 
18033  if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
18034  pm_parser_scope_forwarding_positionals_check(parser, &operator);
18035  } else {
18036  expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18037  }
18038 
18039  element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
18040  } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
18041  if (parsed_bare_hash) {
18042  pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
18043  }
18044 
18045  element = (pm_node_t *) pm_keyword_hash_node_create(parser);
18046  pm_static_literals_t hash_keys = { 0 };
18047 
18049  parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18050  }
18051 
18052  pm_static_literals_free(&hash_keys);
18053  parsed_bare_hash = true;
18054  } else {
18055  element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
18056 
18057  if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
18058  if (parsed_bare_hash) {
18059  pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
18060  }
18061 
18062  pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
18063  pm_static_literals_t hash_keys = { 0 };
18064  pm_hash_key_static_literals_add(parser, &hash_keys, element);
18065 
18066  pm_token_t operator;
18067  if (parser->previous.type == PM_TOKEN_EQUAL_GREATER) {
18068  operator = parser->previous;
18069  } else {
18070  operator = not_provided(parser);
18071  }
18072 
18073  pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
18074  pm_node_t *assoc = (pm_node_t *) pm_assoc_node_create(parser, element, &operator, value);
18075  pm_keyword_hash_node_elements_append(hash, assoc);
18076 
18077  element = (pm_node_t *) hash;
18078  if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18079  parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
18080  }
18081 
18082  pm_static_literals_free(&hash_keys);
18083  parsed_bare_hash = true;
18084  }
18085  }
18086 
18087  pm_array_node_elements_append(array, element);
18088  if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
18089  }
18090 
18091  accept1(parser, PM_TOKEN_NEWLINE);
18092 
18093  if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
18094  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
18095  parser->previous.start = parser->previous.end;
18096  parser->previous.type = PM_TOKEN_MISSING;
18097  }
18098 
18099  pm_array_node_close_set(array, &parser->previous);
18100  pm_accepts_block_stack_pop(parser);
18101 
18102  return (pm_node_t *) array;
18103  }
18106  pm_token_t opening = parser->current;
18107 
18108  pm_node_list_t current_block_exits = { 0 };
18109  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18110 
18111  parser_lex(parser);
18112  while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
18113 
18114  // If this is the end of the file or we match a right parenthesis, then
18115  // we have an empty parentheses node, and we can immediately return.
18116  if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
18117  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18118 
18119  pop_block_exits(parser, previous_block_exits);
18120  pm_node_list_free(&current_block_exits);
18121 
18122  return (pm_node_t *) pm_parentheses_node_create(parser, &opening, NULL, &parser->previous);
18123  }
18124 
18125  // Otherwise, we're going to parse the first statement in the list
18126  // of statements within the parentheses.
18127  pm_accepts_block_stack_push(parser, true);
18128  context_push(parser, PM_CONTEXT_PARENS);
18129  pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18130  context_pop(parser);
18131 
18132  // Determine if this statement is followed by a terminator. In the
18133  // case of a single statement, this is fine. But in the case of
18134  // multiple statements it's required.
18135  bool terminator_found = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18136  if (terminator_found) {
18137  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18138  }
18139 
18140  // If we hit a right parenthesis, then we're done parsing the
18141  // parentheses node, and we can check which kind of node we should
18142  // return.
18143  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18145  lex_state_set(parser, PM_LEX_STATE_ENDARG);
18146  }
18147 
18148  parser_lex(parser);
18149  pm_accepts_block_stack_pop(parser);
18150 
18151  pop_block_exits(parser, previous_block_exits);
18152  pm_node_list_free(&current_block_exits);
18153 
18154  if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18155  // If we have a single statement and are ending on a right
18156  // parenthesis, then we need to check if this is possibly a
18157  // multiple target node.
18158  pm_multi_target_node_t *multi_target;
18159 
18160  if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((pm_multi_target_node_t *) statement)->lparen_loc.start == NULL) {
18161  multi_target = (pm_multi_target_node_t *) statement;
18162  } else {
18163  multi_target = pm_multi_target_node_create(parser);
18164  pm_multi_target_node_targets_append(parser, multi_target, statement);
18165  }
18166 
18167  pm_location_t lparen_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18168  pm_location_t rparen_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
18169 
18170  multi_target->lparen_loc = lparen_loc;
18171  multi_target->rparen_loc = rparen_loc;
18172  multi_target->base.location.start = lparen_loc.start;
18173  multi_target->base.location.end = rparen_loc.end;
18174 
18175  pm_node_t *result;
18176  if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
18177  result = parse_targets(parser, (pm_node_t *) multi_target, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18178  accept1(parser, PM_TOKEN_NEWLINE);
18179  } else {
18180  result = (pm_node_t *) multi_target;
18181  }
18182 
18183  if (context_p(parser, PM_CONTEXT_MULTI_TARGET)) {
18184  // All set, this is explicitly allowed by the parent
18185  // context.
18186  } else if (context_p(parser, PM_CONTEXT_FOR_INDEX) && match1(parser, PM_TOKEN_KEYWORD_IN)) {
18187  // All set, we're inside a for loop and we're parsing
18188  // multiple targets.
18189  } else if (binding_power != PM_BINDING_POWER_STATEMENT) {
18190  // Multi targets are not allowed when it's not a
18191  // statement level.
18192  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18193  } else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
18194  // Multi targets must be followed by an equal sign in
18195  // order to be valid (or a right parenthesis if they are
18196  // nested).
18197  pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
18198  }
18199 
18200  return result;
18201  }
18202 
18203  // If we have a single statement and are ending on a right parenthesis
18204  // and we didn't return a multiple assignment node, then we can return a
18205  // regular parentheses node now.
18206  pm_statements_node_t *statements = pm_statements_node_create(parser);
18207  pm_statements_node_body_append(parser, statements, statement, true);
18208 
18209  return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18210  }
18211 
18212  // If we have more than one statement in the set of parentheses,
18213  // then we are going to parse all of them as a list of statements.
18214  // We'll do that here.
18215  context_push(parser, PM_CONTEXT_PARENS);
18216  pm_statements_node_t *statements = pm_statements_node_create(parser);
18217  pm_statements_node_body_append(parser, statements, statement, true);
18218 
18219  // If we didn't find a terminator and we didn't find a right
18220  // parenthesis, then this is a syntax error.
18221  if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
18222  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18223  }
18224 
18225  // Parse each statement within the parentheses.
18226  while (true) {
18227  pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT, true, false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
18228  pm_statements_node_body_append(parser, statements, node, true);
18229 
18230  // If we're recovering from a syntax error, then we need to stop
18231  // parsing the statements now.
18232  if (parser->recovering) {
18233  // If this is the level of context where the recovery has
18234  // happened, then we can mark the parser as done recovering.
18235  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->recovering = false;
18236  break;
18237  }
18238 
18239  // If we couldn't parse an expression at all, then we need to
18240  // bail out of the loop.
18241  if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) break;
18242 
18243  // If we successfully parsed a statement, then we are going to
18244  // need a terminator to delimit them.
18245  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18246  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18247  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) break;
18248  } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18249  break;
18250  } else if (!match1(parser, PM_TOKEN_EOF)) {
18251  // If we're at the end of the file, then we're going to add
18252  // an error after this for the ) anyway.
18253  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
18254  }
18255  }
18256 
18257  context_pop(parser);
18258  pm_accepts_block_stack_pop(parser);
18259  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18260 
18261  // When we're parsing multi targets, we allow them to be followed by
18262  // a right parenthesis if they are at the statement level. This is
18263  // only possible if they are the final statement in a parentheses.
18264  // We need to explicitly reject that here.
18265  {
18266  pm_node_t *statement = statements->body.nodes[statements->body.size - 1];
18267 
18268  if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
18269  pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
18270  pm_multi_target_node_targets_append(parser, multi_target, statement);
18271 
18272  statement = (pm_node_t *) multi_target;
18273  statements->body.nodes[statements->body.size - 1] = statement;
18274  }
18275 
18276  if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
18277  const uint8_t *offset = statement->location.end;
18278  pm_token_t operator = { .type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
18279  pm_node_t *value = (pm_node_t *) pm_missing_node_create(parser, offset, offset);
18280 
18281  statement = (pm_node_t *) pm_multi_write_node_create(parser, (pm_multi_target_node_t *) statement, &operator, value);
18282  statements->body.nodes[statements->body.size - 1] = statement;
18283 
18284  pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
18285  }
18286  }
18287 
18288  pop_block_exits(parser, previous_block_exits);
18289  pm_node_list_free(&current_block_exits);
18290 
18291  pm_void_statements_check(parser, statements, true);
18292  return (pm_node_t *) pm_parentheses_node_create(parser, &opening, (pm_node_t *) statements, &parser->previous);
18293  }
18294  case PM_TOKEN_BRACE_LEFT: {
18295  // If we were passed a current_hash_keys via the parser, then that
18296  // means we're already parsing a hash and we want to share the set
18297  // of hash keys with this inner hash we're about to parse for the
18298  // sake of warnings. We'll set it to NULL after we grab it to make
18299  // sure subsequent expressions don't use it. Effectively this is a
18300  // way of getting around passing it to every call to
18301  // parse_expression.
18302  pm_static_literals_t *current_hash_keys = parser->current_hash_keys;
18303  parser->current_hash_keys = NULL;
18304 
18305  pm_accepts_block_stack_push(parser, true);
18306  parser_lex(parser);
18307 
18308  pm_hash_node_t *node = pm_hash_node_create(parser, &parser->previous);
18309 
18310  if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
18311  if (current_hash_keys != NULL) {
18312  parse_assocs(parser, current_hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18313  } else {
18314  pm_static_literals_t hash_keys = { 0 };
18315  parse_assocs(parser, &hash_keys, (pm_node_t *) node, (uint16_t) (depth + 1));
18316  pm_static_literals_free(&hash_keys);
18317  }
18318 
18319  accept1(parser, PM_TOKEN_NEWLINE);
18320  }
18321 
18322  pm_accepts_block_stack_pop(parser);
18323  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM);
18324  pm_hash_node_closing_loc_set(node, &parser->previous);
18325 
18326  return (pm_node_t *) node;
18327  }
18329  parser_lex(parser);
18330 
18331  pm_token_t opening = parser->previous;
18332  opening.type = PM_TOKEN_STRING_BEGIN;
18333  opening.end = opening.start + 1;
18334 
18335  pm_token_t content = parser->previous;
18336  content.type = PM_TOKEN_STRING_CONTENT;
18337  content.start = content.start + 1;
18338 
18339  pm_token_t closing = not_provided(parser);
18340  pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
18341  pm_node_flag_set(node, parse_unescaped_encoding(parser));
18342 
18343  // Characters can be followed by strings in which case they are
18344  // automatically concatenated.
18345  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18346  return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18347  }
18348 
18349  return node;
18350  }
18351  case PM_TOKEN_CLASS_VARIABLE: {
18352  parser_lex(parser);
18353  pm_node_t *node = (pm_node_t *) pm_class_variable_read_node_create(parser, &parser->previous);
18354 
18355  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18356  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18357  }
18358 
18359  return node;
18360  }
18361  case PM_TOKEN_CONSTANT: {
18362  parser_lex(parser);
18363  pm_token_t constant = parser->previous;
18364 
18365  // If a constant is immediately followed by parentheses, then this is in
18366  // fact a method call, not a constant read.
18367  if (
18368  match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
18369  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18370  (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18371  match1(parser, PM_TOKEN_BRACE_LEFT)
18372  ) {
18373  pm_arguments_t arguments = { 0 };
18374  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18375  return (pm_node_t *) pm_call_node_fcall_create(parser, &constant, &arguments);
18376  }
18377 
18378  pm_node_t *node = (pm_node_t *) pm_constant_read_node_create(parser, &parser->previous);
18379 
18380  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18381  // If we get here, then we have a comma immediately following a
18382  // constant, so we're going to parse this as a multiple assignment.
18383  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18384  }
18385 
18386  return node;
18387  }
18388  case PM_TOKEN_UCOLON_COLON: {
18389  parser_lex(parser);
18390  pm_token_t delimiter = parser->previous;
18391 
18392  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
18393  pm_node_t *node = (pm_node_t *) pm_constant_path_node_create(parser, NULL, &delimiter, &parser->previous);
18394 
18395  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18396  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18397  }
18398 
18399  return node;
18400  }
18401  case PM_TOKEN_UDOT_DOT:
18402  case PM_TOKEN_UDOT_DOT_DOT: {
18403  pm_token_t operator = parser->current;
18404  parser_lex(parser);
18405 
18406  pm_node_t *right = parse_expression(parser, pm_binding_powers[operator.type].left, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
18407 
18408  // Unary .. and ... are special because these are non-associative
18409  // operators that can also be unary operators. In this case we need
18410  // to explicitly reject code that has a .. or ... that follows this
18411  // expression.
18412  if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
18413  pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
18414  }
18415 
18416  return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
18417  }
18418  case PM_TOKEN_FLOAT:
18419  parser_lex(parser);
18420  return (pm_node_t *) pm_float_node_create(parser, &parser->previous);
18422  parser_lex(parser);
18423  return (pm_node_t *) pm_float_node_imaginary_create(parser, &parser->previous);
18425  parser_lex(parser);
18426  return (pm_node_t *) pm_float_node_rational_create(parser, &parser->previous);
18428  parser_lex(parser);
18429  return (pm_node_t *) pm_float_node_rational_imaginary_create(parser, &parser->previous);
18431  parser_lex(parser);
18432  pm_node_t *node = (pm_node_t *) pm_numbered_reference_read_node_create(parser, &parser->previous);
18433 
18434  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18435  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18436  }
18437 
18438  return node;
18439  }
18440  case PM_TOKEN_GLOBAL_VARIABLE: {
18441  parser_lex(parser);
18442  pm_node_t *node = (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
18443 
18444  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18445  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18446  }
18447 
18448  return node;
18449  }
18450  case PM_TOKEN_BACK_REFERENCE: {
18451  parser_lex(parser);
18452  pm_node_t *node = (pm_node_t *) pm_back_reference_read_node_create(parser, &parser->previous);
18453 
18454  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18455  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18456  }
18457 
18458  return node;
18459  }
18460  case PM_TOKEN_IDENTIFIER:
18461  case PM_TOKEN_METHOD_NAME: {
18462  parser_lex(parser);
18463  pm_token_t identifier = parser->previous;
18464  pm_node_t *node = parse_variable_call(parser);
18465 
18466  if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
18467  // If parse_variable_call returned with a call node, then we
18468  // know the identifier is not in the local table. In that case
18469  // we need to check if there are arguments following the
18470  // identifier.
18471  pm_call_node_t *call = (pm_call_node_t *) node;
18472  pm_arguments_t arguments = { 0 };
18473 
18474  if (parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1))) {
18475  // Since we found arguments, we need to turn off the
18476  // variable call bit in the flags.
18477  pm_node_flag_unset((pm_node_t *)call, PM_CALL_NODE_FLAGS_VARIABLE_CALL);
18478 
18479  call->opening_loc = arguments.opening_loc;
18480  call->arguments = arguments.arguments;
18481  call->closing_loc = arguments.closing_loc;
18482  call->block = arguments.block;
18483 
18484  if (arguments.block != NULL) {
18485  call->base.location.end = arguments.block->location.end;
18486  } else if (arguments.closing_loc.start == NULL) {
18487  if (arguments.arguments != NULL) {
18488  call->base.location.end = arguments.arguments->base.location.end;
18489  } else {
18490  call->base.location.end = call->message_loc.end;
18491  }
18492  } else {
18493  call->base.location.end = arguments.closing_loc.end;
18494  }
18495  }
18496  } else {
18497  // Otherwise, we know the identifier is in the local table. This
18498  // can still be a method call if it is followed by arguments or
18499  // a block, so we need to check for that here.
18500  if (
18501  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
18502  (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
18503  match1(parser, PM_TOKEN_BRACE_LEFT)
18504  ) {
18505  pm_arguments_t arguments = { 0 };
18506  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
18507  pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
18508 
18510  // If we're about to convert an 'it' implicit local
18511  // variable read into a method call, we need to remove
18512  // it from the list of implicit local variables.
18513  parse_target_implicit_parameter(parser, node);
18514  } else {
18515  // Otherwise, we're about to convert a regular local
18516  // variable read into a method call, in which case we
18517  // need to indicate that this was not a read for the
18518  // purposes of warnings.
18520 
18521  if (pm_token_is_numbered_parameter(identifier.start, identifier.end)) {
18522  parse_target_implicit_parameter(parser, node);
18523  } else {
18525  pm_locals_unread(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
18526  }
18527  }
18528 
18529  pm_node_destroy(parser, node);
18530  return (pm_node_t *) fcall;
18531  }
18532  }
18533 
18534  if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
18535  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18536  }
18537 
18538  return node;
18539  }
18540  case PM_TOKEN_HEREDOC_START: {
18541  // Here we have found a heredoc. We'll parse it and add it to the
18542  // list of strings.
18543  assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
18544  pm_heredoc_lex_mode_t lex_mode = parser->lex_modes.current->as.heredoc.base;
18545 
18546  size_t common_whitespace = (size_t) -1;
18547  parser->lex_modes.current->as.heredoc.common_whitespace = &common_whitespace;
18548 
18549  parser_lex(parser);
18550  pm_token_t opening = parser->previous;
18551 
18552  pm_node_t *node;
18553  pm_node_t *part;
18554 
18555  if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18556  // If we get here, then we have an empty heredoc. We'll create
18557  // an empty content token and return an empty string node.
18558  expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18559  pm_token_t content = parse_strings_empty_content(parser->previous.start);
18560 
18561  if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18562  node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18563  } else {
18564  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_STRING_EMPTY);
18565  }
18566 
18567  node->location.end = opening.end;
18568  } else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18569  // If we get here, then we tried to find something in the
18570  // heredoc but couldn't actually parse anything, so we'll just
18571  // return a missing node.
18572  //
18573  // parse_string_part handles its own errors, so there is no need
18574  // for us to add one here.
18575  node = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
18576  } else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18577  // If we get here, then the part that we parsed was plain string
18578  // content and we're at the end of the heredoc, so we can return
18579  // just a string node with the heredoc opening and closing as
18580  // its opening and closing.
18581  pm_node_flag_set(part, parse_unescaped_encoding(parser));
18582  pm_string_node_t *cast = (pm_string_node_t *) part;
18583 
18584  cast->opening_loc = PM_LOCATION_TOKEN_VALUE(&opening);
18585  cast->closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->current);
18586  cast->base.location = cast->opening_loc;
18587 
18588  if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18589  assert(sizeof(pm_string_node_t) == sizeof(pm_x_string_node_t));
18590  cast->base.type = PM_X_STRING_NODE;
18591  }
18592 
18593  if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18594  parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
18595  }
18596 
18597  node = (pm_node_t *) cast;
18598  expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18599  } else {
18600  // If we get here, then we have multiple parts in the heredoc,
18601  // so we'll need to create an interpolated string node to hold
18602  // them all.
18603  pm_node_list_t parts = { 0 };
18604  pm_node_list_append(&parts, part);
18605 
18606  while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18607  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18608  pm_node_list_append(&parts, part);
18609  }
18610  }
18611 
18612  // Now that we have all of the parts, create the correct type of
18613  // interpolated node.
18614  if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18615  pm_interpolated_x_string_node_t *cast = pm_interpolated_xstring_node_create(parser, &opening, &opening);
18616  cast->parts = parts;
18617 
18618  expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18619  pm_interpolated_xstring_node_closing_set(cast, &parser->previous);
18620 
18621  cast->base.location = cast->opening_loc;
18622  node = (pm_node_t *) cast;
18623  } else {
18624  pm_interpolated_string_node_t *cast = pm_interpolated_string_node_create(parser, &opening, &parts, &opening);
18625  pm_node_list_free(&parts);
18626 
18627  expect1_heredoc_term(parser, lex_mode.ident_start, lex_mode.ident_length);
18628  pm_interpolated_string_node_closing_set(cast, &parser->previous);
18629 
18630  cast->base.location = cast->opening_loc;
18631  node = (pm_node_t *) cast;
18632  }
18633 
18634  // If this is a heredoc that is indented with a ~, then we need
18635  // to dedent each line by the common leading whitespace.
18636  if (lex_mode.indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
18637  pm_node_list_t *nodes;
18638  if (lex_mode.quote == PM_HEREDOC_QUOTE_BACKTICK) {
18639  nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
18640  } else {
18641  nodes = &((pm_interpolated_string_node_t *) node)->parts;
18642  }
18643 
18644  parse_heredoc_dedent(parser, nodes, common_whitespace);
18645  }
18646  }
18647 
18648  if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18649  return parse_strings(parser, node, false, (uint16_t) (depth + 1));
18650  }
18651 
18652  return node;
18653  }
18655  parser_lex(parser);
18656  pm_node_t *node = (pm_node_t *) pm_instance_variable_read_node_create(parser, &parser->previous);
18657 
18658  if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18659  node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18660  }
18661 
18662  return node;
18663  }
18664  case PM_TOKEN_INTEGER: {
18665  pm_node_flags_t base = parser->integer_base;
18666  parser_lex(parser);
18667  return (pm_node_t *) pm_integer_node_create(parser, base, &parser->previous);
18668  }
18670  pm_node_flags_t base = parser->integer_base;
18671  parser_lex(parser);
18672  return (pm_node_t *) pm_integer_node_imaginary_create(parser, base, &parser->previous);
18673  }
18675  pm_node_flags_t base = parser->integer_base;
18676  parser_lex(parser);
18677  return (pm_node_t *) pm_integer_node_rational_create(parser, base, &parser->previous);
18678  }
18680  pm_node_flags_t base = parser->integer_base;
18681  parser_lex(parser);
18682  return (pm_node_t *) pm_integer_node_rational_imaginary_create(parser, base, &parser->previous);
18683  }
18685  parser_lex(parser);
18686  return (pm_node_t *) pm_source_encoding_node_create(parser, &parser->previous);
18688  parser_lex(parser);
18689  return (pm_node_t *) pm_source_file_node_create(parser, &parser->previous);
18691  parser_lex(parser);
18692  return (pm_node_t *) pm_source_line_node_create(parser, &parser->previous);
18693  case PM_TOKEN_KEYWORD_ALIAS: {
18694  if (binding_power != PM_BINDING_POWER_STATEMENT) {
18695  pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18696  }
18697 
18698  parser_lex(parser);
18699  pm_token_t keyword = parser->previous;
18700 
18701  pm_node_t *new_name = parse_alias_argument(parser, true, (uint16_t) (depth + 1));
18702  pm_node_t *old_name = parse_alias_argument(parser, false, (uint16_t) (depth + 1));
18703 
18704  switch (PM_NODE_TYPE(new_name)) {
18710  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18711  }
18712  } else {
18713  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18714  }
18715 
18716  return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
18717  }
18718  case PM_SYMBOL_NODE:
18721  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18722  }
18723  }
18724  /* fallthrough */
18725  default:
18726  return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18727  }
18728  }
18729  case PM_TOKEN_KEYWORD_CASE: {
18730  size_t opening_newline_index = token_newline_index(parser);
18731  parser_lex(parser);
18732 
18733  pm_token_t case_keyword = parser->previous;
18734  pm_node_t *predicate = NULL;
18735 
18736  pm_node_list_t current_block_exits = { 0 };
18737  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18738 
18739  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18740  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18741  predicate = NULL;
18742  } else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18743  predicate = NULL;
18744  } else if (!token_begins_expression_p(parser->current.type)) {
18745  predicate = NULL;
18746  } else {
18747  predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18748  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18749  }
18750 
18751  if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18752  parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18753  parser_lex(parser);
18754 
18755  pop_block_exits(parser, previous_block_exits);
18756  pm_node_list_free(&current_block_exits);
18757 
18758  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18759  return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, &parser->previous);
18760  }
18761 
18762  // At this point we can create a case node, though we don't yet know
18763  // if it is a case-in or case-when node.
18764  pm_token_t end_keyword = not_provided(parser);
18765  pm_node_t *node;
18766 
18767  if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18768  pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, &end_keyword);
18769  pm_static_literals_t literals = { 0 };
18770 
18771  // At this point we've seen a when keyword, so we know this is a
18772  // case-when node. We will continue to parse the when nodes
18773  // until we hit the end of the list.
18774  while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18775  parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18776  parser_lex(parser);
18777 
18778  pm_token_t when_keyword = parser->previous;
18779  pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18780 
18781  do {
18782  if (accept1(parser, PM_TOKEN_USTAR)) {
18783  pm_token_t operator = parser->previous;
18784  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18785 
18786  pm_splat_node_t *splat_node = pm_splat_node_create(parser, &operator, expression);
18787  pm_when_node_conditions_append(when_node, (pm_node_t *) splat_node);
18788 
18789  if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE)) break;
18790  } else {
18791  pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18792  pm_when_node_conditions_append(when_node, condition);
18793 
18794  // If we found a missing node, then this is a syntax
18795  // error and we should stop looping.
18796  if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE)) break;
18797 
18798  // If this is a string node, then we need to mark it
18799  // as frozen because when clause strings are frozen.
18800  if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18801  pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18802  } else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18803  pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18804  }
18805 
18806  pm_when_clause_static_literals_add(parser, &literals, condition);
18807  }
18808  } while (accept1(parser, PM_TOKEN_COMMA));
18809 
18810  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18811  if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18812  pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18813  }
18814  } else {
18815  expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18816  pm_when_node_then_keyword_loc_set(when_node, &parser->previous);
18817  }
18818 
18820  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_CASE_WHEN, (uint16_t) (depth + 1));
18821  if (statements != NULL) {
18822  pm_when_node_statements_set(when_node, statements);
18823  }
18824  }
18825 
18826  pm_case_node_condition_append(case_node, (pm_node_t *) when_node);
18827  }
18828 
18829  // If we didn't parse any conditions (in or when) then we need
18830  // to indicate that we have an error.
18831  if (case_node->conditions.size == 0) {
18832  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18833  }
18834 
18835  pm_static_literals_free(&literals);
18836  node = (pm_node_t *) case_node;
18837  } else {
18838  pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate, &end_keyword);
18839 
18840  // If this is a case-match node (i.e., it is a pattern matching
18841  // case statement) then we must have a predicate.
18842  if (predicate == NULL) {
18843  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18844  }
18845 
18846  // At this point we expect that we're parsing a case-in node. We
18847  // will continue to parse the in nodes until we hit the end of
18848  // the list.
18849  while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18850  parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, true);
18851 
18852  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
18853  parser->pattern_matching_newlines = true;
18854 
18855  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18856  parser->command_start = false;
18857  parser_lex(parser);
18858 
18859  pm_token_t in_keyword = parser->previous;
18860 
18861  pm_constant_id_list_t captures = { 0 };
18862  pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18863 
18864  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
18865  pm_constant_id_list_free(&captures);
18866 
18867  // Since we're in the top-level of the case-in node we need
18868  // to check for guard clauses in the form of `if` or
18869  // `unless` statements.
18870  if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18871  pm_token_t keyword = parser->previous;
18872  pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18873  pattern = (pm_node_t *) pm_if_node_modifier_create(parser, pattern, &keyword, predicate);
18874  } else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18875  pm_token_t keyword = parser->previous;
18876  pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18877  pattern = (pm_node_t *) pm_unless_node_modifier_create(parser, pattern, &keyword, predicate);
18878  }
18879 
18880  // Now we need to check for the terminator of the in node's
18881  // pattern. It can be a newline or semicolon optionally
18882  // followed by a `then` keyword.
18883  pm_token_t then_keyword;
18884  if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18885  if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18886  then_keyword = parser->previous;
18887  } else {
18888  then_keyword = not_provided(parser);
18889  }
18890  } else {
18891  expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18892  then_keyword = parser->previous;
18893  }
18894 
18895  // Now we can actually parse the statements associated with
18896  // the in node.
18897  pm_statements_node_t *statements;
18899  statements = NULL;
18900  } else {
18901  statements = parse_statements(parser, PM_CONTEXT_CASE_IN, (uint16_t) (depth + 1));
18902  }
18903 
18904  // Now that we have the full pattern and statements, we can
18905  // create the node and attach it to the case node.
18906  pm_node_t *condition = (pm_node_t *) pm_in_node_create(parser, pattern, statements, &in_keyword, &then_keyword);
18907  pm_case_match_node_condition_append(case_node, condition);
18908  }
18909 
18910  // If we didn't parse any conditions (in or when) then we need
18911  // to indicate that we have an error.
18912  if (case_node->conditions.size == 0) {
18913  pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18914  }
18915 
18916  node = (pm_node_t *) case_node;
18917  }
18918 
18919  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18920  if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18921  pm_token_t else_keyword = parser->previous;
18922  pm_else_node_t *else_node;
18923 
18924  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18925  else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser, PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->current);
18926  } else {
18927  else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->current);
18928  }
18929 
18930  if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18931  pm_case_node_else_clause_set((pm_case_node_t *) node, else_node);
18932  } else {
18933  pm_case_match_node_else_clause_set((pm_case_match_node_t *) node, else_node);
18934  }
18935  }
18936 
18937  parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword, false, false);
18938  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM);
18939 
18940  if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18941  pm_case_node_end_keyword_loc_set((pm_case_node_t *) node, &parser->previous);
18942  } else {
18943  pm_case_match_node_end_keyword_loc_set((pm_case_match_node_t *) node, &parser->previous);
18944  }
18945 
18946  pop_block_exits(parser, previous_block_exits);
18947  pm_node_list_free(&current_block_exits);
18948 
18949  return node;
18950  }
18951  case PM_TOKEN_KEYWORD_BEGIN: {
18952  size_t opening_newline_index = token_newline_index(parser);
18953  parser_lex(parser);
18954 
18955  pm_token_t begin_keyword = parser->previous;
18956  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18957 
18958  pm_node_list_t current_block_exits = { 0 };
18959  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18960  pm_statements_node_t *begin_statements = NULL;
18961 
18963  pm_accepts_block_stack_push(parser, true);
18964  begin_statements = parse_statements(parser, PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
18965  pm_accepts_block_stack_pop(parser);
18966  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18967  }
18968 
18969  pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
18970  parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
18971  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM);
18972 
18973  begin_node->base.location.end = parser->previous.end;
18974  pm_begin_node_end_keyword_set(begin_node, &parser->previous);
18975 
18976  pop_block_exits(parser, previous_block_exits);
18977  pm_node_list_free(&current_block_exits);
18978 
18979  return (pm_node_t *) begin_node;
18980  }
18982  pm_node_list_t current_block_exits = { 0 };
18983  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
18984 
18985  if (binding_power != PM_BINDING_POWER_STATEMENT) {
18986  pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
18987  }
18988 
18989  parser_lex(parser);
18990  pm_token_t keyword = parser->previous;
18991 
18992  expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
18993  pm_token_t opening = parser->previous;
18994  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_PREEXE, (uint16_t) (depth + 1));
18995 
18996  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
18997  pm_context_t context = parser->current_context->context;
18998  if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
18999  pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
19000  }
19001 
19002  flush_block_exits(parser, previous_block_exits);
19003  pm_node_list_free(&current_block_exits);
19004 
19005  return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19006  }
19008  case PM_TOKEN_KEYWORD_NEXT:
19009  case PM_TOKEN_KEYWORD_RETURN: {
19010  parser_lex(parser);
19011 
19012  pm_token_t keyword = parser->previous;
19013  pm_arguments_t arguments = { 0 };
19014 
19015  if (
19016  token_begins_expression_p(parser->current.type) ||
19017  match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
19018  ) {
19019  pm_binding_power_t binding_power = pm_binding_powers[parser->current.type].left;
19020 
19021  if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
19022  parse_arguments(parser, &arguments, false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
19023  }
19024  }
19025 
19026  switch (keyword.type) {
19027  case PM_TOKEN_KEYWORD_BREAK: {
19028  pm_node_t *node = (pm_node_t *) pm_break_node_create(parser, &keyword, arguments.arguments);
19029  if (!parser->partial_script) parse_block_exit(parser, node);
19030  return node;
19031  }
19032  case PM_TOKEN_KEYWORD_NEXT: {
19033  pm_node_t *node = (pm_node_t *) pm_next_node_create(parser, &keyword, arguments.arguments);
19034  if (!parser->partial_script) parse_block_exit(parser, node);
19035  return node;
19036  }
19037  case PM_TOKEN_KEYWORD_RETURN: {
19038  pm_node_t *node = (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
19039  parse_return(parser, node);
19040  return node;
19041  }
19042  default:
19043  assert(false && "unreachable");
19044  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
19045  }
19046  }
19047  case PM_TOKEN_KEYWORD_SUPER: {
19048  parser_lex(parser);
19049 
19050  pm_token_t keyword = parser->previous;
19051  pm_arguments_t arguments = { 0 };
19052  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
19053 
19054  if (
19055  arguments.opening_loc.start == NULL &&
19056  arguments.arguments == NULL &&
19057  ((arguments.block == NULL) || PM_NODE_TYPE_P(arguments.block, PM_BLOCK_NODE))
19058  ) {
19059  return (pm_node_t *) pm_forwarding_super_node_create(parser, &keyword, &arguments);
19060  }
19061 
19062  return (pm_node_t *) pm_super_node_create(parser, &keyword, &arguments);
19063  }
19064  case PM_TOKEN_KEYWORD_YIELD: {
19065  parser_lex(parser);
19066 
19067  pm_token_t keyword = parser->previous;
19068  pm_arguments_t arguments = { 0 };
19069  parse_arguments_list(parser, &arguments, false, accepts_command_call, (uint16_t) (depth + 1));
19070 
19071  // It's possible that we've parsed a block argument through our
19072  // call to parse_arguments_list. If we found one, we should mark it
19073  // as invalid and destroy it, as we don't have a place for it on the
19074  // yield node.
19075  if (arguments.block != NULL) {
19076  pm_parser_err_node(parser, arguments.block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
19077  pm_node_destroy(parser, arguments.block);
19078  arguments.block = NULL;
19079  }
19080 
19081  pm_node_t *node = (pm_node_t *) pm_yield_node_create(parser, &keyword, &arguments.opening_loc, arguments.arguments, &arguments.closing_loc);
19082  if (!parser->parsing_eval && !parser->partial_script) parse_yield(parser, node);
19083 
19084  return node;
19085  }
19086  case PM_TOKEN_KEYWORD_CLASS: {
19087  size_t opening_newline_index = token_newline_index(parser);
19088  parser_lex(parser);
19089 
19090  pm_token_t class_keyword = parser->previous;
19091  pm_do_loop_stack_push(parser, false);
19092 
19093  pm_node_list_t current_block_exits = { 0 };
19094  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19095 
19096  if (accept1(parser, PM_TOKEN_LESS_LESS)) {
19097  pm_token_t operator = parser->previous;
19098  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
19099 
19100  pm_parser_scope_push(parser, true);
19101  if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
19102  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_SINGLETON_CLASS_DELIMITER, pm_token_type_human(parser->current.type));
19103  }
19104 
19105  pm_node_t *statements = NULL;
19107  pm_accepts_block_stack_push(parser, true);
19108  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_SCLASS, (uint16_t) (depth + 1));
19109  pm_accepts_block_stack_pop(parser);
19110  }
19111 
19112  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19113  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19114  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1));
19115  } else {
19116  parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19117  }
19118 
19119  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19120 
19121  pm_constant_id_list_t locals;
19122  pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19123 
19124  pm_parser_scope_pop(parser);
19125  pm_do_loop_stack_pop(parser);
19126 
19127  flush_block_exits(parser, previous_block_exits);
19128  pm_node_list_free(&current_block_exits);
19129 
19130  return (pm_node_t *) pm_singleton_class_node_create(parser, &locals, &class_keyword, &operator, expression, statements, &parser->previous);
19131  }
19132 
19133  pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
19134  pm_token_t name = parser->previous;
19135  if (name.type != PM_TOKEN_CONSTANT) {
19136  pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
19137  }
19138 
19139  pm_token_t inheritance_operator;
19140  pm_node_t *superclass;
19141 
19142  if (match1(parser, PM_TOKEN_LESS)) {
19143  inheritance_operator = parser->current;
19144  lex_state_set(parser, PM_LEX_STATE_BEG);
19145 
19146  parser->command_start = true;
19147  parser_lex(parser);
19148 
19149  superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
19150  } else {
19151  inheritance_operator = not_provided(parser);
19152  superclass = NULL;
19153  }
19154 
19155  pm_parser_scope_push(parser, true);
19156 
19157  if (inheritance_operator.type != PM_TOKEN_NOT_PROVIDED) {
19158  expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
19159  } else {
19160  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19161  }
19162  pm_node_t *statements = NULL;
19163 
19165  pm_accepts_block_stack_push(parser, true);
19166  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_CLASS, (uint16_t) (depth + 1));
19167  pm_accepts_block_stack_pop(parser);
19168  }
19169 
19170  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
19171  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19172  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1));
19173  } else {
19174  parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword, false, false);
19175  }
19176 
19177  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
19178 
19179  if (context_def_p(parser)) {
19180  pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
19181  }
19182 
19183  pm_constant_id_list_t locals;
19184  pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19185 
19186  pm_parser_scope_pop(parser);
19187  pm_do_loop_stack_pop(parser);
19188 
19189  if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
19190  pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
19191  }
19192 
19193  pop_block_exits(parser, previous_block_exits);
19194  pm_node_list_free(&current_block_exits);
19195 
19196  return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
19197  }
19198  case PM_TOKEN_KEYWORD_DEF: {
19199  pm_node_list_t current_block_exits = { 0 };
19200  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19201 
19202  pm_token_t def_keyword = parser->current;
19203  size_t opening_newline_index = token_newline_index(parser);
19204 
19205  pm_node_t *receiver = NULL;
19206  pm_token_t operator = not_provided(parser);
19207  pm_token_t name;
19208 
19209  // This context is necessary for lexing `...` in a bare params
19210  // correctly. It must be pushed before lexing the first param, so it
19211  // is here.
19212  context_push(parser, PM_CONTEXT_DEF_PARAMS);
19213  parser_lex(parser);
19214 
19215  // This will be false if the method name is not a valid identifier
19216  // but could be followed by an operator.
19217  bool valid_name = true;
19218 
19219  switch (parser->current.type) {
19220  case PM_CASE_OPERATOR:
19221  pm_parser_scope_push(parser, true);
19222  lex_state_set(parser, PM_LEX_STATE_ENDFN);
19223  parser_lex(parser);
19224 
19225  name = parser->previous;
19226  break;
19227  case PM_TOKEN_IDENTIFIER: {
19228  parser_lex(parser);
19229 
19230  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19231  receiver = parse_variable_call(parser);
19232 
19233  pm_parser_scope_push(parser, true);
19234  lex_state_set(parser, PM_LEX_STATE_FNAME);
19235  parser_lex(parser);
19236 
19237  operator = parser->previous;
19238  name = parse_method_definition_name(parser);
19239  } else {
19240  pm_refute_numbered_parameter(parser, parser->previous.start, parser->previous.end);
19241  pm_parser_scope_push(parser, true);
19242 
19243  name = parser->previous;
19244  }
19245 
19246  break;
19247  }
19251  valid_name = false;
19252  /* fallthrough */
19253  case PM_TOKEN_CONSTANT:
19254  case PM_TOKEN_KEYWORD_NIL:
19255  case PM_TOKEN_KEYWORD_SELF:
19256  case PM_TOKEN_KEYWORD_TRUE:
19261  pm_parser_scope_push(parser, true);
19262  parser_lex(parser);
19263 
19264  pm_token_t identifier = parser->previous;
19265 
19266  if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
19267  lex_state_set(parser, PM_LEX_STATE_FNAME);
19268  parser_lex(parser);
19269  operator = parser->previous;
19270 
19271  switch (identifier.type) {
19272  case PM_TOKEN_CONSTANT:
19273  receiver = (pm_node_t *) pm_constant_read_node_create(parser, &identifier);
19274  break;
19276  receiver = (pm_node_t *) pm_instance_variable_read_node_create(parser, &identifier);
19277  break;
19279  receiver = (pm_node_t *) pm_class_variable_read_node_create(parser, &identifier);
19280  break;
19282  receiver = (pm_node_t *) pm_global_variable_read_node_create(parser, &identifier);
19283  break;
19284  case PM_TOKEN_KEYWORD_NIL:
19285  receiver = (pm_node_t *) pm_nil_node_create(parser, &identifier);
19286  break;
19287  case PM_TOKEN_KEYWORD_SELF:
19288  receiver = (pm_node_t *) pm_self_node_create(parser, &identifier);
19289  break;
19290  case PM_TOKEN_KEYWORD_TRUE:
19291  receiver = (pm_node_t *) pm_true_node_create(parser, &identifier);
19292  break;
19294  receiver = (pm_node_t *) pm_false_node_create(parser, &identifier);
19295  break;
19297  receiver = (pm_node_t *) pm_source_file_node_create(parser, &identifier);
19298  break;
19300  receiver = (pm_node_t *) pm_source_line_node_create(parser, &identifier);
19301  break;
19303  receiver = (pm_node_t *) pm_source_encoding_node_create(parser, &identifier);
19304  break;
19305  default:
19306  break;
19307  }
19308 
19309  name = parse_method_definition_name(parser);
19310  } else {
19311  if (!valid_name) {
19312  PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19313  }
19314 
19315  name = identifier;
19316  }
19317  break;
19318  }
19320  // The current context is `PM_CONTEXT_DEF_PARAMS`, however
19321  // the inner expression of this parenthesis should not be
19322  // processed under this context. Thus, the context is popped
19323  // here.
19324  context_pop(parser);
19325  parser_lex(parser);
19326 
19327  pm_token_t lparen = parser->previous;
19328  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
19329 
19330  accept1(parser, PM_TOKEN_NEWLINE);
19331  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19332  pm_token_t rparen = parser->previous;
19333 
19334  lex_state_set(parser, PM_LEX_STATE_FNAME);
19335  expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
19336 
19337  operator = parser->previous;
19338  receiver = (pm_node_t *) pm_parentheses_node_create(parser, &lparen, expression, &rparen);
19339 
19340  // `PM_CONTEXT_DEF_PARAMS` is pushed again here for the same
19341  // reason as described above.
19342  pm_parser_scope_push(parser, true);
19343  context_push(parser, PM_CONTEXT_DEF_PARAMS);
19344  name = parse_method_definition_name(parser);
19345  break;
19346  }
19347  default:
19348  pm_parser_scope_push(parser, true);
19349  name = parse_method_definition_name(parser);
19350  break;
19351  }
19352 
19353  pm_token_t lparen;
19354  pm_token_t rparen;
19355  pm_parameters_node_t *params;
19356 
19357  switch (parser->current.type) {
19359  parser_lex(parser);
19360  lparen = parser->previous;
19361 
19362  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19363  params = NULL;
19364  } else {
19365  params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, (uint16_t) (depth + 1));
19366  }
19367 
19368  lex_state_set(parser, PM_LEX_STATE_BEG);
19369  parser->command_start = true;
19370 
19371  context_pop(parser);
19372  if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19373  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_DEF_PARAMS_TERM_PAREN, pm_token_type_human(parser->current.type));
19374  parser->previous.start = parser->previous.end;
19375  parser->previous.type = PM_TOKEN_MISSING;
19376  }
19377 
19378  rparen = parser->previous;
19379  break;
19380  }
19381  case PM_CASE_PARAMETER: {
19382  // If we're about to lex a label, we need to add the label
19383  // state to make sure the next newline is ignored.
19384  if (parser->current.type == PM_TOKEN_LABEL) {
19385  lex_state_set(parser, parser->lex_state | PM_LEX_STATE_LABEL);
19386  }
19387 
19388  lparen = not_provided(parser);
19389  rparen = not_provided(parser);
19390  params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, (uint16_t) (depth + 1));
19391 
19392  context_pop(parser);
19393  break;
19394  }
19395  default: {
19396  lparen = not_provided(parser);
19397  rparen = not_provided(parser);
19398  params = NULL;
19399 
19400  context_pop(parser);
19401  break;
19402  }
19403  }
19404 
19405  pm_node_t *statements = NULL;
19406  pm_token_t equal;
19407  pm_token_t end_keyword;
19408 
19409  if (accept1(parser, PM_TOKEN_EQUAL)) {
19410  if (token_is_setter_name(&name)) {
19411  pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
19412  }
19413  equal = parser->previous;
19414 
19415  context_push(parser, PM_CONTEXT_DEF);
19416  pm_do_loop_stack_push(parser, false);
19417  statements = (pm_node_t *) pm_statements_node_create(parser);
19418 
19419  pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, binding_power < PM_BINDING_POWER_COMPOSITION, false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
19420 
19421  if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
19422  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
19423 
19424  pm_token_t rescue_keyword = parser->previous;
19425  pm_node_t *value = parse_expression(parser, binding_power, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
19426  context_pop(parser);
19427 
19428  statement = (pm_node_t *) pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value);
19429  }
19430 
19431  pm_statements_node_body_append(parser, (pm_statements_node_t *) statements, statement, false);
19432  pm_do_loop_stack_pop(parser);
19433  context_pop(parser);
19434  end_keyword = not_provided(parser);
19435  } else {
19436  equal = not_provided(parser);
19437 
19438  if (lparen.type == PM_TOKEN_NOT_PROVIDED) {
19439  lex_state_set(parser, PM_LEX_STATE_BEG);
19440  parser->command_start = true;
19441  expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
19442  } else {
19443  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19444  }
19445 
19446  pm_accepts_block_stack_push(parser, true);
19447  pm_do_loop_stack_push(parser, false);
19448 
19450  pm_accepts_block_stack_push(parser, true);
19451  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_DEF, (uint16_t) (depth + 1));
19452  pm_accepts_block_stack_pop(parser);
19453  }
19454 
19456  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19457  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1));
19458  } else {
19459  parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword, false, false);
19460  }
19461 
19462  pm_accepts_block_stack_pop(parser);
19463  pm_do_loop_stack_pop(parser);
19464 
19465  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM);
19466  end_keyword = parser->previous;
19467  }
19468 
19469  pm_constant_id_list_t locals;
19470  pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19471  pm_parser_scope_pop(parser);
19472 
19478  pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
19479 
19480  flush_block_exits(parser, previous_block_exits);
19481  pm_node_list_free(&current_block_exits);
19482 
19483  return (pm_node_t *) pm_def_node_create(
19484  parser,
19485  name_id,
19486  &name,
19487  receiver,
19488  params,
19489  statements,
19490  &locals,
19491  &def_keyword,
19492  &operator,
19493  &lparen,
19494  &rparen,
19495  &equal,
19496  &end_keyword
19497  );
19498  }
19499  case PM_TOKEN_KEYWORD_DEFINED: {
19500  parser_lex(parser);
19501  pm_token_t keyword = parser->previous;
19502 
19503  pm_token_t lparen;
19504  pm_token_t rparen;
19505  pm_node_t *expression;
19506  context_push(parser, PM_CONTEXT_DEFINED);
19507 
19508  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19509  lparen = parser->previous;
19510  expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19511 
19512  if (parser->recovering) {
19513  rparen = not_provided(parser);
19514  } else {
19515  accept1(parser, PM_TOKEN_NEWLINE);
19516  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19517  rparen = parser->previous;
19518  }
19519  } else {
19520  lparen = not_provided(parser);
19521  rparen = not_provided(parser);
19522  expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19523  }
19524 
19525  context_pop(parser);
19526  return (pm_node_t *) pm_defined_node_create(
19527  parser,
19528  &lparen,
19529  expression,
19530  &rparen,
19531  &PM_LOCATION_TOKEN_VALUE(&keyword)
19532  );
19533  }
19535  if (binding_power != PM_BINDING_POWER_STATEMENT) {
19536  pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19537  }
19538 
19539  parser_lex(parser);
19540  pm_token_t keyword = parser->previous;
19541 
19542  if (context_def_p(parser)) {
19543  pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19544  }
19545 
19546  expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19547  pm_token_t opening = parser->previous;
19548  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_POSTEXE, (uint16_t) (depth + 1));
19549 
19550  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM);
19551  return (pm_node_t *) pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
19552  }
19554  parser_lex(parser);
19555  return (pm_node_t *) pm_false_node_create(parser, &parser->previous);
19556  case PM_TOKEN_KEYWORD_FOR: {
19557  size_t opening_newline_index = token_newline_index(parser);
19558  parser_lex(parser);
19559 
19560  pm_token_t for_keyword = parser->previous;
19561  pm_node_t *index;
19562 
19563  context_push(parser, PM_CONTEXT_FOR_INDEX);
19564 
19565  // First, parse out the first index expression.
19566  if (accept1(parser, PM_TOKEN_USTAR)) {
19567  pm_token_t star_operator = parser->previous;
19568  pm_node_t *name = NULL;
19569 
19570  if (token_begins_expression_p(parser->current.type)) {
19571  name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19572  }
19573 
19574  index = (pm_node_t *) pm_splat_node_create(parser, &star_operator, name);
19575  } else if (token_begins_expression_p(parser->current.type)) {
19576  index = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19577  } else {
19578  pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19579  index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
19580  }
19581 
19582  // Now, if there are multiple index expressions, parse them out.
19583  if (match1(parser, PM_TOKEN_COMMA)) {
19584  index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19585  } else {
19586  index = parse_target(parser, index, false, false);
19587  }
19588 
19589  context_pop(parser);
19590  pm_do_loop_stack_push(parser, true);
19591 
19592  expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19593  pm_token_t in_keyword = parser->previous;
19594 
19595  pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19596  pm_do_loop_stack_pop(parser);
19597 
19598  pm_token_t do_keyword;
19599  if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19600  do_keyword = parser->previous;
19601  } else {
19602  do_keyword = not_provided(parser);
19603  if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19604  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_FOR_DELIMITER, pm_token_type_human(parser->current.type));
19605  }
19606  }
19607 
19608  pm_statements_node_t *statements = NULL;
19609  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19610  statements = parse_statements(parser, PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19611  }
19612 
19613  parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword, false, false);
19614  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
19615 
19616  return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
19617  }
19618  case PM_TOKEN_KEYWORD_IF:
19619  if (parser_end_of_line_p(parser)) {
19620  PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_KEYWORD_EOL);
19621  }
19622 
19623  size_t opening_newline_index = token_newline_index(parser);
19624  bool if_after_else = parser->previous.type == PM_TOKEN_KEYWORD_ELSE;
19625  parser_lex(parser);
19626 
19627  return parse_conditional(parser, PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19628  case PM_TOKEN_KEYWORD_UNDEF: {
19629  if (binding_power != PM_BINDING_POWER_STATEMENT) {
19630  pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19631  }
19632 
19633  parser_lex(parser);
19634  pm_undef_node_t *undef = pm_undef_node_create(parser, &parser->previous);
19635  pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19636 
19637  if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19638  pm_node_destroy(parser, name);
19639  } else {
19640  pm_undef_node_append(undef, name);
19641 
19642  while (match1(parser, PM_TOKEN_COMMA)) {
19643  lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19644  parser_lex(parser);
19645  name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19646 
19647  if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19648  pm_node_destroy(parser, name);
19649  break;
19650  }
19651 
19652  pm_undef_node_append(undef, name);
19653  }
19654  }
19655 
19656  return (pm_node_t *) undef;
19657  }
19658  case PM_TOKEN_KEYWORD_NOT: {
19659  parser_lex(parser);
19660 
19661  pm_token_t message = parser->previous;
19662  pm_arguments_t arguments = { 0 };
19663  pm_node_t *receiver = NULL;
19664 
19665  accept1(parser, PM_TOKEN_NEWLINE);
19666 
19667  if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19668  arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19669 
19670  if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19671  arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19672  } else {
19673  receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19674 
19675  if (!parser->recovering) {
19676  accept1(parser, PM_TOKEN_NEWLINE);
19677  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19678  arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
19679  }
19680  }
19681  } else {
19682  receiver = parse_expression(parser, PM_BINDING_POWER_NOT, true, false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19683  }
19684 
19685  return (pm_node_t *) pm_call_node_not_create(parser, receiver, &message, &arguments);
19686  }
19687  case PM_TOKEN_KEYWORD_UNLESS: {
19688  size_t opening_newline_index = token_newline_index(parser);
19689  parser_lex(parser);
19690 
19691  return parse_conditional(parser, PM_CONTEXT_UNLESS, opening_newline_index, false, (uint16_t) (depth + 1));
19692  }
19693  case PM_TOKEN_KEYWORD_MODULE: {
19694  pm_node_list_t current_block_exits = { 0 };
19695  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
19696 
19697  size_t opening_newline_index = token_newline_index(parser);
19698  parser_lex(parser);
19699  pm_token_t module_keyword = parser->previous;
19700 
19701  pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19702  pm_token_t name;
19703 
19704  // If we can recover from a syntax error that occurred while parsing
19705  // the name of the module, then we'll handle that here.
19706  if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19707  pop_block_exits(parser, previous_block_exits);
19708  pm_node_list_free(&current_block_exits);
19709 
19710  pm_token_t missing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19711  return (pm_node_t *) pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing);
19712  }
19713 
19714  while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19715  pm_token_t double_colon = parser->previous;
19716 
19717  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19718  constant_path = (pm_node_t *) pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->previous);
19719  }
19720 
19721  // Here we retrieve the name of the module. If it wasn't a constant,
19722  // then it's possible that `module foo` was passed, which is a
19723  // syntax error. We handle that here as well.
19724  name = parser->previous;
19725  if (name.type != PM_TOKEN_CONSTANT) {
19726  pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19727  }
19728 
19729  pm_parser_scope_push(parser, true);
19730  accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19731  pm_node_t *statements = NULL;
19732 
19734  pm_accepts_block_stack_push(parser, true);
19735  statements = (pm_node_t *) parse_statements(parser, PM_CONTEXT_MODULE, (uint16_t) (depth + 1));
19736  pm_accepts_block_stack_pop(parser);
19737  }
19738 
19740  assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19741  statements = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.start, (pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1));
19742  } else {
19743  parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword, false, false);
19744  }
19745 
19746  pm_constant_id_list_t locals;
19747  pm_locals_order(parser, &parser->current_scope->locals, &locals, false);
19748 
19749  pm_parser_scope_pop(parser);
19750  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
19751 
19752  if (context_def_p(parser)) {
19753  pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19754  }
19755 
19756  pop_block_exits(parser, previous_block_exits);
19757  pm_node_list_free(&current_block_exits);
19758 
19759  return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
19760  }
19761  case PM_TOKEN_KEYWORD_NIL:
19762  parser_lex(parser);
19763  return (pm_node_t *) pm_nil_node_create(parser, &parser->previous);
19764  case PM_TOKEN_KEYWORD_REDO: {
19765  parser_lex(parser);
19766 
19767  pm_node_t *node = (pm_node_t *) pm_redo_node_create(parser, &parser->previous);
19768  if (!parser->partial_script) parse_block_exit(parser, node);
19769 
19770  return node;
19771  }
19772  case PM_TOKEN_KEYWORD_RETRY: {
19773  parser_lex(parser);
19774 
19775  pm_node_t *node = (pm_node_t *) pm_retry_node_create(parser, &parser->previous);
19776  parse_retry(parser, node);
19777 
19778  return node;
19779  }
19780  case PM_TOKEN_KEYWORD_SELF:
19781  parser_lex(parser);
19782  return (pm_node_t *) pm_self_node_create(parser, &parser->previous);
19783  case PM_TOKEN_KEYWORD_TRUE:
19784  parser_lex(parser);
19785  return (pm_node_t *) pm_true_node_create(parser, &parser->previous);
19786  case PM_TOKEN_KEYWORD_UNTIL: {
19787  size_t opening_newline_index = token_newline_index(parser);
19788 
19789  context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19790  pm_do_loop_stack_push(parser, true);
19791 
19792  parser_lex(parser);
19793  pm_token_t keyword = parser->previous;
19794  pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19795 
19796  pm_do_loop_stack_pop(parser);
19797  context_pop(parser);
19798 
19799  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19800  pm_statements_node_t *statements = NULL;
19801 
19802  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19803  pm_accepts_block_stack_push(parser, true);
19804  statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19805  pm_accepts_block_stack_pop(parser);
19806  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19807  }
19808 
19809  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19810  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19811 
19812  return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19813  }
19814  case PM_TOKEN_KEYWORD_WHILE: {
19815  size_t opening_newline_index = token_newline_index(parser);
19816 
19817  context_push(parser, PM_CONTEXT_LOOP_PREDICATE);
19818  pm_do_loop_stack_push(parser, true);
19819 
19820  parser_lex(parser);
19821  pm_token_t keyword = parser->previous;
19822  pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19823 
19824  pm_do_loop_stack_pop(parser);
19825  context_pop(parser);
19826 
19827  expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19828  pm_statements_node_t *statements = NULL;
19829 
19830  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19831  pm_accepts_block_stack_push(parser, true);
19832  statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19833  pm_accepts_block_stack_pop(parser);
19834  accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19835  }
19836 
19837  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19838  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19839 
19840  return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19841  }
19842  case PM_TOKEN_PERCENT_LOWER_I: {
19843  parser_lex(parser);
19844  pm_token_t opening = parser->previous;
19845  pm_array_node_t *array = pm_array_node_create(parser, &opening);
19846 
19847  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19848  accept1(parser, PM_TOKEN_WORDS_SEP);
19849  if (match1(parser, PM_TOKEN_STRING_END)) break;
19850 
19851  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19852  pm_token_t opening = not_provided(parser);
19853  pm_token_t closing = not_provided(parser);
19854  pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
19855  }
19856 
19857  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19858  }
19859 
19860  pm_token_t closing = parser->current;
19861  if (match1(parser, PM_TOKEN_EOF)) {
19862  pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19863  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
19864  } else {
19865  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19866  }
19867  pm_array_node_close_set(array, &closing);
19868 
19869  return (pm_node_t *) array;
19870  }
19871  case PM_TOKEN_PERCENT_UPPER_I: {
19872  parser_lex(parser);
19873  pm_token_t opening = parser->previous;
19874  pm_array_node_t *array = pm_array_node_create(parser, &opening);
19875 
19876  // This is the current node that we are parsing that will be added to the
19877  // list of elements.
19878  pm_node_t *current = NULL;
19879 
19880  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19881  switch (parser->current.type) {
19882  case PM_TOKEN_WORDS_SEP: {
19883  if (current == NULL) {
19884  // If we hit a separator before we have any content, then we don't
19885  // need to do anything.
19886  } else {
19887  // If we hit a separator after we've hit content, then we need to
19888  // append that content to the list and reset the current node.
19889  pm_array_node_elements_append(array, current);
19890  current = NULL;
19891  }
19892 
19893  parser_lex(parser);
19894  break;
19895  }
19896  case PM_TOKEN_STRING_CONTENT: {
19897  pm_token_t opening = not_provided(parser);
19898  pm_token_t closing = not_provided(parser);
19899 
19900  if (current == NULL) {
19901  // If we hit content and the current node is NULL, then this is
19902  // the first string content we've seen. In that case we're going
19903  // to create a new symbol node and set that to the current.
19904  current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
19905  parser_lex(parser);
19906  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19907  // If we hit string content and the current node is an
19908  // interpolated symbol, then we need to append the string content
19909  // to the list of child nodes.
19910  pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
19911  parser_lex(parser);
19912 
19913  pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
19914  } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19915  // If we hit string content and the current node is a symbol node,
19916  // then we need to convert the current node into an interpolated
19917  // symbol and add the string content to the list of child nodes.
19918  pm_symbol_node_t *cast = (pm_symbol_node_t *) current;
19919  pm_token_t bounds = not_provided(parser);
19920 
19921  pm_token_t content = { .type = PM_TOKEN_STRING_CONTENT, .start = cast->value_loc.start, .end = cast->value_loc.end };
19922  pm_node_t *first_string = (pm_node_t *) pm_string_node_create_unescaped(parser, &bounds, &content, &bounds, &cast->unescaped);
19923  pm_node_t *second_string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
19924  parser_lex(parser);
19925 
19926  pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19927  pm_interpolated_symbol_node_append(interpolated, first_string);
19928  pm_interpolated_symbol_node_append(interpolated, second_string);
19929 
19930  xfree(current);
19931  current = (pm_node_t *) interpolated;
19932  } else {
19933  assert(false && "unreachable");
19934  }
19935 
19936  break;
19937  }
19938  case PM_TOKEN_EMBVAR: {
19939  bool start_location_set = false;
19940  if (current == NULL) {
19941  // If we hit an embedded variable and the current node is NULL,
19942  // then this is the start of a new symbol. We'll set the current
19943  // node to a new interpolated symbol.
19944  pm_token_t opening = not_provided(parser);
19945  pm_token_t closing = not_provided(parser);
19946  current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19947  } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19948  // If we hit an embedded variable and the current node is a symbol
19949  // node, then we'll convert the current into an interpolated
19950  // symbol and add the converted string node to the list of parts.
19951  pm_token_t opening = not_provided(parser);
19952  pm_token_t closing = not_provided(parser);
19953  pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19954 
19955  current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
19956  pm_interpolated_symbol_node_append(interpolated, current);
19957  interpolated->base.location.start = current->location.start;
19958  start_location_set = true;
19959  current = (pm_node_t *) interpolated;
19960  } else {
19961  // If we hit an embedded variable and the current node is an
19962  // interpolated symbol, then we'll just add the embedded variable.
19963  }
19964 
19965  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19966  pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
19967  if (!start_location_set) {
19968  current->location.start = part->location.start;
19969  }
19970  break;
19971  }
19972  case PM_TOKEN_EMBEXPR_BEGIN: {
19973  bool start_location_set = false;
19974  if (current == NULL) {
19975  // If we hit an embedded expression and the current node is NULL,
19976  // then this is the start of a new symbol. We'll set the current
19977  // node to a new interpolated symbol.
19978  pm_token_t opening = not_provided(parser);
19979  pm_token_t closing = not_provided(parser);
19980  current = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19981  } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19982  // If we hit an embedded expression and the current node is a
19983  // symbol node, then we'll convert the current into an
19984  // interpolated symbol and add the converted string node to the
19985  // list of parts.
19986  pm_token_t opening = not_provided(parser);
19987  pm_token_t closing = not_provided(parser);
19988  pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
19989 
19990  current = (pm_node_t *) pm_symbol_node_to_string_node(parser, (pm_symbol_node_t *) current);
19991  pm_interpolated_symbol_node_append(interpolated, current);
19992  interpolated->base.location.start = current->location.start;
19993  start_location_set = true;
19994  current = (pm_node_t *) interpolated;
19995  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19996  // If we hit an embedded expression and the current node is an
19997  // interpolated symbol, then we'll just continue on.
19998  } else {
19999  assert(false && "unreachable");
20000  }
20001 
20002  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20003  pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
20004  if (!start_location_set) {
20005  current->location.start = part->location.start;
20006  }
20007  break;
20008  }
20009  default:
20010  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
20011  parser_lex(parser);
20012  break;
20013  }
20014  }
20015 
20016  // If we have a current node, then we need to append it to the list.
20017  if (current) {
20018  pm_array_node_elements_append(array, current);
20019  }
20020 
20021  pm_token_t closing = parser->current;
20022  if (match1(parser, PM_TOKEN_EOF)) {
20023  pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
20024  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20025  } else {
20026  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
20027  }
20028  pm_array_node_close_set(array, &closing);
20029 
20030  return (pm_node_t *) array;
20031  }
20032  case PM_TOKEN_PERCENT_LOWER_W: {
20033  parser_lex(parser);
20034  pm_token_t opening = parser->previous;
20035  pm_array_node_t *array = pm_array_node_create(parser, &opening);
20036 
20037  // skip all leading whitespaces
20038  accept1(parser, PM_TOKEN_WORDS_SEP);
20039 
20040  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20041  accept1(parser, PM_TOKEN_WORDS_SEP);
20042  if (match1(parser, PM_TOKEN_STRING_END)) break;
20043 
20044  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20045  pm_token_t opening = not_provided(parser);
20046  pm_token_t closing = not_provided(parser);
20047 
20048  pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20049  pm_array_node_elements_append(array, string);
20050  }
20051 
20052  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
20053  }
20054 
20055  pm_token_t closing = parser->current;
20056  if (match1(parser, PM_TOKEN_EOF)) {
20057  pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
20058  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20059  } else {
20060  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
20061  }
20062 
20063  pm_array_node_close_set(array, &closing);
20064  return (pm_node_t *) array;
20065  }
20066  case PM_TOKEN_PERCENT_UPPER_W: {
20067  parser_lex(parser);
20068  pm_token_t opening = parser->previous;
20069  pm_array_node_t *array = pm_array_node_create(parser, &opening);
20070 
20071  // This is the current node that we are parsing that will be added
20072  // to the list of elements.
20073  pm_node_t *current = NULL;
20074 
20075  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20076  switch (parser->current.type) {
20077  case PM_TOKEN_WORDS_SEP: {
20078  // Reset the explicit encoding if we hit a separator
20079  // since each element can have its own encoding.
20080  parser->explicit_encoding = NULL;
20081 
20082  if (current == NULL) {
20083  // If we hit a separator before we have any content,
20084  // then we don't need to do anything.
20085  } else {
20086  // If we hit a separator after we've hit content,
20087  // then we need to append that content to the list
20088  // and reset the current node.
20089  pm_array_node_elements_append(array, current);
20090  current = NULL;
20091  }
20092 
20093  parser_lex(parser);
20094  break;
20095  }
20096  case PM_TOKEN_STRING_CONTENT: {
20097  pm_token_t opening = not_provided(parser);
20098  pm_token_t closing = not_provided(parser);
20099 
20100  pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
20101  pm_node_flag_set(string, parse_unescaped_encoding(parser));
20102  parser_lex(parser);
20103 
20104  if (current == NULL) {
20105  // If we hit content and the current node is NULL,
20106  // then this is the first string content we've seen.
20107  // In that case we're going to create a new string
20108  // node and set that to the current.
20109  current = string;
20110  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20111  // If we hit string content and the current node is
20112  // an interpolated string, then we need to append
20113  // the string content to the list of child nodes.
20114  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
20115  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20116  // If we hit string content and the current node is
20117  // a string node, then we need to convert the
20118  // current node into an interpolated string and add
20119  // the string content to the list of child nodes.
20120  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20121  pm_interpolated_string_node_append(interpolated, current);
20122  pm_interpolated_string_node_append(interpolated, string);
20123  current = (pm_node_t *) interpolated;
20124  } else {
20125  assert(false && "unreachable");
20126  }
20127 
20128  break;
20129  }
20130  case PM_TOKEN_EMBVAR: {
20131  if (current == NULL) {
20132  // If we hit an embedded variable and the current
20133  // node is NULL, then this is the start of a new
20134  // string. We'll set the current node to a new
20135  // interpolated string.
20136  pm_token_t opening = not_provided(parser);
20137  pm_token_t closing = not_provided(parser);
20138  current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20139  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20140  // If we hit an embedded variable and the current
20141  // node is a string node, then we'll convert the
20142  // current into an interpolated string and add the
20143  // string node to the list of parts.
20144  pm_token_t opening = not_provided(parser);
20145  pm_token_t closing = not_provided(parser);
20146  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20147  pm_interpolated_string_node_append(interpolated, current);
20148  current = (pm_node_t *) interpolated;
20149  } else {
20150  // If we hit an embedded variable and the current
20151  // node is an interpolated string, then we'll just
20152  // add the embedded variable.
20153  }
20154 
20155  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20156  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20157  break;
20158  }
20159  case PM_TOKEN_EMBEXPR_BEGIN: {
20160  if (current == NULL) {
20161  // If we hit an embedded expression and the current
20162  // node is NULL, then this is the start of a new
20163  // string. We'll set the current node to a new
20164  // interpolated string.
20165  pm_token_t opening = not_provided(parser);
20166  pm_token_t closing = not_provided(parser);
20167  current = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20168  } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
20169  // If we hit an embedded expression and the current
20170  // node is a string node, then we'll convert the
20171  // current into an interpolated string and add the
20172  // string node to the list of parts.
20173  pm_token_t opening = not_provided(parser);
20174  pm_token_t closing = not_provided(parser);
20175  pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
20176  pm_interpolated_string_node_append(interpolated, current);
20177  current = (pm_node_t *) interpolated;
20178  } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
20179  // If we hit an embedded expression and the current
20180  // node is an interpolated string, then we'll just
20181  // continue on.
20182  } else {
20183  assert(false && "unreachable");
20184  }
20185 
20186  pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
20187  pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
20188  break;
20189  }
20190  default:
20191  expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
20192  parser_lex(parser);
20193  break;
20194  }
20195  }
20196 
20197  // If we have a current node, then we need to append it to the list.
20198  if (current) {
20199  pm_array_node_elements_append(array, current);
20200  }
20201 
20202  pm_token_t closing = parser->current;
20203  if (match1(parser, PM_TOKEN_EOF)) {
20204  pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
20205  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20206  } else {
20207  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
20208  }
20209 
20210  pm_array_node_close_set(array, &closing);
20211  return (pm_node_t *) array;
20212  }
20213  case PM_TOKEN_REGEXP_BEGIN: {
20214  pm_token_t opening = parser->current;
20215  parser_lex(parser);
20216 
20217  if (match1(parser, PM_TOKEN_REGEXP_END)) {
20218  // If we get here, then we have an end immediately after a start. In
20219  // that case we'll create an empty content token and return an
20220  // uninterpolated regular expression.
20221  pm_token_t content = (pm_token_t) {
20223  .start = parser->previous.end,
20224  .end = parser->previous.end
20225  };
20226 
20227  parser_lex(parser);
20228 
20229  pm_node_t *node = (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
20231 
20232  return node;
20233  }
20234 
20236 
20237  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20238  // In this case we've hit string content so we know the regular
20239  // expression at least has something in it. We'll need to check if the
20240  // following token is the end (in which case we can return a plain
20241  // regular expression) or if it's not then it has interpolation.
20242  pm_string_t unescaped = parser->current_string;
20243  pm_token_t content = parser->current;
20244  bool ascii_only = parser->current_regular_expression_ascii_only;
20245  parser_lex(parser);
20246 
20247  // If we hit an end, then we can create a regular expression
20248  // node without interpolation, which can be represented more
20249  // succinctly and more easily compiled.
20250  if (accept1(parser, PM_TOKEN_REGEXP_END)) {
20251  pm_regular_expression_node_t *node = (pm_regular_expression_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
20252 
20253  // If we're not immediately followed by a =~, then we want
20254  // to parse all of the errors at this point. If it is
20255  // followed by a =~, then it will get parsed higher up while
20256  // parsing the named captures as well.
20257  if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
20258  parse_regular_expression_errors(parser, node);
20259  }
20260 
20261  pm_node_flag_set((pm_node_t *) node, parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, node->base.flags));
20262  return (pm_node_t *) node;
20263  }
20264 
20265  // If we get here, then we have interpolation so we'll need to create
20266  // a regular expression node with interpolation.
20267  interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20268 
20269  pm_token_t opening = not_provided(parser);
20270  pm_token_t closing = not_provided(parser);
20271  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20272 
20273  if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
20274  // This is extremely strange, but the first string part of a
20275  // regular expression will always be tagged as binary if we
20276  // are in a US-ASCII file, no matter its contents.
20277  pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
20278  }
20279 
20280  pm_interpolated_regular_expression_node_append(interpolated, part);
20281  } else {
20282  // If the first part of the body of the regular expression is not a
20283  // string content, then we have interpolation and we need to create an
20284  // interpolated regular expression node.
20285  interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
20286  }
20287 
20288  // Now that we're here and we have interpolation, we'll parse all of the
20289  // parts into the list.
20290  pm_node_t *part;
20291  while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
20292  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20293  pm_interpolated_regular_expression_node_append(interpolated, part);
20294  }
20295  }
20296 
20297  pm_token_t closing = parser->current;
20298  if (match1(parser, PM_TOKEN_EOF)) {
20299  pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
20300  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20301  } else {
20302  expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
20303  }
20304 
20305  pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
20306  return (pm_node_t *) interpolated;
20307  }
20308  case PM_TOKEN_BACKTICK:
20309  case PM_TOKEN_PERCENT_LOWER_X: {
20310  parser_lex(parser);
20311  pm_token_t opening = parser->previous;
20312 
20313  // When we get here, we don't know if this string is going to have
20314  // interpolation or not, even though it is allowed. Still, we want to be
20315  // able to return a string node without interpolation if we can since
20316  // it'll be faster.
20317  if (match1(parser, PM_TOKEN_STRING_END)) {
20318  // If we get here, then we have an end immediately after a start. In
20319  // that case we'll create an empty content token and return an
20320  // uninterpolated string.
20321  pm_token_t content = (pm_token_t) {
20323  .start = parser->previous.end,
20324  .end = parser->previous.end
20325  };
20326 
20327  parser_lex(parser);
20328  return (pm_node_t *) pm_xstring_node_create(parser, &opening, &content, &parser->previous);
20329  }
20330 
20332 
20333  if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
20334  // In this case we've hit string content so we know the string
20335  // at least has something in it. We'll need to check if the
20336  // following token is the end (in which case we can return a
20337  // plain string) or if it's not then it has interpolation.
20338  pm_string_t unescaped = parser->current_string;
20339  pm_token_t content = parser->current;
20340  parser_lex(parser);
20341 
20342  if (match1(parser, PM_TOKEN_STRING_END)) {
20343  pm_node_t *node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
20344  pm_node_flag_set(node, parse_unescaped_encoding(parser));
20345  parser_lex(parser);
20346  return node;
20347  }
20348 
20349  // If we get here, then we have interpolation so we'll need to
20350  // create a string node with interpolation.
20351  node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20352 
20353  pm_token_t opening = not_provided(parser);
20354  pm_token_t closing = not_provided(parser);
20355 
20356  pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
20357  pm_node_flag_set(part, parse_unescaped_encoding(parser));
20358 
20359  pm_interpolated_xstring_node_append(node, part);
20360  } else {
20361  // If the first part of the body of the string is not a string
20362  // content, then we have interpolation and we need to create an
20363  // interpolated string node.
20364  node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
20365  }
20366 
20367  pm_node_t *part;
20368  while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
20369  if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
20370  pm_interpolated_xstring_node_append(node, part);
20371  }
20372  }
20373 
20374  pm_token_t closing = parser->current;
20375  if (match1(parser, PM_TOKEN_EOF)) {
20376  pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
20377  closing = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
20378  } else {
20379  expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
20380  }
20381  pm_interpolated_xstring_node_closing_set(node, &closing);
20382 
20383  return (pm_node_t *) node;
20384  }
20385  case PM_TOKEN_USTAR: {
20386  parser_lex(parser);
20387 
20388  // * operators at the beginning of expressions are only valid in the
20389  // context of a multiple assignment. We enforce that here. We'll
20390  // still lex past it though and create a missing node place.
20391  if (binding_power != PM_BINDING_POWER_STATEMENT) {
20392  pm_parser_err_prefix(parser, diag_id);
20393  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20394  }
20395 
20396  pm_token_t operator = parser->previous;
20397  pm_node_t *name = NULL;
20398 
20399  if (token_begins_expression_p(parser->current.type)) {
20400  name = parse_expression(parser, PM_BINDING_POWER_INDEX, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
20401  }
20402 
20403  pm_node_t *splat = (pm_node_t *) pm_splat_node_create(parser, &operator, name);
20404 
20405  if (match1(parser, PM_TOKEN_COMMA)) {
20406  return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
20407  } else {
20408  return parse_target_validate(parser, splat, true);
20409  }
20410  }
20411  case PM_TOKEN_BANG: {
20412  if (binding_power > PM_BINDING_POWER_UNARY) {
20413  pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20414  }
20415 
20416  parser_lex(parser);
20417 
20418  pm_token_t operator = parser->previous;
20419  pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20420  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
20421 
20422  pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
20423  return (pm_node_t *) node;
20424  }
20425  case PM_TOKEN_TILDE: {
20426  if (binding_power > PM_BINDING_POWER_UNARY) {
20427  pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20428  }
20429  parser_lex(parser);
20430 
20431  pm_token_t operator = parser->previous;
20432  pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20433  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
20434 
20435  return (pm_node_t *) node;
20436  }
20437  case PM_TOKEN_UMINUS: {
20438  if (binding_power > PM_BINDING_POWER_UNARY) {
20439  pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20440  }
20441  parser_lex(parser);
20442 
20443  pm_token_t operator = parser->previous;
20444  pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20445  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
20446 
20447  return (pm_node_t *) node;
20448  }
20449  case PM_TOKEN_UMINUS_NUM: {
20450  parser_lex(parser);
20451 
20452  pm_token_t operator = parser->previous;
20453  pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20454 
20455  if (accept1(parser, PM_TOKEN_STAR_STAR)) {
20456  pm_token_t exponent_operator = parser->previous;
20457  pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.type].right, false, false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
20458  node = (pm_node_t *) pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0);
20459  node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20460  } else {
20461  switch (PM_NODE_TYPE(node)) {
20462  case PM_INTEGER_NODE:
20463  case PM_FLOAT_NODE:
20464  case PM_RATIONAL_NODE:
20465  case PM_IMAGINARY_NODE:
20466  parse_negative_numeric(node);
20467  break;
20468  default:
20469  node = (pm_node_t *) pm_call_node_unary_create(parser, &operator, node, "-@");
20470  break;
20471  }
20472  }
20473 
20474  return node;
20475  }
20476  case PM_TOKEN_MINUS_GREATER: {
20477  int previous_lambda_enclosure_nesting = parser->lambda_enclosure_nesting;
20478  parser->lambda_enclosure_nesting = parser->enclosure_nesting;
20479 
20480  size_t opening_newline_index = token_newline_index(parser);
20481  pm_accepts_block_stack_push(parser, true);
20482  parser_lex(parser);
20483 
20484  pm_token_t operator = parser->previous;
20485  pm_parser_scope_push(parser, false);
20486 
20487  pm_block_parameters_node_t *block_parameters;
20488 
20489  switch (parser->current.type) {
20491  pm_token_t opening = parser->current;
20492  parser_lex(parser);
20493 
20494  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20495  block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20496  } else {
20497  block_parameters = parse_block_parameters(parser, false, &opening, true, true, (uint16_t) (depth + 1));
20498  }
20499 
20500  accept1(parser, PM_TOKEN_NEWLINE);
20501  expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20502 
20503  pm_block_parameters_node_closing_set(block_parameters, &parser->previous);
20504  break;
20505  }
20506  case PM_CASE_PARAMETER: {
20507  pm_accepts_block_stack_push(parser, false);
20508  pm_token_t opening = not_provided(parser);
20509  block_parameters = parse_block_parameters(parser, false, &opening, true, false, (uint16_t) (depth + 1));
20510  pm_accepts_block_stack_pop(parser);
20511  break;
20512  }
20513  default: {
20514  block_parameters = NULL;
20515  break;
20516  }
20517  }
20518 
20519  pm_token_t opening;
20520  pm_node_t *body = NULL;
20521  parser->lambda_enclosure_nesting = previous_lambda_enclosure_nesting;
20522 
20523  if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20524  opening = parser->previous;
20525 
20526  if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20527  body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_BRACES, (uint16_t) (depth + 1));
20528  }
20529 
20530  parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20531  expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE);
20532  } else {
20533  expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20534  opening = parser->previous;
20535 
20537  pm_accepts_block_stack_push(parser, true);
20538  body = (pm_node_t *) parse_statements(parser, PM_CONTEXT_LAMBDA_DO_END, (uint16_t) (depth + 1));
20539  pm_accepts_block_stack_pop(parser);
20540  }
20541 
20542  if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20543  assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20544  body = (pm_node_t *) parse_rescues_implicit_begin(parser, opening_newline_index, &operator, opening.start, (pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1));
20545  } else {
20546  parser_warn_indentation_mismatch(parser, opening_newline_index, &operator, false, false);
20547  }
20548 
20549  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END);
20550  }
20551 
20552  pm_constant_id_list_t locals;
20553  pm_locals_order(parser, &parser->current_scope->locals, &locals, pm_parser_scope_toplevel_p(parser));
20554  pm_node_t *parameters = parse_blocklike_parameters(parser, (pm_node_t *) block_parameters, &operator, &parser->previous);
20555 
20556  pm_parser_scope_pop(parser);
20557  pm_accepts_block_stack_pop(parser);
20558 
20559  return (pm_node_t *) pm_lambda_node_create(parser, &locals, &operator, &opening, &parser->previous, parameters, body);
20560  }
20561  case PM_TOKEN_UPLUS: {
20562  if (binding_power > PM_BINDING_POWER_UNARY) {
20563  pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20564  }
20565  parser_lex(parser);
20566 
20567  pm_token_t operator = parser->previous;
20568  pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20569  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
20570 
20571  return (pm_node_t *) node;
20572  }
20573  case PM_TOKEN_STRING_BEGIN:
20574  return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20575  case PM_TOKEN_SYMBOL_BEGIN: {
20576  pm_lex_mode_t lex_mode = *parser->lex_modes.current;
20577  parser_lex(parser);
20578 
20579  return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20580  }
20581  default: {
20582  pm_context_t recoverable = context_recoverable(parser, &parser->current);
20583 
20584  if (recoverable != PM_CONTEXT_NONE) {
20585  parser->recovering = true;
20586 
20587  // If the given error is not the generic one, then we'll add it
20588  // here because it will provide more context in addition to the
20589  // recoverable error that we will also add.
20590  if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20591  pm_parser_err_prefix(parser, diag_id);
20592  }
20593 
20594  // If we get here, then we are assuming this token is closing a
20595  // parent context, so we'll indicate that to the user so that
20596  // they know how we behaved.
20597  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT, pm_token_type_human(parser->current.type), context_human(recoverable));
20598  } else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20599  // We're going to make a special case here, because "cannot
20600  // parse expression" is pretty generic, and we know here that we
20601  // have an unexpected token.
20602  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
20603  } else {
20604  pm_parser_err_prefix(parser, diag_id);
20605  }
20606 
20607  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
20608  }
20609  }
20610 }
20611 
20621 static pm_node_t *
20622 parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20623  pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
20624 
20625  // Contradicting binding powers, the right-hand-side value of the assignment
20626  // allows the `rescue` modifier.
20627  if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20628  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20629 
20630  pm_token_t rescue = parser->current;
20631  parser_lex(parser);
20632 
20633  pm_node_t *right = parse_expression(parser, binding_power, false, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20634  context_pop(parser);
20635 
20636  return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20637  }
20638 
20639  return value;
20640 }
20641 
/**
 * Walk the value of an assignment and call pm_locals_read for the local
 * variables found inside it. Begin, parentheses, and statements nodes are
 * descended into recursively; every other node type is ignored.
 *
 * @param parser The parser whose scopes are consulted.
 * @param node The (sub)node of the assignment value to inspect.
 */
20646 static void
20647 parse_assignment_value_local(pm_parser_t *parser, const pm_node_t *node) {
20648  switch (PM_NODE_TYPE(node)) {
20649  case PM_BEGIN_NODE: {
20650  const pm_begin_node_t *cast = (const pm_begin_node_t *) node;
       // Only the statements of the begin node are visited by this branch.
20651  if (cast->statements != NULL) parse_assignment_value_local(parser, (const pm_node_t *) cast->statements);
20652  break;
20653  }
       // NOTE(review): the `case` label and the declaration of `cast` for this
       // branch are absent from this listing (presumably a local variable write
       // node, given the `depth` and `name` fields) -- confirm against upstream.
       // The branch finds a scope with pm_parser_scope_find using `cast->depth`
       // and passes `cast->name` to pm_locals_read on that scope's locals.
20656  pm_locals_read(&pm_parser_scope_find(parser, cast->depth)->locals, cast->name);
20657  break;
20658  }
20659  case PM_PARENTHESES_NODE: {
20660  const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
       // Empty parentheses have no body to descend into.
20661  if (cast->body != NULL) parse_assignment_value_local(parser, cast->body);
20662  break;
20663  }
20664  case PM_STATEMENTS_NODE: {
20665  const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
20666  const pm_node_t *statement;
20667 
       // Every statement in the list is inspected in turn.
20668  PM_NODE_LIST_FOREACH(&cast->body, index, statement) {
20669  parse_assignment_value_local(parser, statement);
20670  }
20671  break;
20672  }
20673  default:
20674  break;
20675  }
20676 }
20677 
20690 static pm_node_t *
20691 parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, pm_diagnostic_id_t diag_id, uint16_t depth) {
20692  bool permitted = true;
20693  if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted = false;
20694 
20695  pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, diag_id, (uint16_t) (depth + 1));
20696  if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20697 
20698  parse_assignment_value_local(parser, value);
20699  bool single_value = true;
20700 
20701  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20702  single_value = false;
20703 
20704  pm_token_t opening = not_provided(parser);
20705  pm_array_node_t *array = pm_array_node_create(parser, &opening);
20706 
20707  pm_array_node_elements_append(array, value);
20708  value = (pm_node_t *) array;
20709 
20710  while (accept1(parser, PM_TOKEN_COMMA)) {
20711  pm_node_t *element = parse_starred_expression(parser, binding_power, false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
20712 
20713  pm_array_node_elements_append(array, element);
20714  if (PM_NODE_TYPE_P(element, PM_MISSING_NODE)) break;
20715 
20716  parse_assignment_value_local(parser, element);
20717  }
20718  }
20719 
20720  // Contradicting binding powers, the right-hand-side value of the assignment
20721  // allows the `rescue` modifier.
20722  if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20723  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
20724 
20725  pm_token_t rescue = parser->current;
20726  parser_lex(parser);
20727 
20728  bool accepts_command_call_inner = false;
20729 
20730  // RHS can accept command call iff the value is a call with arguments
20731  // but without parenthesis.
20732  if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20733  pm_call_node_t *call_node = (pm_call_node_t *) value;
20734  if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
20735  accepts_command_call_inner = true;
20736  }
20737  }
20738 
20739  pm_node_t *right = parse_expression(parser, binding_power, accepts_command_call_inner, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20740  context_pop(parser);
20741 
20742  return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
20743  }
20744 
20745  return value;
20746 }
20747 
20755 static void
20756 parse_call_operator_write(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
20757  if (call_node->arguments != NULL) {
20758  pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20759  pm_node_destroy(parser, (pm_node_t *) call_node->arguments);
20760  call_node->arguments = NULL;
20761  }
20762 
20763  if (call_node->block != NULL) {
20764  pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20765  pm_node_destroy(parser, (pm_node_t *) call_node->block);
20766  call_node->block = NULL;
20767  }
20768 }
20769 
/**
 * Data handed to the named-capture callback used while parsing a regular
 * expression.
 *
 * NOTE(review): most member declarations and the closing `} name;` of this
 * struct are missing from this listing; only `shared` survives. The callback
 * parse_regular_expression_named_capture reads the members `parser`, `call`,
 * `names`, `match`, and `shared` -- restore the full definition from upstream.
 */
20774 typedef struct {
20777 
20780 
20783 
20786 
       // True when the regular expression content is a PM_STRING_SHARED string;
       // the callback then builds capture locations directly from the capture's
       // source pointers.
20792  bool shared;
20794 
/**
 * Callback invoked for a named capture group while a regular expression is
 * parsed (it is the capture callback passed to pm_regexp_parse). When the
 * capture name is a valid local name that has not been seen before, a local
 * variable target for it is appended to the match write node held in the
 * callback data; that node is created on first use.
 *
 * @param capture The name of the capture group.
 * @param data The callback data, accessed below as `callback_data`.
 */
20799 static void
20800 parse_regular_expression_named_capture(const pm_string_t *capture, void *data) {
       // NOTE(review): the statement that declares `callback_data` (presumably
       // a cast of `data`) is missing from this listing -- confirm upstream.
20802 
20803  pm_parser_t *parser = callback_data->parser;
20804  pm_call_node_t *call = callback_data->call;
20805  pm_constant_id_list_t *names = &callback_data->names;
20806 
20807  const uint8_t *source = pm_string_source(capture);
20808  size_t length = pm_string_length(capture);
20809 
20810  pm_location_t location;
20811  pm_constant_id_t name;
20812 
20813  // If the name of the capture group isn't a valid identifier, we do
20814  // not add it to the local table.
20815  if (!pm_slice_is_valid_local(parser, source, source + length)) return;
20816 
20817  if (callback_data->shared) {
20818  // If the unescaped string is a slice of the source, then we can
20819  // copy the names directly. The pointers will line up.
20820  location = (pm_location_t) { .start = source, .end = source + length };
20821  name = pm_parser_constant_id_location(parser, location.start, location.end);
20822  } else {
20823  // Otherwise, the name is a slice of the malloc-ed owned string,
20824  // in which case we need to copy it out into a new string.
       // The target's location falls back to the location of the call's
       // receiver in this case.
20825  location = (pm_location_t) { .start = call->receiver->location.start, .end = call->receiver->location.end };
20826 
20827  void *memory = xmalloc(length);
       // Allocation failure is not recoverable here: the process aborts.
20828  if (memory == NULL) abort();
20829 
20830  memcpy(memory, source, length);
20831  name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
20832  }
20833 
20834  // Add this name to the list of constants if it is valid, not duplicated,
20835  // and not a keyword.
20836  if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20837  pm_constant_id_list_append(names, name);
20838 
       // A depth of -1 means the name was not found as an existing local.
20839  int depth;
20840  if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20841  // If the local is not already a local but it is a keyword, then we
20842  // do not want to add a capture for this.
       // Note that the name has already been appended to `names` above.
20843  if (pm_local_is_keyword((const char *) source, length)) return;
20844 
20845  // If the identifier is not already a local, then we will add it to
20846  // the local table.
20847  pm_parser_local_add(parser, name, location.start, location.end, 0);
20848  }
20849 
20850  // Here we lazily create the MatchWriteNode since we know we're
20851  // about to add a target.
20852  if (callback_data->match == NULL) {
20853  callback_data->match = pm_match_write_node_create(parser, call);
20854  }
20855 
20856  // Next, create the local variable target and add it to the list of
20857  // targets for the match.
       // A newly added local (depth == -1) is targeted at depth 0.
20858  pm_node_t *target = (pm_node_t *) pm_local_variable_target_node_create(parser, &location, name, depth == -1 ? 0 : (uint32_t) depth);
20859  pm_node_list_append(&callback_data->match->targets, target);
20860  }
20861 }
20862 
/**
 * Parse the content of a regular expression used as the receiver of `call`,
 * collecting its named capture groups through
 * parse_regular_expression_named_capture and reporting errors through
 * parse_regular_expression_error.
 *
 * @param parser The parser being driven.
 * @param content The content of the regular expression.
 * @param call The call node whose receiver is the regular expression.
 * @param extended_mode Forwarded unchanged to pm_regexp_parse.
 * @return The match write node if the capture callback created one, otherwise
 *     the original call node.
 */
20867 static pm_node_t *
20868 parse_regular_expression_named_captures(pm_parser_t *parser, const pm_string_t *content, pm_call_node_t *call, bool extended_mode) {
       // NOTE(review): the declaration line that opens this initializer
       // (the `callback_data` variable passed to pm_regexp_parse below) is
       // missing from this listing -- confirm against upstream.
20870  .parser = parser,
20871  .call = call,
20872  .names = { 0 },
20873  .shared = content->type == PM_STRING_SHARED
20874  };
20875 
       // NOTE(review): likewise, the declaration line that opens the
       // `error_data` initializer is missing from this listing.
20877  .parser = parser,
20878  .start = call->receiver->location.start,
20879  .end = call->receiver->location.end,
20880  .shared = content->type == PM_STRING_SHARED
20881  };
20882 
20883  pm_regexp_parse(parser, pm_string_source(content), pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
       // The list of seen names is only needed while parsing; release it now.
20884  pm_constant_id_list_free(&callback_data.names);
20885 
       // `match` is only set by the capture callback when it added a target.
20886  if (callback_data.match != NULL) {
20887  return (pm_node_t *) callback_data.match;
20888  } else {
20889  return (pm_node_t *) call;
20890  }
20891 }
20892 
20893 static inline pm_node_t *
20894 parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power, bool accepts_command_call, uint16_t depth) {
20895  pm_token_t token = parser->current;
20896 
20897  switch (token.type) {
20898  case PM_TOKEN_EQUAL: {
20899  switch (PM_NODE_TYPE(node)) {
20900  case PM_CALL_NODE: {
20901  // If we have no arguments to the call node and we need this
20902  // to be a target then this is either a method call or a
20903  // local variable write. This _must_ happen before the value
20904  // is parsed because it could be referenced in the value.
20905  pm_call_node_t *call_node = (pm_call_node_t *) node;
20907  pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20908  }
20909  }
20910  /* fallthrough */
20911  case PM_CASE_WRITABLE: {
20912  parser_lex(parser);
20913  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20914 
20915  if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
20916  pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
20917  }
20918 
20919  return parse_write(parser, node, &token, value);
20920  }
20921  case PM_SPLAT_NODE: {
20922  pm_multi_target_node_t *multi_target = pm_multi_target_node_create(parser);
20923  pm_multi_target_node_targets_append(parser, multi_target, node);
20924 
20925  parser_lex(parser);
20926  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20927  return parse_write(parser, (pm_node_t *) multi_target, &token, value);
20928  }
20930  case PM_FALSE_NODE:
20931  case PM_SOURCE_FILE_NODE:
20932  case PM_SOURCE_LINE_NODE:
20933  case PM_NIL_NODE:
20934  case PM_SELF_NODE:
20935  case PM_TRUE_NODE: {
20936  // In these special cases, we have specific error messages
20937  // and we will replace them with local variable writes.
20938  parser_lex(parser);
20939  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20940  return parse_unwriteable_write(parser, node, &token, value);
20941  }
20942  default:
20943  // In this case we have an = sign, but we don't know what
20944  // it's for. We need to treat it as an error. We'll mark it
20945  // as an error and skip past it.
20946  parser_lex(parser);
20947  pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
20948  return node;
20949  }
20950  }
20952  switch (PM_NODE_TYPE(node)) {
20955  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20956  /* fallthrough */
20958  parser_lex(parser);
20959 
20960  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20961  pm_node_t *result = (pm_node_t *) pm_global_variable_and_write_node_create(parser, node, &token, value);
20962 
20963  pm_node_destroy(parser, node);
20964  return result;
20965  }
20967  parser_lex(parser);
20968 
20969  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20970  pm_node_t *result = (pm_node_t *) pm_class_variable_and_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
20971 
20972  pm_node_destroy(parser, node);
20973  return result;
20974  }
20975  case PM_CONSTANT_PATH_NODE: {
20976  parser_lex(parser);
20977 
20978  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20979  pm_node_t *write = (pm_node_t *) pm_constant_path_and_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
20980 
20981  return parse_shareable_constant_write(parser, write);
20982  }
20983  case PM_CONSTANT_READ_NODE: {
20984  parser_lex(parser);
20985 
20986  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20987  pm_node_t *write = (pm_node_t *) pm_constant_and_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
20988 
20989  pm_node_destroy(parser, node);
20990  return parse_shareable_constant_write(parser, write);
20991  }
20993  parser_lex(parser);
20994 
20995  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20996  pm_node_t *result = (pm_node_t *) pm_instance_variable_and_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
20997 
20998  pm_node_destroy(parser, node);
20999  return result;
21000  }
21003  parser_lex(parser);
21004 
21005  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21006  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21007 
21008  pm_node_destroy(parser, node);
21009  return result;
21010  }
21011  case PM_CALL_NODE: {
21012  pm_call_node_t *cast = (pm_call_node_t *) node;
21013 
21014  // If we have a vcall (a method with no arguments and no
21015  // receiver that could have been a local variable) then we
21016  // will transform it into a local variable write.
21018  pm_location_t *message_loc = &cast->message_loc;
21019  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21020 
21021  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21022  parser_lex(parser);
21023 
21024  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21025  pm_node_t *result = (pm_node_t *) pm_local_variable_and_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21026 
21027  pm_node_destroy(parser, (pm_node_t *) cast);
21028  return result;
21029  }
21030 
21031  // Move past the token here so that we have already added
21032  // the local variable by this point.
21033  parser_lex(parser);
21034 
21035  // If there is no call operator and the message is "[]" then
21036  // this is an aref expression, and we can transform it into
21037  // an aset expression.
21038  if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21039  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21040  return (pm_node_t *) pm_index_and_write_node_create(parser, cast, &token, value);
21041  }
21042 
21043  // If this node cannot be writable, then we have an error.
21044  if (pm_call_node_writable_p(parser, cast)) {
21045  parse_write_name(parser, &cast->name);
21046  } else {
21047  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21048  }
21049 
21050  parse_call_operator_write(parser, cast, &token);
21051  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
21052  return (pm_node_t *) pm_call_and_write_node_create(parser, cast, &token, value);
21053  }
21054  case PM_MULTI_WRITE_NODE: {
21055  parser_lex(parser);
21056  pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
21057  return node;
21058  }
21059  default:
21060  parser_lex(parser);
21061 
21062  // In this case we have an &&= sign, but we don't know what it's for.
21063  // We need to treat it as an error. For now, we'll mark it as an error
21064  // and just skip right past it.
21065  pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
21066  return node;
21067  }
21068  }
21069  case PM_TOKEN_PIPE_PIPE_EQUAL: {
21070  switch (PM_NODE_TYPE(node)) {
21073  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21074  /* fallthrough */
21076  parser_lex(parser);
21077 
21078  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21079  pm_node_t *result = (pm_node_t *) pm_global_variable_or_write_node_create(parser, node, &token, value);
21080 
21081  pm_node_destroy(parser, node);
21082  return result;
21083  }
21085  parser_lex(parser);
21086 
21087  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21088  pm_node_t *result = (pm_node_t *) pm_class_variable_or_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21089 
21090  pm_node_destroy(parser, node);
21091  return result;
21092  }
21093  case PM_CONSTANT_PATH_NODE: {
21094  parser_lex(parser);
21095 
21096  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21097  pm_node_t *write = (pm_node_t *) pm_constant_path_or_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21098 
21099  return parse_shareable_constant_write(parser, write);
21100  }
21101  case PM_CONSTANT_READ_NODE: {
21102  parser_lex(parser);
21103 
21104  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21105  pm_node_t *write = (pm_node_t *) pm_constant_or_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21106 
21107  pm_node_destroy(parser, node);
21108  return parse_shareable_constant_write(parser, write);
21109  }
21111  parser_lex(parser);
21112 
21113  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21114  pm_node_t *result = (pm_node_t *) pm_instance_variable_or_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21115 
21116  pm_node_destroy(parser, node);
21117  return result;
21118  }
21121  parser_lex(parser);
21122 
21123  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21124  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21125 
21126  pm_node_destroy(parser, node);
21127  return result;
21128  }
21129  case PM_CALL_NODE: {
21130  pm_call_node_t *cast = (pm_call_node_t *) node;
21131 
21132  // If we have a vcall (a method with no arguments and no
21133  // receiver that could have been a local variable) then we
21134  // will transform it into a local variable write.
21136  pm_location_t *message_loc = &cast->message_loc;
21137  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21138 
21139  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21140  parser_lex(parser);
21141 
21142  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21143  pm_node_t *result = (pm_node_t *) pm_local_variable_or_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21144 
21145  pm_node_destroy(parser, (pm_node_t *) cast);
21146  return result;
21147  }
21148 
21149  // Move past the token here so that we have already added
21150  // the local variable by this point.
21151  parser_lex(parser);
21152 
21153  // If there is no call operator and the message is "[]" then
21154  // this is an aref expression, and we can transform it into
21155  // an aset expression.
21156  if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21157  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21158  return (pm_node_t *) pm_index_or_write_node_create(parser, cast, &token, value);
21159  }
21160 
21161  // If this node cannot be writable, then we have an error.
21162  if (pm_call_node_writable_p(parser, cast)) {
21163  parse_write_name(parser, &cast->name);
21164  } else {
21165  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21166  }
21167 
21168  parse_call_operator_write(parser, cast, &token);
21169  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
21170  return (pm_node_t *) pm_call_or_write_node_create(parser, cast, &token, value);
21171  }
21172  case PM_MULTI_WRITE_NODE: {
21173  parser_lex(parser);
21174  pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
21175  return node;
21176  }
21177  default:
21178  parser_lex(parser);
21179 
21180  // In this case we have an ||= sign, but we don't know what it's for.
21181  // We need to treat it as an error. For now, we'll mark it as an error
21182  // and just skip right past it.
21183  pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
21184  return node;
21185  }
21186  }
21188  case PM_TOKEN_CARET_EQUAL:
21191  case PM_TOKEN_MINUS_EQUAL:
21193  case PM_TOKEN_PIPE_EQUAL:
21194  case PM_TOKEN_PLUS_EQUAL:
21195  case PM_TOKEN_SLASH_EQUAL:
21196  case PM_TOKEN_STAR_EQUAL:
21197  case PM_TOKEN_STAR_STAR_EQUAL: {
21198  switch (PM_NODE_TYPE(node)) {
21201  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21202  /* fallthrough */
21204  parser_lex(parser);
21205 
21206  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21207  pm_node_t *result = (pm_node_t *) pm_global_variable_operator_write_node_create(parser, node, &token, value);
21208 
21209  pm_node_destroy(parser, node);
21210  return result;
21211  }
21213  parser_lex(parser);
21214 
21215  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21216  pm_node_t *result = (pm_node_t *) pm_class_variable_operator_write_node_create(parser, (pm_class_variable_read_node_t *) node, &token, value);
21217 
21218  pm_node_destroy(parser, node);
21219  return result;
21220  }
21221  case PM_CONSTANT_PATH_NODE: {
21222  parser_lex(parser);
21223 
21224  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21225  pm_node_t *write = (pm_node_t *) pm_constant_path_operator_write_node_create(parser, (pm_constant_path_node_t *) node, &token, value);
21226 
21227  return parse_shareable_constant_write(parser, write);
21228  }
21229  case PM_CONSTANT_READ_NODE: {
21230  parser_lex(parser);
21231 
21232  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21233  pm_node_t *write = (pm_node_t *) pm_constant_operator_write_node_create(parser, (pm_constant_read_node_t *) node, &token, value);
21234 
21235  pm_node_destroy(parser, node);
21236  return parse_shareable_constant_write(parser, write);
21237  }
21239  parser_lex(parser);
21240 
21241  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21242  pm_node_t *result = (pm_node_t *) pm_instance_variable_operator_write_node_create(parser, (pm_instance_variable_read_node_t *) node, &token, value);
21243 
21244  pm_node_destroy(parser, node);
21245  return result;
21246  }
21249  parser_lex(parser);
21250 
21251  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21252  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->name, cast->depth);
21253 
21254  pm_node_destroy(parser, node);
21255  return result;
21256  }
21257  case PM_CALL_NODE: {
21258  parser_lex(parser);
21259  pm_call_node_t *cast = (pm_call_node_t *) node;
21260 
21261  // If we have a vcall (a method with no arguments and no
21262  // receiver that could have been a local variable) then we
21263  // will transform it into a local variable write.
21265  pm_location_t *message_loc = &cast->message_loc;
21266  pm_refute_numbered_parameter(parser, message_loc->start, message_loc->end);
21267 
21268  pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc->start, message_loc->end, 1);
21269  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21270  pm_node_t *result = (pm_node_t *) pm_local_variable_operator_write_node_create(parser, (pm_node_t *) cast, &token, value, constant_id, 0);
21271 
21272  pm_node_destroy(parser, (pm_node_t *) cast);
21273  return result;
21274  }
21275 
21276  // If there is no call operator and the message is "[]" then
21277  // this is an aref expression, and we can transform it into
21278  // an aset expression.
21279  if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
21280  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21281  return (pm_node_t *) pm_index_operator_write_node_create(parser, cast, &token, value);
21282  }
21283 
21284  // If this node cannot be writable, then we have an error.
21285  if (pm_call_node_writable_p(parser, cast)) {
21286  parse_write_name(parser, &cast->name);
21287  } else {
21288  pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
21289  }
21290 
21291  parse_call_operator_write(parser, cast, &token);
21292  pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21293  return (pm_node_t *) pm_call_operator_write_node_create(parser, cast, &token, value);
21294  }
21295  case PM_MULTI_WRITE_NODE: {
21296  parser_lex(parser);
21297  pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21298  return node;
21299  }
21300  default:
21301  parser_lex(parser);
21302 
21303  // In this case we have an operator but we don't know what it's for.
21304  // We need to treat it as an error. For now, we'll mark it as an error
21305  // and just skip right past it.
21306  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, pm_token_type_human(parser->current.type));
21307  return node;
21308  }
21309  }
21311  case PM_TOKEN_KEYWORD_AND: {
21312  parser_lex(parser);
21313 
21314  pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_AND, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21315  return (pm_node_t *) pm_and_node_create(parser, node, &token, right);
21316  }
21317  case PM_TOKEN_KEYWORD_OR:
21318  case PM_TOKEN_PIPE_PIPE: {
21319  parser_lex(parser);
21320 
21321  pm_node_t *right = parse_expression(parser, binding_power, parser->previous.type == PM_TOKEN_KEYWORD_OR, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21322  return (pm_node_t *) pm_or_node_create(parser, node, &token, right);
21323  }
21324  case PM_TOKEN_EQUAL_TILDE: {
21325  // Note that we _must_ parse the value before adding the local
21326  // variables in order to properly mirror the behavior of Ruby. For
21327  // example,
21328  //
21329  // /(?<foo>bar)/ =~ foo
21330  //
21331  // In this case, `foo` should be a method call and not a local yet.
21332  parser_lex(parser);
21333  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21334 
21335  // By default, we're going to create a call node and then return it.
21336  pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
21337  pm_node_t *result = (pm_node_t *) call;
21338 
21339  // If the receiver of this =~ is a regular expression node, then we
21340  // need to introduce local variables for it based on its named
21341  // capture groups.
21343  // It's possible to have an interpolated regular expression node
21344  // that only contains strings. This is because it can be split
21345  // up by a heredoc. In this case we need to concat the unescaped
21346  // strings together and then parse them as a regular expression.
21347  pm_node_list_t *parts = &((pm_interpolated_regular_expression_node_t *) node)->parts;
21348 
21349  bool interpolated = false;
21350  size_t total_length = 0;
21351 
21352  pm_node_t *part;
21353  PM_NODE_LIST_FOREACH(parts, index, part) {
21354  if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21355  total_length += pm_string_length(&((pm_string_node_t *) part)->unescaped);
21356  } else {
21357  interpolated = true;
21358  break;
21359  }
21360  }
21361 
21362  if (!interpolated && total_length > 0) {
21363  void *memory = xmalloc(total_length);
21364  if (!memory) abort();
21365 
21366  uint8_t *cursor = memory;
21367  PM_NODE_LIST_FOREACH(parts, index, part) {
21368  pm_string_t *unescaped = &((pm_string_node_t *) part)->unescaped;
21369  size_t length = pm_string_length(unescaped);
21370 
21371  memcpy(cursor, pm_string_source(unescaped), length);
21372  cursor += length;
21373  }
21374 
21375  pm_string_t owned;
21376  pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21377 
21378  result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21379  pm_string_free(&owned);
21380  }
21381  } else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21382  // If we have a regular expression node, then we can just parse
21383  // the named captures directly off the unescaped string.
21384  const pm_string_t *content = &((pm_regular_expression_node_t *) node)->unescaped;
21385  result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21386  }
21387 
21388  return result;
21389  }
21390  case PM_TOKEN_UAMPERSAND:
21391  case PM_TOKEN_USTAR:
21392  case PM_TOKEN_USTAR_STAR:
21393  // The only times this will occur are when we are in an error state,
21394  // but we'll put them in here so that errors can propagate.
21395  case PM_TOKEN_BANG_EQUAL:
21396  case PM_TOKEN_BANG_TILDE:
21397  case PM_TOKEN_EQUAL_EQUAL:
21400  case PM_TOKEN_CARET:
21401  case PM_TOKEN_PIPE:
21402  case PM_TOKEN_AMPERSAND:
21404  case PM_TOKEN_LESS_LESS:
21405  case PM_TOKEN_MINUS:
21406  case PM_TOKEN_PLUS:
21407  case PM_TOKEN_PERCENT:
21408  case PM_TOKEN_SLASH:
21409  case PM_TOKEN_STAR:
21410  case PM_TOKEN_STAR_STAR: {
21411  parser_lex(parser);
21412  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21413  return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21414  }
21415  case PM_TOKEN_GREATER:
21417  case PM_TOKEN_LESS:
21418  case PM_TOKEN_LESS_EQUAL: {
21419  if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21420  PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21421  }
21422 
21423  parser_lex(parser);
21424  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21425  return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON);
21426  }
21428  case PM_TOKEN_DOT: {
21429  parser_lex(parser);
21430  pm_token_t operator = parser->previous;
21431  pm_arguments_t arguments = { 0 };
21432 
21433  // This if statement handles the foo.() syntax.
21434  if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21435  parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21436  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21437  }
21438 
21439  pm_token_t message;
21440 
21441  switch (parser->current.type) {
21442  case PM_CASE_OPERATOR:
21443  case PM_CASE_KEYWORD:
21444  case PM_TOKEN_CONSTANT:
21445  case PM_TOKEN_IDENTIFIER:
21446  case PM_TOKEN_METHOD_NAME: {
21447  parser_lex(parser);
21448  message = parser->previous;
21449  break;
21450  }
21451  default: {
21452  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_MESSAGE, pm_token_type_human(parser->current.type));
21453  message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21454  }
21455  }
21456 
21457  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21458  pm_call_node_t *call = pm_call_node_call_create(parser, node, &operator, &message, &arguments);
21459 
21460  if (
21461  (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21462  arguments.arguments == NULL &&
21463  arguments.opening_loc.start == NULL &&
21464  match1(parser, PM_TOKEN_COMMA)
21465  ) {
21466  return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21467  } else {
21468  return (pm_node_t *) call;
21469  }
21470  }
21471  case PM_TOKEN_DOT_DOT:
21472  case PM_TOKEN_DOT_DOT_DOT: {
21473  parser_lex(parser);
21474 
21475  pm_node_t *right = NULL;
21476  if (token_begins_expression_p(parser->current.type)) {
21477  right = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21478  }
21479 
21480  return (pm_node_t *) pm_range_node_create(parser, node, &token, right);
21481  }
21483  pm_token_t keyword = parser->current;
21484  parser_lex(parser);
21485 
21486  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21487  return (pm_node_t *) pm_if_node_modifier_create(parser, node, &keyword, predicate);
21488  }
21490  pm_token_t keyword = parser->current;
21491  parser_lex(parser);
21492 
21493  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21494  return (pm_node_t *) pm_unless_node_modifier_create(parser, node, &keyword, predicate);
21495  }
21497  parser_lex(parser);
21498  pm_statements_node_t *statements = pm_statements_node_create(parser);
21499  pm_statements_node_body_append(parser, statements, node, true);
21500 
21501  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21502  return (pm_node_t *) pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21503  }
21505  parser_lex(parser);
21506  pm_statements_node_t *statements = pm_statements_node_create(parser);
21507  pm_statements_node_body_append(parser, statements, node, true);
21508 
21509  pm_node_t *predicate = parse_value_expression(parser, binding_power, true, false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21510  return (pm_node_t *) pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0);
21511  }
21512  case PM_TOKEN_QUESTION_MARK: {
21513  context_push(parser, PM_CONTEXT_TERNARY);
21514  pm_node_list_t current_block_exits = { 0 };
21515  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
21516 
21517  pm_token_t qmark = parser->current;
21518  parser_lex(parser);
21519 
21520  pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
21521 
21522  if (parser->recovering) {
21523  // If parsing the true expression of this ternary resulted in a syntax
21524  // error that we can recover from, then we're going to put missing nodes
21525  // and tokens into the remaining places. We want to be sure to do this
21526  // before the `expect` function call to make sure it doesn't
21527  // accidentally move past a ':' token that occurs after the syntax
21528  // error.
21529  pm_token_t colon = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
21530  pm_node_t *false_expression = (pm_node_t *) pm_missing_node_create(parser, colon.start, colon.end);
21531 
21532  context_pop(parser);
21533  pop_block_exits(parser, previous_block_exits);
21534  pm_node_list_free(&current_block_exits);
21535 
21536  return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21537  }
21538 
21539  accept1(parser, PM_TOKEN_NEWLINE);
21540  expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21541 
21542  pm_token_t colon = parser->previous;
21543  pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21544 
21545  context_pop(parser);
21546  pop_block_exits(parser, previous_block_exits);
21547  pm_node_list_free(&current_block_exits);
21548 
21549  return (pm_node_t *) pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression);
21550  }
21551  case PM_TOKEN_COLON_COLON: {
21552  parser_lex(parser);
21553  pm_token_t delimiter = parser->previous;
21554 
21555  switch (parser->current.type) {
21556  case PM_TOKEN_CONSTANT: {
21557  parser_lex(parser);
21558  pm_node_t *path;
21559 
21560  if (
21561  (parser->current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21562  (accepts_command_call && (token_begins_expression_p(parser->current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21563  ) {
21564  // If we have a constant immediately following a '::' operator, then
21565  // this can either be a constant path or a method call, depending on
21566  // what follows the constant.
21567  //
21568  // If we have parentheses, then this is a method call. That would
21569  // look like Foo::Bar().
21570  pm_token_t message = parser->previous;
21571  pm_arguments_t arguments = { 0 };
21572 
21573  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21574  path = (pm_node_t *) pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21575  } else {
21576  // Otherwise, this is a constant path. That would look like Foo::Bar.
21577  path = (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21578  }
21579 
21580  // If this is followed by a comma then it is a multiple assignment.
21581  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21582  return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21583  }
21584 
21585  return path;
21586  }
21587  case PM_CASE_OPERATOR:
21588  case PM_CASE_KEYWORD:
21589  case PM_TOKEN_IDENTIFIER:
21590  case PM_TOKEN_METHOD_NAME: {
21591  parser_lex(parser);
21592  pm_token_t message = parser->previous;
21593 
21594  // If we have an identifier following a '::' operator, then it is for
21595  // sure a method call.
21596  pm_arguments_t arguments = { 0 };
21597  parse_arguments_list(parser, &arguments, true, accepts_command_call, (uint16_t) (depth + 1));
21598  pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21599 
21600  // If this is followed by a comma then it is a multiple assignment.
21601  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21602  return parse_targets_validate(parser, (pm_node_t *) call, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21603  }
21604 
21605  return (pm_node_t *) call;
21606  }
21608  // If we have a parenthesis following a '::' operator, then it is the
21609  // method call shorthand. That would look like Foo::(bar).
21610  pm_arguments_t arguments = { 0 };
21611  parse_arguments_list(parser, &arguments, true, false, (uint16_t) (depth + 1));
21612 
21613  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
21614  }
21615  default: {
21616  expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21617  return (pm_node_t *) pm_constant_path_node_create(parser, node, &delimiter, &parser->previous);
21618  }
21619  }
21620  }
21622  context_push(parser, PM_CONTEXT_RESCUE_MODIFIER);
21623  parser_lex(parser);
21624  accept1(parser, PM_TOKEN_NEWLINE);
21625 
21626  pm_node_t *value = parse_expression(parser, binding_power, true, false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21627  context_pop(parser);
21628 
21629  return (pm_node_t *) pm_rescue_modifier_node_create(parser, node, &token, value);
21630  }
21631  case PM_TOKEN_BRACKET_LEFT: {
21632  parser_lex(parser);
21633 
21634  pm_arguments_t arguments = { 0 };
21635  arguments.opening_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21636 
21637  if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21638  pm_accepts_block_stack_push(parser, true);
21639  parse_arguments(parser, &arguments, false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
21640  pm_accepts_block_stack_pop(parser);
21641  expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21642  }
21643 
21644  arguments.closing_loc = PM_LOCATION_TOKEN_VALUE(&parser->previous);
21645 
21646  // If we have a comma after the closing bracket then this is a multiple
21647  // assignment and we should parse the targets.
21648  if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21649  pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21650  return parse_targets_validate(parser, (pm_node_t *) aref, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21651  }
21652 
21653  // If we're at the end of the arguments, we can now check if there is a
21654  // block node that starts with a {. If there is, then we can parse it and
21655  // add it to the arguments.
21656  pm_block_node_t *block = NULL;
21657  if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21658  block = parse_block(parser, (uint16_t) (depth + 1));
21659  pm_arguments_validate_block(parser, &arguments, block);
21660  } else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21661  block = parse_block(parser, (uint16_t) (depth + 1));
21662  }
21663 
21664  if (block != NULL) {
21665  if (arguments.block != NULL) {
21666  pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
21667  if (arguments.arguments == NULL) {
21668  arguments.arguments = pm_arguments_node_create(parser);
21669  }
21670  pm_arguments_node_arguments_append(arguments.arguments, arguments.block);
21671  }
21672 
21673  arguments.block = (pm_node_t *) block;
21674  }
21675 
21676  return (pm_node_t *) pm_call_node_aref_create(parser, node, &arguments);
21677  }
21678  case PM_TOKEN_KEYWORD_IN: {
21679  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21680  parser->pattern_matching_newlines = true;
21681 
21682  pm_token_t operator = parser->current;
21683  parser->command_start = false;
21684  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21685  parser_lex(parser);
21686 
21687  pm_constant_id_list_t captures = { 0 };
21688  pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21689 
21690  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21691  pm_constant_id_list_free(&captures);
21692 
21693  return (pm_node_t *) pm_match_predicate_node_create(parser, node, pattern, &operator);
21694  }
21695  case PM_TOKEN_EQUAL_GREATER: {
21696  bool previous_pattern_matching_newlines = parser->pattern_matching_newlines;
21697  parser->pattern_matching_newlines = true;
21698 
21699  pm_token_t operator = parser->current;
21700  parser->command_start = false;
21701  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21702  parser_lex(parser);
21703 
21704  pm_constant_id_list_t captures = { 0 };
21705  pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21706 
21707  parser->pattern_matching_newlines = previous_pattern_matching_newlines;
21708  pm_constant_id_list_free(&captures);
21709 
21710  return (pm_node_t *) pm_match_required_node_create(parser, node, pattern, &operator);
21711  }
21712  default:
21713  assert(false && "unreachable");
21714  return NULL;
21715  }
21716 }
21717 
// PM_PARSE_PATTERN_TOP and PM_PARSE_PATTERN_MULTI are passed as flags to
// parse_pattern in the function above. All three PM_PARSE_PATTERN_* macros are
// undefined here, so code below this point can no longer refer to them.
21718 #undef PM_PARSE_PATTERN_SINGLE
21719 #undef PM_PARSE_PATTERN_TOP
21720 #undef PM_PARSE_PATTERN_MULTI
21721 
21726 static inline bool
21727 pm_call_node_command_p(const pm_call_node_t *node) {
21728  return (
21729  (node->opening_loc.start == NULL) &&
21730  (node->block == NULL || PM_NODE_TYPE_P(node->block, PM_BLOCK_ARGUMENT_NODE)) &&
21731  (node->arguments != NULL || node->block != NULL)
21732  );
21733 }
21734 
21743 static pm_node_t *
21744 parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool accepts_command_call, bool accepts_label, pm_diagnostic_id_t diag_id, uint16_t depth) {
21745  if (PRISM_UNLIKELY(depth >= PRISM_DEPTH_MAXIMUM)) {
21746  pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21747  return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
21748  }
21749 
21750  pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
21751 
21752  switch (PM_NODE_TYPE(node)) {
21753  case PM_MISSING_NODE:
21754  // If we found a syntax error, then the type of node returned by
21755  // parse_expression_prefix is going to be a missing node.
21756  return node;
21757  case PM_PRE_EXECUTION_NODE:
21760  case PM_ALIAS_METHOD_NODE:
21761  case PM_MULTI_WRITE_NODE:
21762  case PM_UNDEF_NODE:
21763  // These expressions are statements, and cannot be followed by
21764  // operators (except modifiers).
21765  if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21766  return node;
21767  }
21768  break;
21769  case PM_CALL_NODE:
21770  // If we have a call node, then we need to check if it looks like a
21771  // method call without parentheses that contains arguments. If it
21772  // does, then it has different rules for parsing infix operators,
21773  // namely that it only accepts composition (and/or) and modifiers
21774  // (if/unless/etc.).
21775  if ((pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((pm_call_node_t *) node)) {
21776  return node;
21777  }
21778  break;
21779  case PM_SYMBOL_NODE:
21780  // If we have a symbol node that is being parsed as a label, then we
21781  // need to immediately return, because there should never be an
21782  // infix operator following this node.
21783  if (pm_symbol_node_label_p(node)) {
21784  return node;
21785  }
21786  default:
21787  break;
21788  }
21789 
21790  // Otherwise we'll look and see if the next token can be parsed as an infix
21791  // operator. If it can, then we'll parse it using parse_expression_infix.
21792  pm_binding_powers_t current_binding_powers;
21793  pm_token_type_t current_token_type;
21794 
21795  while (
21796  current_token_type = parser->current.type,
21797  current_binding_powers = pm_binding_powers[current_token_type],
21798  binding_power <= current_binding_powers.left &&
21799  current_binding_powers.binary
21800  ) {
21801  node = parse_expression_infix(parser, node, binding_power, current_binding_powers.right, accepts_command_call, (uint16_t) (depth + 1));
21802 
21803  switch (PM_NODE_TYPE(node)) {
21804  case PM_MULTI_WRITE_NODE:
21805  // Multi-write nodes are statements, and cannot be followed by
21806  // operators except modifiers.
21807  if (pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21808  return node;
21809  }
21810  break;
21817  // These expressions are statements, by virtue of the right-hand
21818  // side of their write being an implicit array.
21819  if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21820  return node;
21821  }
21822  break;
21823  case PM_CALL_NODE:
21824  // These expressions are also statements, by virtue of the
21825  // right-hand side of the expression (i.e., the last argument to
21826  // the call node) being an implicit array.
21827  if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->current.type].left > PM_BINDING_POWER_MODIFIER) {
21828  return node;
21829  }
21830  break;
21831  default:
21832  break;
21833  }
21834 
21835  // If the operator is nonassoc and we should not be able to parse the
21836  // upcoming infix operator, break.
21837  if (current_binding_powers.nonassoc) {
21838  // If this is a non-assoc operator and we are about to parse the
21839  // exact same operator, then we need to add an error.
21840  if (match1(parser, current_token_type)) {
21841  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21842  break;
21843  }
21844 
21845  // If this is an endless range, then we need to reject a couple of
21846  // additional operators because it violates the normal operator
21847  // precedence rules. Those patterns are:
21848  //
21849  // 1.. & 2
21850  // 1.. * 2
21851  //
21852  if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((pm_range_node_t *) node)->right == NULL) {
21854  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_NON_ASSOCIATIVE_OPERATOR, pm_token_type_human(parser->current.type), pm_token_type_human(current_token_type));
21855  break;
21856  }
21857 
21858  if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->current.type].left) {
21859  break;
21860  }
21861  } else if (current_binding_powers.left <= pm_binding_powers[parser->current.type].left) {
21862  break;
21863  }
21864  }
21865 
21866  if (accepts_command_call) {
21867  // A command-style method call is only accepted on method chains.
21868  // Thus, we check whether the parsed node can continue method chains.
21869  // The method chain can continue if the parsed node is one of the following five kinds:
21870  // (1) index access: foo[1]
21871  // (2) attribute access: foo.bar
21872  // (3) method call with parenthesis: foo.bar(1)
21873  // (4) method call with a block: foo.bar do end
21874  // (5) constant path: foo::Bar
21875  switch (node->type) {
21876  case PM_CALL_NODE: {
21877  pm_call_node_t *cast = (pm_call_node_t *)node;
21878  if (
21879  // (1) foo[1]
21880  !(
21881  cast->call_operator_loc.start == NULL &&
21882  cast->message_loc.start != NULL &&
21883  cast->message_loc.start[0] == '[' &&
21884  cast->message_loc.end[-1] == ']'
21885  ) &&
21886  // (2) foo.bar
21887  !(
21888  cast->call_operator_loc.start != NULL &&
21889  cast->arguments == NULL &&
21890  cast->block == NULL &&
21891  cast->opening_loc.start == NULL
21892  ) &&
21893  // (3) foo.bar(1)
21894  !(
21895  cast->call_operator_loc.start != NULL &&
21896  cast->opening_loc.start != NULL
21897  ) &&
21898  // (4) foo.bar do end
21899  !(
21900  cast->block != NULL && PM_NODE_TYPE_P(cast->block, PM_BLOCK_NODE)
21901  )
21902  ) {
21903  accepts_command_call = false;
21904  }
21905  break;
21906  }
21907  // (5) foo::Bar
21908  case PM_CONSTANT_PATH_NODE:
21909  break;
21910  default:
21911  accepts_command_call = false;
21912  break;
21913  }
21914  }
21915  }
21916 
21917  return node;
21918 }
21919 
21924 static pm_statements_node_t *
21925 wrap_statements(pm_parser_t *parser, pm_statements_node_t *statements) {
21926  if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
21927  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21928  pm_arguments_node_arguments_append(
21929  arguments,
21930  (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2))
21931  );
21932 
21933  pm_statements_node_body_append(parser, statements, (pm_node_t *) pm_call_node_fcall_synthesized_create(
21934  parser,
21935  arguments,
21936  pm_parser_constant_id_constant(parser, "print", 5)
21937  ), true);
21938  }
21939 
21940  if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
21941  if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
21942  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21943  pm_arguments_node_arguments_append(
21944  arguments,
21945  (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$;", 2))
21946  );
21947 
21948  pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$_", 2));
21949  pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, (pm_node_t *) receiver, "split", arguments);
21950 
21951  pm_global_variable_write_node_t *write = pm_global_variable_write_node_synthesized_create(
21952  parser,
21953  pm_parser_constant_id_constant(parser, "$F", 2),
21954  (pm_node_t *) call
21955  );
21956 
21957  pm_statements_node_body_prepend(statements, (pm_node_t *) write);
21958  }
21959 
21960  pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
21961  pm_arguments_node_arguments_append(
21962  arguments,
21963  (pm_node_t *) pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser, "$/", 2))
21964  );
21965 
21966  if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
21967  pm_keyword_hash_node_t *keywords = pm_keyword_hash_node_create(parser);
21968  pm_keyword_hash_node_elements_append(keywords, (pm_node_t *) pm_assoc_node_create(
21969  parser,
21970  (pm_node_t *) pm_symbol_node_synthesized_create(parser, "chomp"),
21971  &(pm_token_t) { .type = PM_TOKEN_NOT_PROVIDED, .start = parser->start, .end = parser->start },
21972  (pm_node_t *) pm_true_node_synthesized_create(parser)
21973  ));
21974 
21975  pm_arguments_node_arguments_append(arguments, (pm_node_t *) keywords);
21976  pm_node_flag_set((pm_node_t *) arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
21977  }
21978 
21979  pm_statements_node_t *wrapped_statements = pm_statements_node_create(parser);
21980  pm_statements_node_body_append(parser, wrapped_statements, (pm_node_t *) pm_while_node_synthesized_create(
21981  parser,
21982  (pm_node_t *) pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser, "gets", 4)),
21983  statements
21984  ), true);
21985 
21986  statements = wrapped_statements;
21987  }
21988 
21989  return statements;
21990 }
21991 
21995 static pm_node_t *
21996 parse_program(pm_parser_t *parser) {
21997  // If the current scope is NULL, then we want to push a new top level scope.
21998  // The current scope could exist in the event that we are parsing an eval
21999  // and the user has passed into scopes that already exist.
22000  if (parser->current_scope == NULL) {
22001  pm_parser_scope_push(parser, true);
22002  }
22003 
22004  pm_node_list_t current_block_exits = { 0 };
22005  pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
22006 
22007  parser_lex(parser);
22008  pm_statements_node_t *statements = parse_statements(parser, PM_CONTEXT_MAIN, 0);
22009 
22010  if (statements == NULL) {
22011  statements = pm_statements_node_create(parser);
22012  } else if (!parser->parsing_eval) {
22013  // If we have statements, then the top-level statement should be
22014  // explicitly checked as well. We have to do this here because
22015  // everywhere else we check all but the last statement.
22016  assert(statements->body.size > 0);
22017  pm_void_statement_check(parser, statements->body.nodes[statements->body.size - 1]);
22018  }
22019 
22020  pm_constant_id_list_t locals;
22021  pm_locals_order(parser, &parser->current_scope->locals, &locals, true);
22022  pm_parser_scope_pop(parser);
22023 
22024  // If this is an empty file, then we're still going to parse all of the
22025  // statements in order to gather up all of the comments and such. Here we'll
22026  // correct the location information.
22027  if (pm_statements_node_body_length(statements) == 0) {
22028  pm_statements_node_location_set(statements, parser->start, parser->start);
22029  }
22030 
22031  // At the top level, see if we need to wrap the statements in a program
22032  // node with a while loop based on the options.
22034  statements = wrap_statements(parser, statements);
22035  } else {
22036  flush_block_exits(parser, previous_block_exits);
22037  pm_node_list_free(&current_block_exits);
22038  }
22039 
22040  return (pm_node_t *) pm_program_node_create(parser, &locals, statements);
22041 }
22042 
22043 /******************************************************************************/
22044 /* External functions */
22045 /******************************************************************************/
22046 
/**
 * Find the first occurrence of the NUL-terminated string `little` that lies
 * entirely within the first `big_length` bytes of `big`. `big` does not need
 * to be NUL-terminated.
 *
 * @param big The buffer to search.
 * @param little The NUL-terminated string to search for.
 * @param big_length The number of bytes of `big` that may be examined.
 * @return A pointer to the start of the match, or NULL if there is none. An
 *     empty `little` matches at `big`, following the strnstr(3) convention.
 */
static const char *
pm_strnstr(const char *big, const char *little, size_t big_length) {
    size_t little_length = strlen(little);

    if (little_length == 0) return big;

    // A needle longer than the haystack can never match. Checking up front
    // also keeps the subtraction below from wrapping around.
    if (little_length > big_length) return NULL;

    // Stop at the last position where a full match still fits, so that
    // memcmp never reads past big + big_length.
    const char *last = big + (big_length - little_length);

    for (; big <= last; big++) {
        if (*big == *little && memcmp(big, little, little_length) == 0) return big;
    }

    return NULL;
}
22066 
22067 #ifdef _WIN32
22068 #define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
22069 #else
22075 static void
22076 pm_parser_warn_shebang_carriage_return(pm_parser_t *parser, const uint8_t *start, size_t length) {
22077  if (length > 2 && start[length - 2] == '\r' && start[length - 1] == '\n') {
22078  pm_parser_warn(parser, start, start + length, PM_WARN_SHEBANG_CARRIAGE_RETURN);
22079  }
22080 }
22081 #endif
22082 
22087 static void
22088 pm_parser_init_shebang(pm_parser_t *parser, const pm_options_t *options, const char *engine, size_t length) {
22089  const char *switches = pm_strnstr(engine, " -", length);
22090  if (switches == NULL) return;
22091 
22092  pm_options_t next_options = *options;
22093  options->shebang_callback(
22094  &next_options,
22095  (const uint8_t *) (switches + 1),
22096  length - ((size_t) (switches - engine)) - 1,
22097  options->shebang_callback_data
22098  );
22099 
22100  size_t encoding_length;
22101  if ((encoding_length = pm_string_length(&next_options.encoding)) > 0) {
22102  const uint8_t *encoding_source = pm_string_source(&next_options.encoding);
22103  parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22104  }
22105 
22106  parser->command_line = next_options.command_line;
22107  parser->frozen_string_literal = next_options.frozen_string_literal;
22108 }
22109 
22114 pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options) {
22115  assert(source != NULL);
22116 
22117  *parser = (pm_parser_t) {
22118  .node_id = 0,
22119  .lex_state = PM_LEX_STATE_BEG,
22120  .enclosure_nesting = 0,
22121  .lambda_enclosure_nesting = -1,
22122  .brace_nesting = 0,
22123  .do_loop_stack = 0,
22124  .accepts_block_stack = 0,
22125  .lex_modes = {
22126  .index = 0,
22127  .stack = {{ .mode = PM_LEX_DEFAULT }},
22128  .current = &parser->lex_modes.stack[0],
22129  },
22130  .start = source,
22131  .end = source + size,
22132  .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22133  .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
22134  .next_start = NULL,
22135  .heredoc_end = NULL,
22136  .data_loc = { .start = NULL, .end = NULL },
22137  .comment_list = { 0 },
22138  .magic_comment_list = { 0 },
22139  .warning_list = { 0 },
22140  .error_list = { 0 },
22141  .current_scope = NULL,
22142  .current_context = NULL,
22143  .encoding = PM_ENCODING_UTF_8_ENTRY,
22144  .encoding_changed_callback = NULL,
22145  .encoding_comment_start = source,
22146  .lex_callback = NULL,
22147  .filepath = { 0 },
22148  .constant_pool = { 0 },
22149  .newline_list = { 0 },
22150  .integer_base = 0,
22151  .current_string = PM_STRING_EMPTY,
22152  .start_line = 1,
22153  .explicit_encoding = NULL,
22154  .command_line = 0,
22155  .parsing_eval = false,
22156  .partial_script = false,
22157  .command_start = true,
22158  .recovering = false,
22159  .encoding_locked = false,
22160  .encoding_changed = false,
22161  .pattern_matching_newlines = false,
22162  .in_keyword_arg = false,
22163  .current_block_exits = NULL,
22164  .semantic_token_seen = false,
22165  .frozen_string_literal = PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET,
22166  .current_regular_expression_ascii_only = false,
22167  .warn_mismatched_indentation = true
22168  };
22169 
22170  // Initialize the constant pool. We're going to completely guess as to the
22171  // number of constants that we'll need based on the size of the input. The
22172  // ratio we chose here is actually less arbitrary than you might think.
22173  //
22174  // We took ~50K Ruby files and measured the size of the file versus the
22175  // number of constants that were found in those files. Then we found the
22176  // average and standard deviation of the ratios of constants/bytesize. Then
22177  // we added 1.34 standard deviations to the average to get a ratio that
22178  // would fit 75% of the files (for a two-tailed distribution). This works
22179  // because there was about a 0.77 correlation and the distribution was
22180  // roughly normal.
22181  //
22182  // This ratio will need to change if we add more constants to the constant
22183  // pool for another node type.
22184  uint32_t constant_size = ((uint32_t) size) / 95;
22185  pm_constant_pool_init(&parser->constant_pool, constant_size < 4 ? 4 : constant_size);
22186 
22187  // Initialize the newline list. Similar to the constant pool, we're going to
22188  // guess at the number of newlines that we'll need based on the size of the
22189  // input.
22190  size_t newline_size = size / 22;
22191  pm_newline_list_init(&parser->newline_list, source, newline_size < 4 ? 4 : newline_size);
22192 
22193  // If options were provided to this parse, establish them here.
22194  if (options != NULL) {
22195  // filepath option
22196  parser->filepath = options->filepath;
22197 
22198  // line option
22199  parser->start_line = options->line;
22200 
22201  // encoding option
22202  size_t encoding_length = pm_string_length(&options->encoding);
22203  if (encoding_length > 0) {
22204  const uint8_t *encoding_source = pm_string_source(&options->encoding);
22205  parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22206  }
22207 
22208  // encoding_locked option
22209  parser->encoding_locked = options->encoding_locked;
22210 
22211  // frozen_string_literal option
22212  parser->frozen_string_literal = options->frozen_string_literal;
22213 
22214  // command_line option
22215  parser->command_line = options->command_line;
22216 
22217  // version option
22218  parser->version = options->version;
22219 
22220  // partial_script
22221  parser->partial_script = options->partial_script;
22222 
22223  // scopes option
22224  parser->parsing_eval = options->scopes_count > 0;
22225  if (parser->parsing_eval) parser->warn_mismatched_indentation = false;
22226 
22227  for (size_t scope_index = 0; scope_index < options->scopes_count; scope_index++) {
22228  const pm_options_scope_t *scope = pm_options_scope_get(options, scope_index);
22229  pm_parser_scope_push(parser, scope_index == 0);
22230 
22231  // Scopes given from the outside are not allowed to have numbered
22232  // parameters.
22233  parser->current_scope->parameters |= PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED;
22234 
22235  for (size_t local_index = 0; local_index < scope->locals_count; local_index++) {
22236  const pm_string_t *local = pm_options_scope_local_get(scope, local_index);
22237 
22238  const uint8_t *source = pm_string_source(local);
22239  size_t length = pm_string_length(local);
22240 
22241  void *allocated = xmalloc(length);
22242  if (allocated == NULL) continue;
22243 
22244  memcpy(allocated, source, length);
22245  pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22246  }
22247  }
22248  }
22249 
22250  pm_accepts_block_stack_push(parser, true);
22251 
22252  // Skip past the UTF-8 BOM if it exists.
22253  if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22254  parser->current.end += 3;
22255  parser->encoding_comment_start += 3;
22256 
22257  if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
22259  if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
22260  }
22261  }
22262 
22263  // If the -x command line flag is set, or the first shebang of the file does
22264  // not include "ruby", then we'll search for a shebang that does include
22265  // "ruby" and start parsing from there.
22266  bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22267 
22268  // If the first two bytes of the source are a shebang, then we will do a bit
22269  // of extra processing.
22270  //
22271  // First, we'll indicate that the encoding comment is at the end of the
22272  // shebang. This means that when a shebang is present the encoding comment
22273  // can begin on the second line.
22274  //
22275  // Second, we will check if the shebang includes "ruby". If it does, then we
22276  // will start parsing from there. We will also potentially warn the
22277  // user if there is a carriage return at the end of the shebang. We will
22278  // also potentially call the shebang callback if this is the main script to
22279  // allow the caller to parse the shebang and find any command-line options.
22280  // If the shebang does not include "ruby" and this is the main script being
22281  // parsed, then we will start searching the file for a shebang that does
22282  // contain "ruby" as if -x were passed on the command line.
22283  const uint8_t *newline = next_newline(parser->start, parser->end - parser->start);
22284  size_t length = (size_t) ((newline != NULL ? newline : parser->end) - parser->start);
22285 
22286  if (length > 2 && parser->current.end[0] == '#' && parser->current.end[1] == '!') {
22287  const char *engine;
22288 
22289  if ((engine = pm_strnstr((const char *) parser->start, "ruby", length)) != NULL) {
22290  if (newline != NULL) {
22291  parser->encoding_comment_start = newline + 1;
22292 
22293  if (options == NULL || options->main_script) {
22294  pm_parser_warn_shebang_carriage_return(parser, parser->start, length + 1);
22295  }
22296  }
22297 
22298  if (options != NULL && options->main_script && options->shebang_callback != NULL) {
22299  pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) parser->start)));
22300  }
22301 
22302  search_shebang = false;
22303  } else if (options->main_script && !parser->parsing_eval) {
22304  search_shebang = true;
22305  }
22306  }
22307 
22308  // Here we're going to find the first shebang that includes "ruby" and start
22309  // parsing from there.
22310  if (search_shebang) {
22311  // If a shebang that includes "ruby" is not found, then we're going to add
22312  // a load error to the list of errors on the parser.
22313  bool found_shebang = false;
22314 
22315  // This is going to point to the start of each line as we check it.
22316  // We'll maintain a moving window looking at each line as they come.
22317  const uint8_t *cursor = parser->start;
22318 
22319  // The newline pointer points to the end of the current line that we're
22320  // considering. If it is NULL, then we're at the end of the file.
22321  const uint8_t *newline = next_newline(cursor, parser->end - cursor);
22322 
22323  while (newline != NULL) {
22324  pm_newline_list_append(&parser->newline_list, newline);
22325 
22326  cursor = newline + 1;
22327  newline = next_newline(cursor, parser->end - cursor);
22328 
22329  size_t length = (size_t) ((newline != NULL ? newline : parser->end) - cursor);
22330  if (length > 2 && cursor[0] == '#' && cursor[1] == '!') {
22331  const char *engine;
22332  if ((engine = pm_strnstr((const char *) cursor, "ruby", length)) != NULL) {
22333  found_shebang = true;
22334 
22335  if (newline != NULL) {
22336  pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22337  parser->encoding_comment_start = newline + 1;
22338  }
22339 
22340  if (options != NULL && options->shebang_callback != NULL) {
22341  pm_parser_init_shebang(parser, options, engine, length - ((size_t) (engine - (const char *) cursor)));
22342  }
22343 
22344  break;
22345  }
22346  }
22347  }
22348 
22349  if (found_shebang) {
22350  parser->previous = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22351  parser->current = (pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22352  } else {
22353  pm_parser_err(parser, parser->start, parser->start, PM_ERR_SCRIPT_NOT_FOUND);
22355  }
22356  }
22357 
22358  // The encoding comment can start after any amount of inline whitespace, so
22359  // here we'll advance it to the first non-inline-whitespace character so
22360  // that it is ready for future comparisons.
22362 }
22363 
22370  parser->encoding_changed_callback = callback;
22371 }
22372 
22376 static inline void
22377 pm_comment_list_free(pm_list_t *list) {
22378  pm_list_node_t *node, *next;
22379 
22380  for (node = list->head; node != NULL; node = next) {
22381  next = node->next;
22382 
22383  pm_comment_t *comment = (pm_comment_t *) node;
22384  xfree(comment);
22385  }
22386 }
22387 
22391 static inline void
22392 pm_magic_comment_list_free(pm_list_t *list) {
22393  pm_list_node_t *node, *next;
22394 
22395  for (node = list->head; node != NULL; node = next) {
22396  next = node->next;
22397 
22400  }
22401 }
22402 
22408  pm_string_free(&parser->filepath);
22411  pm_comment_list_free(&parser->comment_list);
22412  pm_magic_comment_list_free(&parser->magic_comment_list);
22415 
22416  while (parser->current_scope != NULL) {
22417  // Normally, popping the scope doesn't free the locals since it is
22418  // assumed that ownership has transferred to the AST. However if we have
22419  // scopes while we're freeing the parser, it's likely they came from
22420  // eval scopes and we need to free them explicitly here.
22421  pm_parser_scope_pop(parser);
22422  }
22423 
22424  while (parser->lex_modes.index >= PM_LEX_STACK_SIZE) {
22425  lex_mode_pop(parser);
22426  }
22427 }
22428 
22434  return parse_program(parser);
22435 }
22436 
22442 static bool
22443 pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets) {
22444 #define LINE_SIZE 4096
22445  char line[LINE_SIZE];
22446 
22447  while (memset(line, '\n', LINE_SIZE), fgets(line, LINE_SIZE, stream) != NULL) {
22448  size_t length = LINE_SIZE;
22449  while (length > 0 && line[length - 1] == '\n') length--;
22450 
22451  if (length == LINE_SIZE) {
22452  // If we read a line that is the maximum size and it doesn't end
22453  // with a newline, then we'll just append it to the buffer and
22454  // continue reading.
22455  length--;
22456  pm_buffer_append_string(buffer, line, length);
22457  continue;
22458  }
22459 
22460  // Append the line to the buffer.
22461  length--;
22462  pm_buffer_append_string(buffer, line, length);
22463 
22464  // Check if the line matches the __END__ marker. If it does, then stop
22465  // reading and return false. In most circumstances, this means we should
22466  // stop reading from the stream so that the DATA constant can pick it
22467  // up.
22468  switch (length) {
22469  case 7:
22470  if (strncmp(line, "__END__", 7) == 0) return false;
22471  break;
22472  case 8:
22473  if (strncmp(line, "__END__\n", 8) == 0) return false;
22474  break;
22475  case 9:
22476  if (strncmp(line, "__END__\r\n", 9) == 0) return false;
22477  break;
22478  }
22479  }
22480 
22481  return true;
22482 #undef LINE_SIZE
22483 }
22484 
22494 static bool
22495 pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
22496  pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
22497 
22498  for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
22499  if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
22500  return true;
22501  }
22502  }
22503 
22504  return false;
22505 }
22506 
22514 pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options) {
22515  pm_buffer_init(buffer);
22516 
22517  bool eof = pm_parse_stream_read(buffer, stream, fgets);
22518  pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22519  pm_node_t *node = pm_parse(parser);
22520 
22521  while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
22522  pm_node_destroy(parser, node);
22523  eof = pm_parse_stream_read(buffer, stream, fgets);
22524 
22525  pm_parser_free(parser);
22526  pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
22527  node = pm_parse(parser);
22528  }
22529 
22530  return node;
22531 }
22532 
22537 pm_parse_success_p(const uint8_t *source, size_t size, const char *data) {
22538  pm_options_t options = { 0 };
22539  pm_options_read(&options, data);
22540 
22541  pm_parser_t parser;
22542  pm_parser_init(&parser, source, size, &options);
22543 
22544  pm_node_t *node = pm_parse(&parser);
22545  pm_node_destroy(&parser, node);
22546 
22547  bool result = parser.error_list.size == 0;
22548  pm_parser_free(&parser);
22549  pm_options_free(&options);
22550 
22551  return result;
22552 }
22553 
22554 #undef PM_CASE_KEYWORD
22555 #undef PM_CASE_OPERATOR
22556 #undef PM_CASE_WRITABLE
22557 #undef PM_STRING_EMPTY
22558 #undef PM_LOCATION_NODE_BASE_VALUE
22559 #undef PM_LOCATION_NODE_VALUE
22560 #undef PM_LOCATION_NULL_VALUE
22561 #undef PM_LOCATION_TOKEN_VALUE
22562 
22563 // We optionally support serializing to a binary string. For systems that don't
22564 // want or need this functionality, it can be turned off with the
22565 // PRISM_EXCLUDE_SERIALIZATION define.
22566 #ifndef PRISM_EXCLUDE_SERIALIZATION
22567 
22568 static inline void
22569 pm_serialize_header(pm_buffer_t *buffer) {
22570  pm_buffer_append_string(buffer, "PRISM", 5);
22575 }
22576 
22581 pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
22582  pm_serialize_header(buffer);
22583  pm_serialize_content(parser, node, buffer);
22584  pm_buffer_append_byte(buffer, '\0');
22585 }
22586 
22592 pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22593  pm_options_t options = { 0 };
22594  pm_options_read(&options, data);
22595 
22596  pm_parser_t parser;
22597  pm_parser_init(&parser, source, size, &options);
22598 
22599  pm_node_t *node = pm_parse(&parser);
22600 
22601  pm_serialize_header(buffer);
22602  pm_serialize_content(&parser, node, buffer);
22603  pm_buffer_append_byte(buffer, '\0');
22604 
22605  pm_node_destroy(&parser, node);
22606  pm_parser_free(&parser);
22607  pm_options_free(&options);
22608 }
22609 
22615 pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data) {
22616  pm_parser_t parser;
22617  pm_options_t options = { 0 };
22618  pm_options_read(&options, data);
22619 
22620  pm_buffer_t parser_buffer;
22621  pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, fgets, &options);
22622  pm_serialize_header(buffer);
22623  pm_serialize_content(&parser, node, buffer);
22624  pm_buffer_append_byte(buffer, '\0');
22625 
22626  pm_node_destroy(&parser, node);
22627  pm_buffer_free(&parser_buffer);
22628  pm_parser_free(&parser);
22629  pm_options_free(&options);
22630 }
22631 
22636 pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data) {
22637  pm_options_t options = { 0 };
22638  pm_options_read(&options, data);
22639 
22640  pm_parser_t parser;
22641  pm_parser_init(&parser, source, size, &options);
22642 
22643  pm_node_t *node = pm_parse(&parser);
22644  pm_serialize_header(buffer);
22645  pm_serialize_encoding(parser.encoding, buffer);
22646  pm_buffer_append_varsint(buffer, parser.start_line);
22647  pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
22648 
22649  pm_node_destroy(&parser, node);
22650  pm_parser_free(&parser);
22651  pm_options_free(&options);
22652 }
22653 
22654 #endif
22655 
22656 /******************************************************************************/
22657 /* Slice queries for the Ruby API */
22658 /******************************************************************************/
22659 
/** The classifications that pm_slice_type can assign to a slice of source. */
typedef enum {
    /** The encoding name could not be resolved to an encoding. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice is empty or is not a valid identifier. */
    PM_SLICE_TYPE_NONE = 0,

    /** The slice is an identifier whose first character is not uppercase. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** The slice is an identifier whose first character is uppercase. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** The slice is an identifier followed by a trailing !, ?, or =. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
22677 
22681 pm_slice_type_t
22682 pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22683  // first, get the right encoding object
22684  const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22685  if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22686 
22687  // check that there is at least one character
22688  if (length == 0) return PM_SLICE_TYPE_NONE;
22689 
22690  size_t width;
22691  if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22692  // valid because alphabetical
22693  } else if (*source == '_') {
22694  // valid because underscore
22695  width = 1;
22696  } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22697  // valid because multibyte
22698  } else {
22699  // invalid because no match
22700  return PM_SLICE_TYPE_NONE;
22701  }
22702 
22703  // determine the type of the slice based on the first character
22704  const uint8_t *end = source + length;
22705  pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22706 
22707  // next, iterate through all of the bytes of the string to ensure that they
22708  // are all valid identifier characters
22709  source += width;
22710 
22711  while (source < end) {
22712  if ((width = encoding->alnum_char(source, end - source)) != 0) {
22713  // valid because alphanumeric
22714  source += width;
22715  } else if (*source == '_') {
22716  // valid because underscore
22717  source++;
22718  } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22719  // valid because multibyte
22720  source += width;
22721  } else {
22722  // invalid because no match
22723  break;
22724  }
22725  }
22726 
22727  // accept a ! or ? at the end of the slice as a method name
22728  if (*source == '!' || *source == '?' || *source == '=') {
22729  source++;
22730  result = PM_SLICE_TYPE_METHOD_NAME;
22731  }
22732 
22733  // valid if we are at the end of the slice
22734  return source == end ? result : PM_SLICE_TYPE_NONE;
22735 }
22736 
22741 pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22742  switch (pm_slice_type(source, length, encoding_name)) {
22743  case PM_SLICE_TYPE_ERROR:
22744  return PM_STRING_QUERY_ERROR;
22745  case PM_SLICE_TYPE_NONE:
22746  case PM_SLICE_TYPE_CONSTANT:
22747  case PM_SLICE_TYPE_METHOD_NAME:
22748  return PM_STRING_QUERY_FALSE;
22749  case PM_SLICE_TYPE_LOCAL:
22750  return PM_STRING_QUERY_TRUE;
22751  }
22752 
22753  assert(false && "unreachable");
22754  return PM_STRING_QUERY_FALSE;
22755 }
22756 
22761 pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22762  switch (pm_slice_type(source, length, encoding_name)) {
22763  case PM_SLICE_TYPE_ERROR:
22764  return PM_STRING_QUERY_ERROR;
22765  case PM_SLICE_TYPE_NONE:
22766  case PM_SLICE_TYPE_LOCAL:
22767  case PM_SLICE_TYPE_METHOD_NAME:
22768  return PM_STRING_QUERY_FALSE;
22769  case PM_SLICE_TYPE_CONSTANT:
22770  return PM_STRING_QUERY_TRUE;
22771  }
22772 
22773  assert(false && "unreachable");
22774  return PM_STRING_QUERY_FALSE;
22775 }
22776 
22781 pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22782 #define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22783 #define C1(c) (*source == c)
22784 #define C2(s) (memcmp(source, s, 2) == 0)
22785 #define C3(s) (memcmp(source, s, 3) == 0)
22786 
22787  switch (pm_slice_type(source, length, encoding_name)) {
22788  case PM_SLICE_TYPE_ERROR:
22789  return PM_STRING_QUERY_ERROR;
22790  case PM_SLICE_TYPE_NONE:
22791  break;
22792  case PM_SLICE_TYPE_LOCAL:
22793  // numbered parameters are not valid method names
22794  return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22795  case PM_SLICE_TYPE_CONSTANT:
22796  // all constants are valid method names
22797  case PM_SLICE_TYPE_METHOD_NAME:
22798  // all method names are valid method names
22799  return PM_STRING_QUERY_TRUE;
22800  }
22801 
22802  switch (length) {
22803  case 1:
22804  return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22805  case 2:
22806  return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22807  case 3:
22808  return B(C3("===") || C3("<=>") || C3("[]="));
22809  default:
22810  return PM_STRING_QUERY_FALSE;
22811  }
22812 
22813 #undef B
22814 #undef C1
22815 #undef C2
22816 #undef C3
22817 }
@ PM_RANGE_FLAGS_EXCLUDE_END
...
Definition: ast.h:7430
@ PM_DEFINED_NODE
DefinedNode.
Definition: ast.h:709
@ PM_PRE_EXECUTION_NODE
PreExecutionNode.
Definition: ast.h:931
@ PM_RETRY_NODE
RetryNode.
Definition: ast.h:964
@ PM_REDO_NODE
RedoNode.
Definition: ast.h:943
@ PM_CONSTANT_PATH_WRITE_NODE
ConstantPathWriteNode.
Definition: ast.h:694
@ PM_SOURCE_LINE_NODE
SourceLineNode.
Definition: ast.h:985
@ PM_UNLESS_NODE
UnlessNode.
Definition: ast.h:1009
@ PM_CALL_NODE
CallNode.
Definition: ast.h:628
@ PM_NIL_NODE
NilNode.
Definition: ast.h:895
@ PM_GLOBAL_VARIABLE_READ_NODE
GlobalVariableReadNode.
Definition: ast.h:757
@ PM_RATIONAL_NODE
RationalNode.
Definition: ast.h:940
@ PM_FIND_PATTERN_NODE
FindPatternNode.
Definition: ast.h:727
@ PM_ARRAY_NODE
ArrayNode.
Definition: ast.h:589
@ PM_CONSTANT_PATH_TARGET_NODE
ConstantPathTargetNode.
Definition: ast.h:691
@ PM_OR_NODE
OrNode.
Definition: ast.h:913
@ PM_MULTI_WRITE_NODE
MultiWriteNode.
Definition: ast.h:889
@ PM_IF_NODE
IfNode.
Definition: ast.h:772
@ PM_INTERPOLATED_STRING_NODE
InterpolatedStringNode.
Definition: ast.h:826
@ PM_FALSE_NODE
FalseNode.
Definition: ast.h:724
@ PM_HASH_NODE
HashNode.
Definition: ast.h:766
@ PM_MATCH_PREDICATE_NODE
MatchPredicateNode.
Definition: ast.h:871
@ PM_X_STRING_NODE
XStringNode.
Definition: ast.h:1021
@ PM_GLOBAL_VARIABLE_TARGET_NODE
GlobalVariableTargetNode.
Definition: ast.h:760
@ PM_AND_NODE
AndNode.
Definition: ast.h:583
@ PM_CONSTANT_TARGET_NODE
ConstantTargetNode.
Definition: ast.h:700
@ PM_IT_LOCAL_VARIABLE_READ_NODE
ItLocalVariableReadNode.
Definition: ast.h:835
@ PM_SOURCE_FILE_NODE
SourceFileNode.
Definition: ast.h:982
@ PM_NO_KEYWORDS_PARAMETER_NODE
NoKeywordsParameterNode.
Definition: ast.h:898
@ PM_MULTI_TARGET_NODE
MultiTargetNode.
Definition: ast.h:886
@ PM_SPLAT_NODE
SplatNode.
Definition: ast.h:988
@ PM_CLASS_VARIABLE_READ_NODE
ClassVariableReadNode.
Definition: ast.h:661
@ PM_ELSE_NODE
ElseNode.
Definition: ast.h:712
@ PM_INTERPOLATED_MATCH_LAST_LINE_NODE
InterpolatedMatchLastLineNode.
Definition: ast.h:820
@ PM_SYMBOL_NODE
SymbolNode.
Definition: ast.h:1000
@ PM_ALIAS_METHOD_NODE
AliasMethodNode.
Definition: ast.h:577
@ PM_MATCH_REQUIRED_NODE
MatchRequiredNode.
Definition: ast.h:874
@ PM_BACK_REFERENCE_READ_NODE
BackReferenceReadNode.
Definition: ast.h:601
@ PM_BLOCK_ARGUMENT_NODE
BlockArgumentNode.
Definition: ast.h:607
@ PM_MISSING_NODE
MissingNode.
Definition: ast.h:880
@ PM_SELF_NODE
SelfNode.
Definition: ast.h:970
@ PM_TRUE_NODE
TrueNode.
Definition: ast.h:1003
@ PM_ASSOC_SPLAT_NODE
AssocSplatNode.
Definition: ast.h:598
@ PM_RANGE_NODE
RangeNode.
Definition: ast.h:937
@ PM_LOCAL_VARIABLE_READ_NODE
LocalVariableReadNode.
Definition: ast.h:859
@ PM_NEXT_NODE
NextNode.
Definition: ast.h:892
@ PM_REGULAR_EXPRESSION_NODE
RegularExpressionNode.
Definition: ast.h:946
@ PM_CONSTANT_WRITE_NODE
ConstantWriteNode.
Definition: ast.h:703
@ PM_HASH_PATTERN_NODE
HashPatternNode.
Definition: ast.h:769
@ PM_UNDEF_NODE
UndefNode.
Definition: ast.h:1006
@ PM_ENSURE_NODE
EnsureNode.
Definition: ast.h:721
@ PM_LOCAL_VARIABLE_WRITE_NODE
LocalVariableWriteNode.
Definition: ast.h:865
@ PM_KEYWORD_HASH_NODE
KeywordHashNode.
Definition: ast.h:841
@ PM_PARENTHESES_NODE
ParenthesesNode.
Definition: ast.h:919
@ PM_CLASS_VARIABLE_WRITE_NODE
ClassVariableWriteNode.
Definition: ast.h:667
@ PM_POST_EXECUTION_NODE
PostExecutionNode.
Definition: ast.h:928
@ PM_RETURN_NODE
ReturnNode.
Definition: ast.h:967
@ PM_ARRAY_PATTERN_NODE
ArrayPatternNode.
Definition: ast.h:592
@ PM_MATCH_LAST_LINE_NODE
MatchLastLineNode.
Definition: ast.h:868
@ PM_CONSTANT_PATH_NODE
ConstantPathNode.
Definition: ast.h:682
@ PM_INTERPOLATED_SYMBOL_NODE
InterpolatedSymbolNode.
Definition: ast.h:829
@ PM_CLASS_VARIABLE_TARGET_NODE
ClassVariableTargetNode.
Definition: ast.h:664
@ PM_BREAK_NODE
BreakNode.
Definition: ast.h:622
@ PM_IMAGINARY_NODE
ImaginaryNode.
Definition: ast.h:775
@ PM_CONSTANT_READ_NODE
ConstantReadNode.
Definition: ast.h:697
@ PM_GLOBAL_VARIABLE_WRITE_NODE
GlobalVariableWriteNode.
Definition: ast.h:763
@ PM_SOURCE_ENCODING_NODE
SourceEncodingNode.
Definition: ast.h:979
@ PM_BEGIN_NODE
BeginNode.
Definition: ast.h:604
@ PM_INSTANCE_VARIABLE_READ_NODE
InstanceVariableReadNode.
Definition: ast.h:808
@ PM_FLIP_FLOP_NODE
FlipFlopNode.
Definition: ast.h:730
@ PM_INSTANCE_VARIABLE_WRITE_NODE
InstanceVariableWriteNode.
Definition: ast.h:814
@ PM_INSTANCE_VARIABLE_TARGET_NODE
InstanceVariableTargetNode.
Definition: ast.h:811
@ PM_CASE_NODE
CaseNode.
Definition: ast.h:646
@ PM_FLOAT_NODE
FloatNode.
Definition: ast.h:733
@ PM_ASSOC_NODE
AssocNode.
Definition: ast.h:595
@ PM_INTEGER_NODE
IntegerNode.
Definition: ast.h:817
@ PM_LOCAL_VARIABLE_TARGET_NODE
LocalVariableTargetNode.
Definition: ast.h:862
@ PM_STRING_NODE
StringNode.
Definition: ast.h:994
@ PM_ALIAS_GLOBAL_VARIABLE_NODE
AliasGlobalVariableNode.
Definition: ast.h:574
@ PM_NUMBERED_REFERENCE_READ_NODE
NumberedReferenceReadNode.
Definition: ast.h:904
@ PM_STATEMENTS_NODE
StatementsNode.
Definition: ast.h:991
@ PM_BLOCK_NODE
BlockNode.
Definition: ast.h:613
@ PM_INTERPOLATED_REGULAR_EXPRESSION_NODE
InterpolatedRegularExpressionNode.
Definition: ast.h:823
static const pm_node_flags_t PM_NODE_FLAG_NEWLINE
We store the flags enum in every node in the tree.
Definition: ast.h:1046
@ PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition: ast.h:7513
@ PM_STRING_FLAGS_FROZEN
frozen by virtue of a frozen_string_literal: true comment or --enable-frozen-string-literal
Definition: ast.h:7496
@ PM_STRING_FLAGS_FORCED_BINARY_ENCODING
internal bytes forced the encoding to binary
Definition: ast.h:7493
@ PM_STRING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition: ast.h:7490
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING
if the arguments contain forwarding
Definition: ast.h:7322
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS
if the arguments contain keywords
Definition: ast.h:7325
@ PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT
if the arguments contain a keyword splat
Definition: ast.h:7328
#define PM_NODE_FLAG_P(node, flag)
Return true if the given flag is set on the given node.
Definition: ast.h:1063
#define PM_NODE_TYPE_P(node, type)
Return true if the type of the given node matches the given type.
Definition: ast.h:1058
#define PM_NODE_TYPE(node)
Cast the type to an enum to allow the compiler to provide exhaustiveness checking.
Definition: ast.h:1053
@ PM_INTEGER_BASE_FLAGS_HEXADECIMAL
0x prefix
Definition: ast.h:7387
@ PM_INTEGER_BASE_FLAGS_OCTAL
0o or 0 prefix
Definition: ast.h:7384
@ PM_INTEGER_BASE_FLAGS_DECIMAL
0d or no prefix
Definition: ast.h:7381
@ PM_INTEGER_BASE_FLAGS_BINARY
0b prefix
Definition: ast.h:7378
enum pm_token_type pm_token_type_t
This enum represents every type of token in the Ruby source.
#define PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
When we're serializing to Java, we want to skip serializing the location fields as they won't be used...
Definition: ast.h:7522
@ PM_CALL_NODE_FLAGS_SAFE_NAVIGATION
&.
Definition: ast.h:7350
@ PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE
a call that is an attribute write, so the value being written should be returned
Definition: ast.h:7356
@ PM_CALL_NODE_FLAGS_VARIABLE_CALL
a call that could have been a local variable
Definition: ast.h:7353
@ PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING
internal bytes forced the encoding to US-ASCII
Definition: ast.h:7468
@ PM_REGULAR_EXPRESSION_FLAGS_EXTENDED
x - ignores whitespace and allows comments in regular expressions
Definition: ast.h:7441
uint16_t pm_node_flags_t
These are the flags embedded in the node struct.
Definition: ast.h:1040
@ PM_TOKEN_STAR_STAR
**
Definition: ast.h:469
@ PM_TOKEN_DOT_DOT_DOT
the ... range operator or forwarding parameter
Definition: ast.h:124
@ PM_TOKEN_MINUS_EQUAL
-=
Definition: ast.h:385
@ PM_TOKEN_IGNORED_NEWLINE
an ignored newline
Definition: ast.h:196
@ PM_TOKEN_BANG_EQUAL
!=
Definition: ast.h:64
@ PM_TOKEN_KEYWORD___FILE__
__FILE__
Definition: ast.h:349
@ PM_TOKEN_KEYWORD_WHEN
when
Definition: ast.h:334
@ PM_TOKEN_FLOAT
a floating point number
Definition: ast.h:160
@ PM_TOKEN_PLUS_EQUAL
+=
Definition: ast.h:442
@ PM_TOKEN_DOT_DOT
the .. range operator
Definition: ast.h:121
@ PM_TOKEN_UDOT_DOT
unary .. operator
Definition: ast.h:496
@ PM_TOKEN_AMPERSAND_DOT
&.
Definition: ast.h:49
@ PM_TOKEN_NEWLINE
a newline character outside of other tokens
Definition: ast.h:391
@ PM_TOKEN_NUMBERED_REFERENCE
a numbered reference to a capture group in the previous regular expression match
Definition: ast.h:394
@ PM_TOKEN_AMPERSAND
&
Definition: ast.h:40
@ PM_TOKEN_KEYWORD_YIELD
yield
Definition: ast.h:343
@ PM_TOKEN_KEYWORD_END
end
Definition: ast.h:253
@ PM_TOKEN_LAMBDA_BEGIN
{
Definition: ast.h:361
@ PM_TOKEN_KEYWORD_UNTIL_MODIFIER
until in the modifier form
Definition: ast.h:331
@ PM_TOKEN_EQUAL_EQUAL_EQUAL
===
Definition: ast.h:151
@ PM_TOKEN_INTEGER_RATIONAL
an integer with a rational suffix
Definition: ast.h:208
@ PM_TOKEN_USTAR
unary *
Definition: ast.h:511
@ PM_TOKEN_TILDE
~ or ~@
Definition: ast.h:487
@ PM_TOKEN_KEYWORD___ENCODING__
__ENCODING__
Definition: ast.h:346
@ PM_TOKEN_REGEXP_END
the end of a regular expression
Definition: ast.h:451
@ PM_TOKEN_KEYWORD_UNTIL
until
Definition: ast.h:328
@ PM_TOKEN_COMMA
,
Definition: ast.h:109
@ PM_TOKEN_MAXIMUM
The maximum token value.
Definition: ast.h:523
@ PM_TOKEN_GREATER
>
Definition: ast.h:175
@ PM_TOKEN_INTEGER
an integer (any base)
Definition: ast.h:202
@ PM_TOKEN_SLASH_EQUAL
/=
Definition: ast.h:460
@ PM_TOKEN_UMINUS_NUM
-@ for a number
Definition: ast.h:505
@ PM_TOKEN_EMBVAR
#
Definition: ast.h:142
@ PM_TOKEN_KEYWORD_UNLESS_MODIFIER
unless in the modifier form
Definition: ast.h:325
@ PM_TOKEN_INTEGER_RATIONAL_IMAGINARY
an integer with a rational and imaginary suffix
Definition: ast.h:211
@ PM_TOKEN_FLOAT_RATIONAL_IMAGINARY
a floating point number with a rational and imaginary suffix
Definition: ast.h:169
@ PM_TOKEN_BRACKET_LEFT_RIGHT
[]
Definition: ast.h:82
@ PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL
&&=
Definition: ast.h:46
@ PM_TOKEN_KEYWORD_CLASS
class
Definition: ast.h:232
@ PM_TOKEN_KEYWORD_BEGIN
begin
Definition: ast.h:220
@ PM_TOKEN_NOT_PROVIDED
a token that was not present but it is okay
Definition: ast.h:37
@ PM_TOKEN_USTAR_STAR
unary **
Definition: ast.h:514
@ PM_TOKEN_GREATER_GREATER_EQUAL
>>=
Definition: ast.h:184
@ PM_TOKEN_PERCENT_EQUAL
%=
Definition: ast.h:409
@ PM_TOKEN_PERCENT
%
Definition: ast.h:406
@ PM_TOKEN_KEYWORD_IN
in
Definition: ast.h:274
@ PM_TOKEN_BANG
! or !@
Definition: ast.h:61
@ PM_TOKEN_KEYWORD_NOT
not
Definition: ast.h:286
@ PM_TOKEN_BRACKET_LEFT_ARRAY
[ for the beginning of an array
Definition: ast.h:79
@ PM_TOKEN_HEREDOC_END
the end of a heredoc
Definition: ast.h:187
@ PM_TOKEN_HEREDOC_START
the start of a heredoc
Definition: ast.h:190
@ PM_TOKEN_KEYWORD_DEFINED
defined?
Definition: ast.h:238
@ PM_TOKEN_UCOLON_COLON
unary ::
Definition: ast.h:493
@ PM_TOKEN_LABEL_END
the end of a label
Definition: ast.h:358
@ PM_TOKEN_EQUAL_GREATER
=>
Definition: ast.h:154
@ PM_TOKEN_KEYWORD_UNLESS
unless
Definition: ast.h:322
@ PM_TOKEN_KEYWORD_ENSURE
ensure
Definition: ast.h:259
@ PM_TOKEN_AMPERSAND_EQUAL
&=
Definition: ast.h:52
@ PM_TOKEN_EQUAL_EQUAL
==
Definition: ast.h:148
@ PM_TOKEN_UPLUS
+@
Definition: ast.h:508
@ PM_TOKEN_FLOAT_IMAGINARY
a floating point number with an imaginary suffix
Definition: ast.h:163
@ PM_TOKEN_KEYWORD_BEGIN_UPCASE
BEGIN.
Definition: ast.h:223
@ PM_TOKEN_LESS_EQUAL_GREATER
<=>
Definition: ast.h:370
@ PM_TOKEN_KEYWORD_RESCUE_MODIFIER
rescue in the modifier form
Definition: ast.h:298
@ PM_TOKEN_MISSING
a token that was expected but not found
Definition: ast.h:34
@ PM_TOKEN_MINUS_GREATER
->
Definition: ast.h:388
@ PM_TOKEN_KEYWORD_FALSE
false
Definition: ast.h:262
@ PM_TOKEN_PIPE_PIPE_EQUAL
||=
Definition: ast.h:436
@ PM_TOKEN_KEYWORD_IF
if
Definition: ast.h:268
@ PM_TOKEN_EMBEXPR_BEGIN
#{
Definition: ast.h:136
@ PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES
( for a parentheses node
Definition: ast.h:400
@ PM_TOKEN_EMBDOC_END
=end
Definition: ast.h:130
@ PM_TOKEN_KEYWORD_ELSE
else
Definition: ast.h:247
@ PM_TOKEN_BACK_REFERENCE
a back reference
Definition: ast.h:58
@ PM_TOKEN_BRACKET_LEFT
[
Definition: ast.h:76
@ PM_TOKEN_EOF
final token in the file
Definition: ast.h:31
@ PM_TOKEN_PIPE_PIPE
||
Definition: ast.h:433
@ PM_TOKEN_KEYWORD_NIL
nil
Definition: ast.h:283
@ PM_TOKEN_PERCENT_UPPER_W
%W
Definition: ast.h:424
@ PM_TOKEN_KEYWORD_RETURN
return
Definition: ast.h:304
@ PM_TOKEN_CLASS_VARIABLE
a class variable
Definition: ast.h:100
@ PM_TOKEN_PIPE
|
Definition: ast.h:427
@ PM_TOKEN_PARENTHESIS_LEFT
(
Definition: ast.h:397
@ PM_TOKEN_BANG_TILDE
!~
Definition: ast.h:67
@ PM_TOKEN_DOT
the . call operator
Definition: ast.h:118
@ PM_TOKEN_PARENTHESIS_RIGHT
)
Definition: ast.h:403
@ PM_TOKEN_KEYWORD_RESCUE
rescue
Definition: ast.h:295
@ PM_TOKEN_INSTANCE_VARIABLE
an instance variable
Definition: ast.h:199
@ PM_TOKEN_PIPE_EQUAL
|=
Definition: ast.h:430
@ PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL
[]=
Definition: ast.h:85
@ PM_TOKEN_UAMPERSAND
unary &
Definition: ast.h:490
@ PM_TOKEN_MINUS
-
Definition: ast.h:382
@ PM_TOKEN_CONSTANT
a constant
Definition: ast.h:115
@ PM_TOKEN_IDENTIFIER
an identifier
Definition: ast.h:193
@ PM_TOKEN_EMBDOC_BEGIN
=begin
Definition: ast.h:127
@ PM_TOKEN_STAR_EQUAL
*=
Definition: ast.h:466
@ PM_TOKEN_KEYWORD_OR
or
Definition: ast.h:289
@ PM_TOKEN_KEYWORD_AND
and
Definition: ast.h:217
@ PM_TOKEN_LESS
<
Definition: ast.h:364
@ PM_TOKEN_KEYWORD_BREAK
break
Definition: ast.h:226
@ PM_TOKEN_PERCENT_LOWER_W
%w
Definition: ast.h:415
@ PM_TOKEN_SYMBOL_BEGIN
the beginning of a symbol
Definition: ast.h:484
@ PM_TOKEN_METHOD_NAME
a method name
Definition: ast.h:379
@ PM_TOKEN_KEYWORD_CASE
case
Definition: ast.h:229
@ PM_TOKEN_WORDS_SEP
a separator between words in a list
Definition: ast.h:517
@ PM_TOKEN_FLOAT_RATIONAL
a floating point number with a rational suffix
Definition: ast.h:166
@ PM_TOKEN_LESS_LESS_EQUAL
<<=
Definition: ast.h:376
@ PM_TOKEN_EMBDOC_LINE
a line inside of embedded documentation
Definition: ast.h:133
@ PM_TOKEN_KEYWORD_SUPER
super
Definition: ast.h:310
@ PM_TOKEN_KEYWORD_DO
do
Definition: ast.h:241
@ PM_TOKEN_KEYWORD_REDO
redo
Definition: ast.h:292
@ PM_TOKEN_EQUAL_TILDE
=~
Definition: ast.h:157
@ PM_TOKEN_EMBEXPR_END
}
Definition: ast.h:139
@ PM_TOKEN_KEYWORD_END_UPCASE
END.
Definition: ast.h:256
@ PM_TOKEN_KEYWORD___LINE__
__LINE__
Definition: ast.h:352
@ PM_TOKEN_STRING_END
the end of a string
Definition: ast.h:481
@ PM_TOKEN_STRING_CONTENT
the contents of a string
Definition: ast.h:478
@ PM_TOKEN_BRACE_LEFT
{
Definition: ast.h:70
@ PM_TOKEN_COLON_COLON
::
Definition: ast.h:106
@ PM_TOKEN_GREATER_GREATER
>>
Definition: ast.h:181
@ PM_TOKEN_PERCENT_LOWER_X
%x
Definition: ast.h:418
@ PM_TOKEN_KEYWORD_SELF
self
Definition: ast.h:307
@ PM_TOKEN_PERCENT_LOWER_I
%i
Definition: ast.h:412
@ PM_TOKEN_KEYWORD_ALIAS
alias
Definition: ast.h:214
@ PM_TOKEN_GLOBAL_VARIABLE
a global variable
Definition: ast.h:172
@ PM_TOKEN_KEYWORD_IF_MODIFIER
if in the modifier form
Definition: ast.h:271
@ PM_TOKEN_SLASH
/
Definition: ast.h:457
@ PM_TOKEN_KEYWORD_RETRY
retry
Definition: ast.h:301
@ PM_TOKEN_COLON
:
Definition: ast.h:103
@ PM_TOKEN_KEYWORD_UNDEF
undef
Definition: ast.h:319
@ PM_TOKEN_BRACKET_RIGHT
]
Definition: ast.h:88
@ PM_TOKEN_KEYWORD_FOR
for
Definition: ast.h:265
@ PM_TOKEN_KEYWORD_THEN
then
Definition: ast.h:313
@ PM_TOKEN_QUESTION_MARK
?
Definition: ast.h:445
@ PM_TOKEN___END__
marker for the point in the file at which the parser should stop
Definition: ast.h:520
@ PM_TOKEN_KEYWORD_WHILE
while
Definition: ast.h:337
@ PM_TOKEN_EQUAL
=
Definition: ast.h:145
@ PM_TOKEN_KEYWORD_DEF
def
Definition: ast.h:235
@ PM_TOKEN_UDOT_DOT_DOT
unary ... operator
Definition: ast.h:499
@ PM_TOKEN_STAR
*
Definition: ast.h:463
@ PM_TOKEN_KEYWORD_WHILE_MODIFIER
while in the modifier form
Definition: ast.h:340
@ PM_TOKEN_KEYWORD_TRUE
true
Definition: ast.h:316
@ PM_TOKEN_BRACE_RIGHT
}
Definition: ast.h:73
@ PM_TOKEN_SEMICOLON
;
Definition: ast.h:454
@ PM_TOKEN_REGEXP_BEGIN
the beginning of a regular expression
Definition: ast.h:448
@ PM_TOKEN_CARET
^
Definition: ast.h:91
@ PM_TOKEN_PERCENT_UPPER_I
%I
Definition: ast.h:421
@ PM_TOKEN_KEYWORD_DO_LOOP
do keyword for a predicate in a while, until, or for loop
Definition: ast.h:244
@ PM_TOKEN_KEYWORD_MODULE
module
Definition: ast.h:277
@ PM_TOKEN_PLUS
+
Definition: ast.h:439
@ PM_TOKEN_KEYWORD_NEXT
next
Definition: ast.h:280
@ PM_TOKEN_BACKTICK
`
Definition: ast.h:55
@ PM_TOKEN_INTEGER_IMAGINARY
an integer with an imaginary suffix
Definition: ast.h:205
@ PM_TOKEN_LABEL
a label
Definition: ast.h:355
@ PM_TOKEN_STAR_STAR_EQUAL
**=
Definition: ast.h:472
@ PM_TOKEN_CHARACTER_LITERAL
a character literal
Definition: ast.h:97
@ PM_TOKEN_AMPERSAND_AMPERSAND
&&
Definition: ast.h:43
@ PM_TOKEN_UMINUS
-@
Definition: ast.h:502
@ PM_TOKEN_LESS_LESS
<<
Definition: ast.h:373
@ PM_TOKEN_GREATER_EQUAL
>=
Definition: ast.h:178
@ PM_TOKEN_COMMENT
a comment
Definition: ast.h:112
@ PM_TOKEN_CARET_EQUAL
^=
Definition: ast.h:94
@ PM_TOKEN_KEYWORD_ELSIF
elsif
Definition: ast.h:250
@ PM_TOKEN_STRING_BEGIN
the beginning of a string
Definition: ast.h:475
@ PM_TOKEN_LESS_EQUAL
<=
Definition: ast.h:367
@ PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING
internal bytes forced the encoding to UTF-8
Definition: ast.h:7367
@ PM_LOOP_FLAGS_BEGIN_MODIFIER
a loop after a begin statement, so the body is executed first before the condition
Definition: ast.h:7414
void pm_diagnostic_list_free(pm_list_t *list)
Deallocate the internal state of the given diagnostic list.
Definition: diagnostic.c:831
bool pm_diagnostic_list_append_format(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id,...)
Append a diagnostic to the given list of diagnostics that is using a format string for its message.
Definition: diagnostic.c:787
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
Definition: diagnostic.h:29
bool pm_diagnostic_list_append(pm_list_t *list, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id)
Append a diagnostic to the given list of diagnostics that is using shared memory for its message.
Definition: diagnostic.c:766
#define xfree
Old name of ruby_xfree.
Definition: xmalloc.h:58
#define xmalloc
Old name of ruby_xmalloc.
Definition: xmalloc.h:53
#define xcalloc
Old name of ruby_xcalloc.
Definition: xmalloc.h:55
VALUE type(ANYARGS)
ANYARGS-ed function type.
Definition: cxxanyargs.hpp:56
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options)
Free the internal memory associated with the options.
Definition: options.c:181
void pm_options_read(pm_options_t *options, const char *data)
Deserialize an options struct from the given binary string.
Definition: options.c:238
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
Definition: options.h:185
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
Definition: options.h:20
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index)
Return a pointer to the scope at the given index within the given options.
Definition: options.c:154
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
Definition: options.h:31
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
Definition: options.h:26
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index)
Return a pointer to the local at the given index within the given scope.
Definition: options.c:173
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
Definition: options.h:191
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
Definition: options.h:71
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
Definition: parser.h:79
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
Definition: parser.h:262
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
Definition: parser.h:267
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
Definition: parser.h:46
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
Definition: parser.h:69
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition: parser.h:496
pm_context_t
While parsing, we keep track of a stack of contexts.
Definition: parser.h:274
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
Definition: parser.h:321
@ PM_CONTEXT_ELSIF
an elsif clause
Definition: parser.h:348
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
Definition: parser.h:333
@ PM_CONTEXT_ELSE
an else clause
Definition: parser.h:345
@ PM_CONTEXT_FOR_INDEX
a for loop's index
Definition: parser.h:357
@ PM_CONTEXT_CASE_WHEN
a case when statement
Definition: parser.h:306
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
Definition: parser.h:303
@ PM_CONTEXT_MODULE
a module declaration
Definition: parser.h:384
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
Definition: parser.h:336
@ PM_CONTEXT_CASE_IN
a case in statement
Definition: parser.h:309
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
Definition: parser.h:300
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
Definition: parser.h:378
@ PM_CONTEXT_SCLASS
a singleton class definition
Definition: parser.h:414
@ PM_CONTEXT_UNLESS
an unless statement
Definition: parser.h:429
@ PM_CONTEXT_POSTEXE
an END block
Definition: parser.h:402
@ PM_CONTEXT_IF
an if statement
Definition: parser.h:360
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
Definition: parser.h:396
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
Definition: parser.h:375
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
Definition: parser.h:285
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
Definition: parser.h:276
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
Definition: parser.h:318
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
Definition: parser.h:369
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
Definition: parser.h:297
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
Definition: parser.h:315
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
Definition: parser.h:363
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
Definition: parser.h:390
@ PM_CONTEXT_PARENS
a parenthesized expression
Definition: parser.h:399
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
Definition: parser.h:291
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
Definition: parser.h:327
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
Definition: parser.h:423
@ PM_CONTEXT_PREEXE
a BEGIN block
Definition: parser.h:408
@ PM_CONTEXT_DEFINED
a defined? expression
Definition: parser.h:339
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
Definition: parser.h:387
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
Definition: parser.h:288
@ PM_CONTEXT_UNTIL
an until statement
Definition: parser.h:432
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
Definition: parser.h:330
@ PM_CONTEXT_FOR
a for loop
Definition: parser.h:354
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
Definition: parser.h:405
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
Definition: parser.h:282
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
Definition: parser.h:417
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
Definition: parser.h:342
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
Definition: parser.h:372
@ PM_CONTEXT_CLASS
a class declaration
Definition: parser.h:312
@ PM_CONTEXT_MAIN
the top level context
Definition: parser.h:381
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
Definition: parser.h:366
@ PM_CONTEXT_BEGIN
a begin statement
Definition: parser.h:279
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
Definition: parser.h:411
@ PM_CONTEXT_EMBEXPR
an interpolated expression
Definition: parser.h:351
@ PM_CONTEXT_TERNARY
a ternary expression
Definition: parser.h:426
@ PM_CONTEXT_DEF
a method definition
Definition: parser.h:324
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
Definition: parser.h:420
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
Definition: parser.h:393
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
Definition: parser.h:294
@ PM_CONTEXT_WHILE
a while statement
Definition: parser.h:435
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
Definition: parser.h:522
pm_comment_type_t
This is the type of a comment that we've found while parsing.
Definition: parser.h:448
bool pm_buffer_init_capacity(pm_buffer_t *buffer, size_t capacity)
Initialize a pm_buffer_t with the given capacity.
Definition: pm_buffer.c:15
void pm_buffer_append_format(pm_buffer_t *buffer, const char *format,...) PRISM_ATTRIBUTE_FORMAT(2, 3)
Append a formatted string to the buffer.
void pm_buffer_append_string(pm_buffer_t *buffer, const char *value, size_t length)
Append a string to the buffer.
Definition: pm_buffer.c:119
PRISM_EXPORTED_FUNCTION size_t pm_buffer_length(const pm_buffer_t *buffer)
Return the length of the buffer.
Definition: pm_buffer.c:43
void pm_buffer_append_byte(pm_buffer_t *buffer, uint8_t value)
Append a single byte to the buffer.
Definition: pm_buffer.c:135
PRISM_EXPORTED_FUNCTION bool pm_buffer_init(pm_buffer_t *buffer)
Initialize a pm_buffer_t with its default values.
Definition: pm_buffer.c:27
void pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value)
Append a 32-bit signed integer to the buffer as a variable-length integer.
Definition: pm_buffer.c:161
PRISM_EXPORTED_FUNCTION char * pm_buffer_value(const pm_buffer_t *buffer)
Return the value of the buffer.
Definition: pm_buffer.c:35
PRISM_EXPORTED_FUNCTION void pm_buffer_free(pm_buffer_t *buffer)
Free the memory associated with the buffer.
Definition: pm_buffer.c:315
void pm_buffer_append_bytes(pm_buffer_t *buffer, const uint8_t *value, size_t length)
Append a list of bytes to the buffer.
Definition: pm_buffer.c:127
size_t pm_strspn_hexadecimal_digit(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are hexadecimal digits.
Definition: pm_char.c:249
bool pm_char_is_decimal_digit(const uint8_t b)
Returns true if the given character is a decimal digit.
Definition: pm_char.c:295
size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are whitespace.
Definition: pm_char.c:76
size_t pm_strspn_hexadecimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are hexadecimal digits or underscore...
Definition: pm_char.c:263
size_t pm_strspn_decimal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are decimal digits or underscores.
Definition: pm_char.c:239
size_t pm_strspn_decimal_digit(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are decimal digits.
Definition: pm_char.c:225
size_t pm_strspn_binary_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are binary digits or underscores.
Definition: pm_char.c:202
size_t pm_strspn_octal_number(const uint8_t *string, ptrdiff_t length, const uint8_t **invalid)
Returns the number of characters at the start of the string that are octal digits or underscores.
Definition: pm_char.c:216
size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list)
Returns the number of characters at the start of the string that are whitespace while also tracking t...
Definition: pm_char.c:86
bool pm_char_is_hexadecimal_digit(const uint8_t b)
Returns true if the given character is a hexadecimal digit.
Definition: pm_char.c:303
bool pm_char_is_octal_digit(const uint8_t b)
Returns true if the given character is an octal digit.
Definition: pm_char.c:287
bool pm_char_is_binary_digit(const uint8_t b)
Returns true if the given character is a binary digit.
Definition: pm_char.c:279
bool pm_char_is_inline_whitespace(const uint8_t b)
Returns true if the given character is an inline whitespace character.
Definition: pm_char.c:141
size_t pm_strspn_inline_whitespace(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are inline whitespace.
Definition: pm_char.c:108
bool pm_char_is_whitespace(const uint8_t b)
Returns true if the given character is a whitespace character.
Definition: pm_char.c:133
size_t pm_strspn_regexp_option(const uint8_t *string, ptrdiff_t length)
Returns the number of characters at the start of the string that are regexp options.
Definition: pm_char.c:117
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity)
Initialize a new constant pool with a given capacity.
pm_constant_id_t pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length)
Insert a constant into a constant pool that is a slice of a source string.
void pm_constant_id_list_free(pm_constant_id_list_t *list)
Free the memory associated with a list of constant ids.
pm_constant_id_t pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length)
Insert a constant into a constant pool from memory that is constant.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
void pm_constant_id_list_insert(pm_constant_id_list_t *list, size_t index, pm_constant_id_t id)
Insert a constant id into a list of constant ids at the specified index.
bool pm_constant_id_list_append(pm_constant_id_list_t *list, pm_constant_id_t id)
Append a constant id to a list of constant ids.
pm_constant_id_t pm_constant_pool_insert_owned(pm_constant_pool_t *pool, uint8_t *start, size_t length)
Insert a constant into a constant pool from memory that is now owned by the constant pool.
void pm_constant_id_list_init_capacity(pm_constant_id_list_t *list, size_t capacity)
Initialize a list of constant ids with a given capacity.
pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id)
Return a pointer to the constant indicated by the given constant id.
bool pm_constant_id_list_includes(pm_constant_id_list_t *list, pm_constant_id_t id)
Checks if the current constant id list includes the given constant id.
void pm_constant_pool_free(pm_constant_pool_t *pool)
Free the memory associated with a constant pool.
void pm_list_append(pm_list_t *list, pm_list_node_t *node)
Append a node to the given list.
Definition: pm_list.c:23
void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding)
We need to roll our own memchr to handle cases where the encoding changes and we need to search for a...
Definition: pm_memchr.c:11
pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line)
Returns the line and column of the given offset.
void pm_newline_list_free(pm_newline_list_t *list)
Free the internal memory allocated for the newline list.
int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line)
Returns the line of the given offset.
bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity)
Initialize a new newline list with the given capacity.
bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor)
Append a new offset to the newline list.
void pm_newline_list_clear(pm_newline_list_t *list)
Clear out the newlines that have been appended to the list.
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string)
Returns the length associated with the string.
Definition: pm_string.c:352
void pm_string_ensure_owned(pm_string_t *string)
Ensure the string is owned.
Definition: pm_string.c:315
void pm_string_owned_init(pm_string_t *string, uint8_t *source, size_t length)
Initialize an owned string that is responsible for freeing allocated memory.
Definition: pm_string.c:30
void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end)
Initialize a shared string that is based on initial input.
Definition: pm_string.c:16
#define PM_STRING_EMPTY
Defines an empty string.
Definition: pm_string.h:70
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
Definition: pm_string.c:368
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string)
Returns the start pointer associated with the string.
Definition: pm_string.c:360
int pm_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length)
Compare two strings, ignoring case, up to the given length.
const uint8_t * pm_strpbrk(pm_parser_t *parser, const uint8_t *source, const uint8_t *charset, ptrdiff_t length, bool validate)
Here we have rolled our own version of strpbrk.
Definition: pm_strpbrk.c:194
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
Definition: defines.h:233
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
Definition: defines.h:78
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
Definition: defines.h:34
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
Definition: defines.h:113
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition: defines.h:50
bool pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n)
Return true if the next character in the UTF-8 encoding is an uppercase character.
Definition: encoding.c:2346
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
Definition: encoding.h:252
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
Definition: encoding.h:245
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
Definition: encoding.h:68
const uint8_t pm_encoding_unicode_table[256]
This lookup table is referenced in both the UTF-8 encoding file and the parser directly in order to s...
Definition: encoding.c:2164
const pm_encoding_t * pm_encoding_find(const uint8_t *start, const uint8_t *end)
Parse the given name of an encoding and return a pointer to the corresponding encoding struct if one ...
Definition: encoding.c:5026
size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding.
Definition: encoding.c:2287
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
Definition: encoding.h:74
PRISM_EXPORTED_FUNCTION void pm_node_destroy(pm_parser_t *parser, struct pm_node *node)
Deallocate a node and all of its children.
Definition: node.c:114
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
Definition: node.h:17
void pm_node_list_free(pm_node_list_t *list)
Free the internal memory associated with the given node list.
Definition: node.c:88
void pm_node_list_concat(pm_node_list_t *list, pm_node_list_t *other)
Concatenate the given node list onto the end of the other node list.
Definition: node.c:77
void pm_node_list_append(pm_node_list_t *list, pm_node_t *node)
Append a new node onto the end of the node list.
Definition: node.c:55
#define PRISM_VERSION
The version of the Prism library as a constant string.
Definition: version.h:27
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
Definition: version.h:22
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
Definition: version.h:17
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
Definition: version.h:12
The main header file for the prism parser.
PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name)
Check that the slice is a valid method name.
Definition: prism.c:22781
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
Definition: prism.c:22369
PRISM_EXPORTED_FUNCTION const char * pm_version(void)
The prism version and the serialization format.
Definition: prism.c:7
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
Definition: prism.c:22407
pm_string_query_t
Represents the results of a slice query.
Definition: prism.h:240
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
Definition: prism.h:248
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
Definition: prism.h:242
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
Definition: prism.h:245
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
Definition: serialize.c:2121
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream to retrieve a line of input from a stream.
Definition: prism.h:88
PRISM_EXPORTED_FUNCTION void pm_serialize(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the AST represented by the given node to the given buffer.
Definition: prism.c:22581
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data)
Parse and serialize the comments in the given source to the given buffer.
Definition: prism.c:22636
PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data)
Parse and serialize the AST represented by the source that is read out of the given stream into th...
Definition: prism.c:22615
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
Definition: prism.c:22514
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Initiate parsing with the given parser.
Definition: prism.c:22433
PRISM_EXPORTED_FUNCTION void pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, const char *data)
Parse the given source to the AST and dump the AST to the given buffer.
Definition: prism.c:22592
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
Definition: serialize.c:2098
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given start and end pointers.
Definition: prism.c:22114
PRISM_EXPORTED_FUNCTION bool pm_parse_success_p(const uint8_t *source, size_t size, const char *data)
Parse the source and return true if it parses without errors or warnings.
Definition: prism.c:22537
PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name)
Check that the slice is a valid constant name.
Definition: prism.c:22761
PRISM_EXPORTED_FUNCTION pm_string_query_t pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name)
Check that the slice is a valid local variable name.
Definition: prism.c:22741
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
Definition: serialize.c:2028
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
Definition: token_type.c:362
PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data)
Parse a regular expression.
Definition: regexp.c:772
void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node)
Create a string-based representation of the given static literal.
pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace)
Add a node to the set of static literals.
void pm_static_literals_free(pm_static_literals_t *literals)
Free the internal memory associated with the given static literals set.
This struct is used to pass information between the regular expression parser and the error callback.
Definition: prism.c:17928
pm_parser_t * parser
The parser that we are parsing the regular expression for.
Definition: prism.c:17930
const uint8_t * start
The start of the regular expression.
Definition: prism.c:17933
bool shared
Whether or not the source of the regular expression is shared.
Definition: prism.c:17944
const uint8_t * end
The end of the regular expression.
Definition: prism.c:17936
This struct is used to pass information between the regular expression parser and the named capture c...
Definition: prism.c:20774
pm_constant_id_list_t names
The list of names that have been parsed.
Definition: prism.c:20785
pm_parser_t * parser
The parser that is parsing the regular expression.
Definition: prism.c:20776
pm_match_write_node_t * match
The match write node that is being created.
Definition: prism.c:20782
pm_call_node_t * call
The call node wrapping the regular expression node.
Definition: prism.c:20779
bool shared
Whether the content of the regular expression is shared.
Definition: prism.c:20792
AndNode.
Definition: ast.h:1258
struct pm_node * left
AndNode::left.
Definition: ast.h:1274
struct pm_node * right
AndNode::right.
Definition: ast.h:1287
ArgumentsNode.
Definition: ast.h:1319
pm_node_t base
The embedded base node.
Definition: ast.h:1321
struct pm_node_list arguments
ArgumentsNode::arguments.
Definition: ast.h:1327
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
Definition: prism.c:1575
pm_node_t * block
The optional block attached to the call.
Definition: prism.c:1586
bool has_forwarding
The flag indicating whether this arguments list has a forwarding argument.
Definition: prism.c:1589
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
Definition: prism.c:1577
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
Definition: prism.c:1580
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
Definition: prism.c:1583
ArrayNode.
Definition: ast.h:1345
struct pm_node_list elements
ArrayNode::elements.
Definition: ast.h:1355
ArrayPatternNode.
Definition: ast.h:1406
struct pm_node * constant
ArrayPatternNode::constant.
Definition: ast.h:1414
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
Definition: ast.h:1434
pm_node_t base
The embedded base node.
Definition: ast.h:1408
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
Definition: ast.h:1439
AssocNode.
Definition: ast.h:1454
struct pm_node * value
AssocNode::value.
Definition: ast.h:1486
struct pm_node * key
AssocNode::key.
Definition: ast.h:1473
BeginNode.
Definition: ast.h:1580
struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
Definition: ast.h:1608
struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
Definition: ast.h:1598
struct pm_statements_node * statements
BeginNode::statements.
Definition: ast.h:1593
pm_node_t base
The embedded base node.
Definition: ast.h:1582
struct pm_else_node * else_clause
BeginNode::else_clause.
Definition: ast.h:1603
This struct represents a set of binding powers used for a given token.
Definition: prism.c:12881
bool binary
Whether or not this token can be used as a binary operator.
Definition: prism.c:12889
pm_binding_power_t left
The left binding power.
Definition: prism.c:12883
bool nonassoc
Whether or not this token can be used as a non-associative binary operator.
Definition: prism.c:12895
pm_binding_power_t right
The right binding power.
Definition: prism.c:12886
BlockLocalVariableNode.
Definition: ast.h:1659
BlockNode.
Definition: ast.h:1682
BlockParameterNode.
Definition: ast.h:1729
BlockParametersNode.
Definition: ast.h:1766
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
Definition: pm_buffer.h:22
size_t length
The length of the buffer in bytes.
Definition: pm_buffer.h:24
char * value
A pointer to the start of the buffer.
Definition: pm_buffer.h:30
CallNode.
Definition: ast.h:1922
pm_location_t opening_loc
CallNode::opening_loc.
Definition: ast.h:1961
pm_location_t closing_loc
CallNode::closing_loc.
Definition: ast.h:1971
struct pm_node * receiver
CallNode::receiver.
Definition: ast.h:1941
pm_constant_id_t name
CallNode::name.
Definition: ast.h:1951
pm_node_t base
The embedded base node.
Definition: ast.h:1924
pm_location_t call_operator_loc
CallNode::call_operator_loc.
Definition: ast.h:1946
pm_location_t message_loc
CallNode::message_loc.
Definition: ast.h:1956
struct pm_arguments_node * arguments
CallNode::arguments.
Definition: ast.h:1966
struct pm_node * block
CallNode::block.
Definition: ast.h:1976
CaseMatchNode.
Definition: ast.h:2201
struct pm_node_list conditions
CaseMatchNode::conditions.
Definition: ast.h:2214
CaseNode.
Definition: ast.h:2246
struct pm_node_list conditions
CaseNode::conditions.
Definition: ast.h:2259
ClassVariableReadNode.
Definition: ast.h:2466
ClassVariableTargetNode.
Definition: ast.h:2495
ClassVariableWriteNode.
Definition: ast.h:2518
This is a node in the linked list of comments that we've found while parsing.
Definition: parser.h:458
pm_comment_type_t type
The type of comment that we've found.
Definition: parser.h:466
pm_location_t location
The location of the comment in the source.
Definition: parser.h:463
A list of constant IDs.
ConstantPathNode.
Definition: ast.h:2732
ConstantPathTargetNode.
Definition: ast.h:2870
ConstantReadNode.
Definition: ast.h:2965
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
ConstantTargetNode.
Definition: ast.h:2994
ConstantWriteNode.
Definition: ast.h:3017
This is a node in a linked list of contexts.
Definition: parser.h:439
pm_context_t context
The context that this node represents.
Definition: parser.h:441
struct pm_context_node * prev
A pointer to the previous context in the linked list.
Definition: parser.h:444
This struct represents a diagnostic generated during parsing.
Definition: diagnostic.h:359
pm_list_node_t node
The embedded base node.
Definition: diagnostic.h:361
pm_diagnostic_id_t diag_id
The ID of the diagnostic.
Definition: diagnostic.h:367
ElseNode.
Definition: ast.h:3196
struct pm_statements_node * statements
ElseNode::statements.
Definition: ast.h:3209
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition: encoding.h:23
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
Definition: encoding.h:36
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition: encoding.h:29
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
Definition: encoding.h:50
const char * name
The name of the encoding.
Definition: encoding.h:56
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
Definition: encoding.h:43
EnsureNode.
Definition: ast.h:3294
struct pm_statements_node * statements
EnsureNode::statements.
Definition: ast.h:3307
FindPatternNode.
Definition: ast.h:3351
struct pm_node * constant
FindPatternNode::constant.
Definition: ast.h:3359
pm_location_t opening_loc
FindPatternNode::opening_loc.
Definition: ast.h:3379
pm_node_t base
The embedded base node.
Definition: ast.h:3353
pm_location_t closing_loc
FindPatternNode::closing_loc.
Definition: ast.h:3384
FlipFlopNode.
Definition: ast.h:3402
FloatNode.
Definition: ast.h:3435
double value
FloatNode::value.
Definition: ast.h:3445
pm_node_t base
The embedded base node.
Definition: ast.h:3437
ForwardingParameterNode.
Definition: ast.h:3571
GlobalVariableReadNode.
Definition: ast.h:3731
GlobalVariableTargetNode.
Definition: ast.h:3760
GlobalVariableWriteNode.
Definition: ast.h:3783
HashNode.
Definition: ast.h:3845
struct pm_node_list elements
HashNode::elements.
Definition: ast.h:3871
HashPatternNode.
Definition: ast.h:3899
pm_location_t opening_loc
HashPatternNode::opening_loc.
Definition: ast.h:3922
pm_node_t base
The embedded base node.
Definition: ast.h:3901
pm_location_t closing_loc
HashPatternNode::closing_loc.
Definition: ast.h:3927
struct pm_node * constant
HashPatternNode::constant.
Definition: ast.h:3907
All of the information that must be stored in order to lex a heredoc.
Definition: parser.h:88
size_t ident_length
The length of the heredoc identifier.
Definition: parser.h:93
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
Definition: parser.h:96
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
Definition: parser.h:99
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
Definition: parser.h:90
IfNode.
Definition: ast.h:3948
struct pm_statements_node * statements
IfNode::statements.
Definition: ast.h:4008
struct pm_node * subsequent
IfNode::subsequent.
Definition: ast.h:4027
ImaginaryNode.
Definition: ast.h:4054
InstanceVariableReadNode.
Definition: ast.h:4544
InstanceVariableTargetNode.
Definition: ast.h:4573
InstanceVariableWriteNode.
Definition: ast.h:4596
IntegerNode.
Definition: ast.h:4664
pm_integer_t value
IntegerNode::value.
Definition: ast.h:4674
pm_node_t base
The embedded base node.
Definition: ast.h:4666
bool negative
Whether or not the integer is negative.
Definition: pm_integer.h:42
InterpolatedMatchLastLineNode.
Definition: ast.h:4702
InterpolatedRegularExpressionNode.
Definition: ast.h:4748
InterpolatedStringNode.
Definition: ast.h:4785
pm_node_t base
The embedded base node.
Definition: ast.h:4787
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
Definition: ast.h:4793
InterpolatedSymbolNode.
Definition: ast.h:4818
pm_node_t base
The embedded base node.
Definition: ast.h:4820
InterpolatedXStringNode.
Definition: ast.h:4851
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
Definition: ast.h:4859
pm_node_t base
The embedded base node.
Definition: ast.h:4853
struct pm_node_list parts
InterpolatedXStringNode::parts.
Definition: ast.h:4864
KeywordHashNode.
Definition: ast.h:4923
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
Definition: parser.h:518
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
Definition: parser.h:512
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
Definition: parser.h:109
union pm_lex_mode::@88 as
The data associated with this type of lex mode.
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
Definition: parser.h:254
enum pm_lex_mode::@87 mode
The type of this lex mode.
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
Definition: parser.h:246
int32_t line
The line number.
This struct represents an abstract linked list that provides common functionality.
Definition: pm_list.h:46
struct pm_list_node * next
A pointer to the next node in the list.
Definition: pm_list.h:48
This represents the overall linked list.
Definition: pm_list.h:55
pm_list_node_t * head
A pointer to the head of the list.
Definition: pm_list.h:60
size_t size
The size of the list.
Definition: pm_list.h:57
This tracks an individual local variable in a certain lexical context, as well as the number of times...
Definition: parser.h:532
pm_constant_id_t name
The name of the local variable.
Definition: parser.h:534
pm_location_t location
The location of the local variable in the source.
Definition: parser.h:537
uint32_t hash
The hash of the local variable.
Definition: parser.h:546
uint32_t index
The index of the local variable in the local table.
Definition: parser.h:540
uint32_t reads
The number of times the local variable is read.
Definition: parser.h:543
LocalVariableReadNode.
Definition: ast.h:5165
uint32_t depth
LocalVariableReadNode::depth.
Definition: ast.h:5196
pm_constant_id_t name
LocalVariableReadNode::name.
Definition: ast.h:5183
LocalVariableTargetNode.
Definition: ast.h:5211
LocalVariableWriteNode.
Definition: ast.h:5239
uint32_t depth
LocalVariableWriteNode::depth.
Definition: ast.h:5266
pm_constant_id_t name
LocalVariableWriteNode::name.
Definition: ast.h:5253
This is a set of local variables in a certain lexical context (method, class, module,...
Definition: parser.h:554
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
Definition: parser.h:562
uint32_t capacity
The capacity of the local variables set.
Definition: parser.h:559
uint32_t size
The number of local variables in the set.
Definition: parser.h:556
This represents a range of bytes in the source string to which a node or token corresponds.
Definition: ast.h:545
const uint8_t * start
A pointer to the start location of the range in the source.
Definition: ast.h:547
const uint8_t * end
A pointer to the end location of the range in the source.
Definition: ast.h:550
This is a node in the linked list of magic comments that we've found while parsing.
Definition: parser.h:475
MatchLastLineNode.
Definition: ast.h:5331
MatchWriteNode.
Definition: ast.h:5435
struct pm_node_list targets
MatchWriteNode::targets.
Definition: ast.h:5448
MultiTargetNode.
Definition: ast.h:5531
pm_node_t base
The embedded base node.
Definition: ast.h:5533
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
Definition: ast.h:5589
struct pm_node_list lefts
MultiTargetNode::lefts.
Definition: ast.h:5549
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
Definition: ast.h:5599
MultiWriteNode.
Definition: ast.h:5614
size_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
A list of nodes in the source, most often used for lists of children.
Definition: ast.h:558
size_t size
The number of nodes in the list.
Definition: ast.h:560
struct pm_node ** nodes
The nodes in the list.
Definition: ast.h:566
This is the base structure that represents a node in the syntax tree.
Definition: ast.h:1069
pm_node_type_t type
This represents the type of the node.
Definition: ast.h:1074
pm_node_flags_t flags
This represents any flags on the node.
Definition: ast.h:1080
pm_location_t location
This is the location of the node in the source.
Definition: ast.h:1092
OptionalParameterNode.
Definition: ast.h:5887
A scope of locals surrounding the code that is being parsed.
Definition: options.h:36
size_t locals_count
The number of locals in the scope.
Definition: options.h:38
The options that can be passed to the parser.
Definition: options.h:77
uint8_t command_line
A bitset of the various options that were set on the command line.
Definition: options.h:126
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
Definition: options.h:88
bool encoding_locked
Whether or not the encoding magic comments should be respected.
Definition: options.h:142
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
Definition: options.h:149
pm_string_t encoding
The name of the encoding that the source file is in.
Definition: options.h:103
int32_t line
The line within the file that the parse starts on.
Definition: options.h:97
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
Definition: options.h:82
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
Definition: options.h:135
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
Definition: options.h:159
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
Definition: options.h:108
pm_string_t filepath
The name of the file that is currently being parsed.
Definition: options.h:91
pm_options_version_t version
The version of prism that we should be parsing with.
Definition: options.h:123
OrNode.
Definition: ast.h:5925
struct pm_node * left
OrNode::left.
Definition: ast.h:5941
struct pm_node * right
OrNode::right.
Definition: ast.h:5954
ParametersNode.
Definition: ast.h:5980
struct pm_node * rest
ParametersNode::rest.
Definition: ast.h:5998
struct pm_block_parameter_node * block
ParametersNode::block.
Definition: ast.h:6018
pm_node_t base
The embedded base node.
Definition: ast.h:5982
struct pm_node * keyword_rest
ParametersNode::keyword_rest.
Definition: ast.h:6013
ParenthesesNode.
Definition: ast.h:6033
struct pm_node * body
ParenthesesNode::body.
Definition: ast.h:6041
This struct represents the overall parser.
Definition: parser.h:640
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
Definition: parser.h:840
pm_lex_state_t lex_state
The current state of the lexer.
Definition: parser.h:649
uint8_t command_line
The command line flags given from the options.
Definition: parser.h:859
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition: parser.h:755
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
Definition: parser.h:882
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts the calc...
Definition: parser.h:909
const uint8_t * end
The pointer to the end of the source.
Definition: parser.h:694
bool recovering
Whether or not we're currently recovering from a syntax error.
Definition: parser.h:888
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
Definition: parser.h:797
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
Definition: parser.h:930
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
Definition: parser.h:786
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
Definition: parser.h:912
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
Definition: parser.h:707
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
Definition: parser.h:749
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
Definition: parser.h:721
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
Definition: parser.h:658
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
Definition: parser.h:774
pm_options_version_t version
The version of prism that we should use to parse.
Definition: parser.h:856
pm_token_t previous
The previous token we were considering.
Definition: parser.h:697
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
Definition: parser.h:803
bool parsing_eval
Whether or not we are parsing an eval string.
Definition: parser.h:875
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
Definition: parser.h:924
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition: parser.h:903
pm_location_t data_loc
An optional location that represents the location of the END marker and the rest of the content of th...
Definition: parser.h:728
pm_context_node_t * current_context
The current parsing context.
Definition: parser.h:740
struct pm_parser::@93 lex_modes
A stack of lex modes.
const uint8_t * start
The pointer to the start of the source.
Definition: parser.h:691
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
Definition: parser.h:652
pm_list_t error_list
The list of errors that have been found while parsing.
Definition: parser.h:734
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
Definition: parser.h:869
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
Definition: parser.h:853
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
Definition: parser.h:768
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
Definition: parser.h:684
pm_list_t warning_list
The list of warnings that have been found while parsing.
Definition: parser.h:731
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
Definition: parser.h:715
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
Definition: parser.h:664
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
Definition: parser.h:762
int32_t start_line
The line number at the start of the parse.
Definition: parser.h:809
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encoding comments.
Definition: parser.h:896
pm_lex_mode_t * current
The current mode of the lexer.
Definition: parser.h:681
pm_list_t comment_list
The list of comments that have been found while parsing.
Definition: parser.h:718
size_t index
The current index into the lexer mode stack.
Definition: parser.h:687
pm_string_t filepath
This is the path of the file being parsed.
Definition: parser.h:780
pm_scope_t * current_scope
The current local scope.
Definition: parser.h:737
bool command_start
Whether or not we're at the beginning of a command.
Definition: parser.h:885
pm_newline_list_t newline_list
This is the list of newline offsets in the source file.
Definition: parser.h:789
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a comment or whitespace).
Definition: parser.h:918
uint32_t node_id
The next node identifier that will be assigned.
Definition: parser.h:646
RangeNode.
Definition: ast.h:6239
struct pm_node * right
RangeNode::right.
Definition: ast.h:6269
struct pm_node * left
RangeNode::left.
Definition: ast.h:6255
RationalNode.
Definition: ast.h:6297
pm_node_t base
The embedded base node.
Definition: ast.h:6299
pm_integer_t numerator
RationalNode::numerator.
Definition: ast.h:6309
In order to properly set a regular expression's encoding and to validate the byte sequence for the underlying encoding we must process any escape sequences.
Definition: prism.c:10364
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
Definition: prism.c:10369
pm_token_buffer_t base
The embedded base buffer.
Definition: prism.c:10366
RegularExpressionNode.
Definition: ast.h:6364
pm_node_t base
The embedded base node.
Definition: ast.h:6366
pm_string_t unescaped
RegularExpressionNode::unescaped.
Definition: ast.h:6387
RequiredParameterNode.
Definition: ast.h:6438
RescueNode.
Definition: ast.h:6499
struct pm_rescue_node * subsequent
RescueNode::subsequent.
Definition: ast.h:6532
pm_node_t base
The embedded base node.
Definition: ast.h:6501
This struct represents a node in a linked list of scopes.
Definition: parser.h:580
struct pm_scope * previous
A pointer to the previous scope in the linked list.
Definition: parser.h:582
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
Definition: parser.h:593
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
Definition: parser.h:620
pm_locals_t locals
The IDs of the locals in the given scope.
Definition: parser.h:585
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
Definition: parser.h:614
bool closed
A boolean indicating whether or not this scope can see into its parent.
Definition: parser.h:626
SplatNode.
Definition: ast.h:6794
struct pm_node * expression
SplatNode::expression.
Definition: ast.h:6807
StatementsNode.
Definition: ast.h:6822
struct pm_node_list body
StatementsNode::body.
Definition: ast.h:6830
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of potential issues.
StringNode.
Definition: ast.h:6857
pm_node_t base
The embedded base node.
Definition: ast.h:6859
pm_string_t unescaped
StringNode::unescaped.
Definition: ast.h:6880
pm_location_t closing_loc
StringNode::closing_loc.
Definition: ast.h:6875
pm_location_t opening_loc
StringNode::opening_loc.
Definition: ast.h:6865
A generic string type that can have various ownership semantics.
Definition: pm_string.h:33
const uint8_t * source
A pointer to the start of the string.
Definition: pm_string.h:35
enum pm_string_t::@94 type
The type of the string.
size_t length
The length of the string in bytes of memory.
Definition: pm_string.h:38
SymbolNode.
Definition: ast.h:6949
pm_location_t value_loc
SymbolNode::value_loc.
Definition: ast.h:6962
pm_string_t unescaped
SymbolNode::unescaped.
Definition: ast.h:6972
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated with the tokens.
Definition: prism.c:10338
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
Definition: prism.c:10343
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
Definition: prism.c:10349
This struct represents a token in the Ruby source.
Definition: ast.h:530
const uint8_t * end
A pointer to the end location of the token in the source.
Definition: ast.h:538
const uint8_t * start
A pointer to the start location of the token in the source.
Definition: ast.h:535
pm_token_type_t type
The type of the token.
Definition: ast.h:532
UndefNode.
Definition: ast.h:7005
UnlessNode.
Definition: ast.h:7036
struct pm_statements_node * statements
UnlessNode::statements.
Definition: ast.h:7086
struct pm_else_node * else_clause
UnlessNode::else_clause.
Definition: ast.h:7096
WhenNode.
Definition: ast.h:7167
XStringNode.
Definition: ast.h:7253