2#include "prism/node_new.h"
/**
 * The constant 8. Judging by the name, this is presumably the number of
 * columns a tab counts for when measuring whitespace -- TODO confirm at the
 * use sites, which are not visible in this excerpt.
 */
16#define PM_TAB_WHITESPACE_SIZE 8
/**
 * Function-like minimum and maximum of two values. Each argument appears
 * twice in the expansion, so an argument with side effects (for example
 * `i++`) can be evaluated twice.
 */
19#define MIN(a,b) (((a)<(b))?(a):(b))
20#define MAX(a,b) (((a)>(b))?(a):(b))
/** Cast the given value to uint32_t. */
26#define U32(value_) ((uint32_t) (value_))
/** Short aliases for PM_NODE_FLAGS and PM_NODE_UPCAST respectively. */
28#define FL PM_NODE_FLAGS
29#define UP PM_NODE_UPCAST
/** The `start` field of the location that location_ points to. */
31#define PM_LOCATION_START(location_) ((location_)->start)
/** `start + length` of the location that location_ points to. */
32#define PM_LOCATION_END(location_) ((location_)->start + (location_)->length)
/*
 * Token measurements. Tokens hold `start` and `end` pointers; these macros
 * turn pointer differences into uint32_t values through U32.
 */
/** Distance from the parser's `start` pointer to the token's `start`. */
34#define PM_TOKEN_START(parser_, token_) U32((token_)->start - (parser_)->start)
/** Distance from the parser's `start` pointer to the token's `end`. */
35#define PM_TOKEN_END(parser_, token_) U32((token_)->end - (parser_)->start)
/** Distance from the token's `start` to its own `end`. */
36#define PM_TOKEN_LENGTH(token_) U32((token_)->end - (token_)->start)
/** Distance from the `start` of left_ to the `end` of right_. */
37#define PM_TOKENS_LENGTH(left_, right_) U32((right_)->end - (left_)->start)
/*
 * Node measurements. Each macro upcasts its node argument with UP and reads
 * the `location.start` / `location.length` fields of the result.
 */
/** The node's `location.start` (expands to an lvalue expression). */
39#define PM_NODE_START(node_) (UP(node_)->location.start)
/** The node's `location.length` (expands to an lvalue expression). */
40#define PM_NODE_LENGTH(node_) (UP(node_)->location.length)
/** The node's `location.start + location.length`. */
41#define PM_NODE_END(node_) (UP(node_)->location.start + UP(node_)->location.length)
/** Span from the start of left_ to the end of right_. */
42#define PM_NODES_LENGTH(left_, right_) (PM_NODE_END(right_) - PM_NODE_START(left_))
/** Span from the start of a token to the end of a node. */
44#define PM_TOKEN_NODE_LENGTH(parser_, token_, node_) (PM_NODE_END(node_) - PM_TOKEN_START(parser_, token_))
/** Span from the start of a node to the end of a token. */
45#define PM_NODE_TOKEN_LENGTH(parser_, node_, token_) (PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
/*
 * Node location setters. The START setters assign only `location.start` and
 * leave `location.length` untouched. The LENGTH setters assign only
 * `location.length`, computed relative to the node's current start.
 */
/** Assign the start of right_ to the start of left_. */
47#define PM_NODE_START_SET_NODE(left_, right_) (PM_NODE_START(left_) = PM_NODE_START(right_))
/** Assign the token's start offset to the start of node_. */
48#define PM_NODE_START_SET_TOKEN(parser_, node_, token_) (PM_NODE_START(node_) = PM_TOKEN_START(parser_, token_))
/** Set the length of left_ to (end of right_) - (start of left_). */
49#define PM_NODE_LENGTH_SET_NODE(left_, right_) (PM_NODE_LENGTH(left_) = PM_NODE_END(right_) - PM_NODE_START(left_))
/** Set the length of node_ to (end offset of token_) - (start of node_). */
50#define PM_NODE_LENGTH_SET_TOKEN(parser_, node_, token_) (PM_NODE_LENGTH(node_) = PM_TOKEN_END(parser_, token_) - PM_NODE_START(node_))
/** Set the length of node_ to (end of *location_) - (start of node_). */
51#define PM_NODE_LENGTH_SET_LOCATION(node_, location_) (PM_NODE_LENGTH(node_) = PM_LOCATION_END(location_) - PM_NODE_START(node_))
/*
 * Location constructors. Apart from PM_LOCATION_INIT_NODE, each expands to a
 * pm_location_t compound literal built by PM_LOCATION_INIT.
 */
/** A pm_location_t compound literal with the given start and length. */
53#define PM_LOCATION_INIT(start_, length_) ((pm_location_t) { .start = (start_), .length = (length_) })
/** A location with start 0 and length 0. */
54#define PM_LOCATION_INIT_UNSET PM_LOCATION_INIT(0, 0)
/** A location covering a single token. */
55#define PM_LOCATION_INIT_TOKEN(parser_, token_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_))
/** The node's own `location` member (not a new compound literal). */
56#define PM_LOCATION_INIT_NODE(node_) UP(node_)->location
/** A location from the start of token left_ to the end of token right_. */
58#define PM_LOCATION_INIT_TOKENS(parser_, left_, right_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, left_), PM_TOKENS_LENGTH(left_, right_))
/** A location from the start of node left_ to the end of node right_. */
59#define PM_LOCATION_INIT_NODES(left_, right_) PM_LOCATION_INIT(PM_NODE_START(left_), PM_NODES_LENGTH(left_, right_))
/** A location from the start of a token to the end of a node. */
60#define PM_LOCATION_INIT_TOKEN_NODE(parser_, token_, node_) PM_LOCATION_INIT(PM_TOKEN_START(parser_, token_), PM_TOKEN_NODE_LENGTH(parser_, token_, node_))
/** A location from the start of a node to the end of a token. */
61#define PM_LOCATION_INIT_NODE_TOKEN(parser_, node_, token_) PM_LOCATION_INIT(PM_NODE_START(node_), PM_NODE_TOKEN_LENGTH(parser_, node_, token_))
/** Shorthand for PM_LOCATION_INIT_TOKEN. token_ is a pointer to a token. */
63#define TOK2LOC(parser_, token_) PM_LOCATION_INIT_TOKEN(parser_, token_)
/**
 * Like TOK2LOC, but yields PM_LOCATION_INIT_UNSET when the token pointer
 * itself is NULL.
 */
64#define NTOK2LOC(parser_, token_) ((token_) == NULL ? PM_LOCATION_INIT_UNSET : TOK2LOC(parser_, token_))
/**
 * Takes a token lvalue (not a pointer): yields NULL when its `start` field is
 * NULL, otherwise the address of the token.
 */
65#define NTOK2PTR(token_) ((token_).start == NULL ? NULL : &(token_))
76lex_mode_incrementor(
const uint8_t start) {
93lex_mode_terminator(
const uint8_t start) {
135lex_mode_push_list(
pm_parser_t *parser,
bool interpolation, uint8_t delimiter) {
136 uint8_t incrementor = lex_mode_incrementor(delimiter);
137 uint8_t terminator = lex_mode_terminator(delimiter);
143 .interpolation = interpolation,
144 .incrementor = incrementor,
145 .terminator = terminator
152 memcpy(breakpoints,
"\\ \t\f\r\v\n\0\0\0",
sizeof(lex_mode.
as.list.
breakpoints));
157 if (terminator !=
'\0') {
158 breakpoints[index++] = terminator;
164 breakpoints[index++] =
'#';
168 if (incrementor !=
'\0') {
169 breakpoints[index++] = incrementor;
173 return lex_mode_push(parser, lex_mode);
183 return lex_mode_push_list(parser,
false,
'\0');
190lex_mode_push_regexp(
pm_parser_t *parser, uint8_t incrementor, uint8_t terminator) {
192 .
mode = PM_LEX_REGEXP,
195 .incrementor = incrementor,
196 .terminator = terminator
204 memcpy(breakpoints,
"\r\n\\#\0\0",
sizeof(lex_mode.
as.regexp.
breakpoints));
208 if (terminator !=
'\0') {
209 breakpoints[index++] = terminator;
213 if (incrementor !=
'\0') {
214 breakpoints[index++] = incrementor;
218 return lex_mode_push(parser, lex_mode);
225lex_mode_push_string(
pm_parser_t *parser,
bool interpolation,
bool label_allowed, uint8_t incrementor, uint8_t terminator) {
227 .
mode = PM_LEX_STRING,
230 .interpolation = interpolation,
231 .label_allowed = label_allowed,
232 .incrementor = incrementor,
233 .terminator = terminator
240 memcpy(breakpoints,
"\r\n\\\0\0\0",
sizeof(lex_mode.
as.string.
breakpoints));
245 if (terminator !=
'\0') {
246 breakpoints[index++] = terminator;
252 breakpoints[index++] =
'#';
257 if (incrementor !=
'\0') {
258 breakpoints[index++] = incrementor;
262 return lex_mode_push(parser, lex_mode);
272 return lex_mode_push_string(parser,
false,
false,
'\0',
'\0');
304 PM_IGNORED_NEWLINE_NONE = 0,
305 PM_IGNORED_NEWLINE_ALL,
306 PM_IGNORED_NEWLINE_PATTERN
307} pm_ignored_newline_type_t;
309static inline pm_ignored_newline_type_t
311 bool ignored = lex_state_p(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_CLASS | PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT) && !lex_state_p(parser, PM_LEX_STATE_LABELED);
314 return PM_IGNORED_NEWLINE_ALL;
315 }
else if ((parser->
lex_state & ~((
unsigned int) PM_LEX_STATE_LABEL)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) {
316 return PM_IGNORED_NEWLINE_PATTERN;
318 return PM_IGNORED_NEWLINE_NONE;
324 return lex_state_p(parser, PM_LEX_STATE_BEG_ANY) || ((parser->
lex_state & (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED)) == (PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED));
329 return lex_state_p(parser, PM_LEX_STATE_ARG_ANY);
333lex_state_spcarg_p(
pm_parser_t *parser,
bool space_seen) {
337 return lex_state_arg_p(parser) && space_seen && !pm_char_is_whitespace(*parser->
current.end);
342 return lex_state_p(parser, PM_LEX_STATE_END_ANY);
350 return lex_state_p(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_DOT);
362#ifndef PM_DEBUG_LOGGING
367#define PM_DEBUG_LOGGING 0
373 fprintf(stderr,
"STATE: ");
376 if (parser->
lex_state == PM_LEX_STATE_NONE) {
377 fprintf(stderr,
"NONE\n");
381#define CHECK_STATE(state) \
382 if (parser->lex_state & state) { \
383 if (!first) fprintf(stderr, "|"); \
384 fprintf(stderr, "%s", #state); \
388 CHECK_STATE(PM_LEX_STATE_BEG)
389 CHECK_STATE(PM_LEX_STATE_END)
390 CHECK_STATE(PM_LEX_STATE_ENDARG)
391 CHECK_STATE(PM_LEX_STATE_ENDFN)
392 CHECK_STATE(PM_LEX_STATE_ARG)
393 CHECK_STATE(PM_LEX_STATE_CMDARG)
394 CHECK_STATE(PM_LEX_STATE_MID)
395 CHECK_STATE(PM_LEX_STATE_FNAME)
396 CHECK_STATE(PM_LEX_STATE_DOT)
397 CHECK_STATE(PM_LEX_STATE_CLASS)
398 CHECK_STATE(PM_LEX_STATE_LABEL)
399 CHECK_STATE(PM_LEX_STATE_LABELED)
400 CHECK_STATE(PM_LEX_STATE_FITEM)
404 fprintf(stderr,
"\n");
409 fprintf(stderr,
"Caller: %s:%d\nPrevious: ", caller_name, line_number);
411 lex_state_set(parser, state);
412 fprintf(stderr,
"Now: ");
414 fprintf(stderr,
"\n");
/**
 * Route every lex_state_set(parser, state) call that follows this definition
 * through debug_lex_state_set, additionally passing the calling function's
 * name (__func__) and source line (__LINE__).
 * NOTE(review): presumably only compiled when PM_DEBUG_LOGGING is enabled;
 * the enclosing conditional is not visible in this excerpt -- confirm.
 */
417#define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
/**
 * Bitwise-AND of the parser's `command_line` field with the given option
 * mask: non-zero when any bit of the mask is set.
 */
425#define PM_PARSER_COMMAND_LINE_OPTION(parser, option) ((parser)->command_line & (option))
/** Tests the PM_OPTIONS_COMMAND_LINE_A bit. */
428#define PM_PARSER_COMMAND_LINE_OPTION_A(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_A)
/** Tests the PM_OPTIONS_COMMAND_LINE_E bit. */
431#define PM_PARSER_COMMAND_LINE_OPTION_E(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_E)
/** Tests the PM_OPTIONS_COMMAND_LINE_L bit. */
434#define PM_PARSER_COMMAND_LINE_OPTION_L(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_L)
/** Tests the PM_OPTIONS_COMMAND_LINE_N bit. */
437#define PM_PARSER_COMMAND_LINE_OPTION_N(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_N)
/** Tests the PM_OPTIONS_COMMAND_LINE_P bit. */
440#define PM_PARSER_COMMAND_LINE_OPTION_P(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_P)
/** Tests the PM_OPTIONS_COMMAND_LINE_X bit. */
443#define PM_PARSER_COMMAND_LINE_OPTION_X(parser) PM_PARSER_COMMAND_LINE_OPTION(parser, PM_OPTIONS_COMMAND_LINE_X)
454 pm_diagnostic_list_append(&parser->
error_list, start, length, diag_id);
463 pm_parser_err(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
472 pm_parser_err_token(parser, &parser->
current, diag_id);
481 pm_parser_err_token(parser, &parser->
previous, diag_id);
490 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
/**
 * Call pm_diagnostic_list_append_format on the parser's error_list with the
 * given start, length and diagnostic id, forwarding the variadic arguments.
 */
496#define PM_PARSER_ERR_FORMAT(parser_, start_, length_, diag_id_, ...) \
497 pm_diagnostic_list_append_format(&(parser_)->error_list, start_, length_, diag_id_, __VA_ARGS__)
/**
 * PM_PARSER_ERR_FORMAT using the start and length of the given node as the
 * diagnostic's range.
 */
503#define PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, ...) \
504 PM_PARSER_ERR_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
/**
 * PM_PARSER_ERR_NODE_FORMAT with two format arguments describing the node's
 * source text: its length cast to int, and a `const char *` to its first byte
 * (the parser's `start` pointer advanced by the node's start offset).
 *
 * parser_ is parenthesized before `->` so that any pointer expression (for
 * example `&parser`) can be passed, matching the other macros in this family.
 */
#define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser_, node_, diag_id_) \
    PM_PARSER_ERR_NODE_FORMAT(parser_, node_, diag_id_, (int) PM_NODE_LENGTH(node_), (const char *) ((parser_)->start + PM_NODE_START(node_)))
/**
 * PM_PARSER_ERR_FORMAT using the start offset and length of the given token
 * as the diagnostic's range.
 */
517#define PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id, ...) \
518 PM_PARSER_ERR_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id, __VA_ARGS__)
/**
 * PM_PARSER_ERR_TOKEN_FORMAT with two format arguments describing the token's
 * source text: its length cast to int, and its `start` pointer as a
 * `const char *`.
 */
524#define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
525 PM_PARSER_ERR_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
532 pm_diagnostic_list_append(&parser->
warning_list, start, length, diag_id);
541 pm_parser_warn(parser, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), diag_id);
550 pm_parser_warn(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), diag_id);
/**
 * Call pm_diagnostic_list_append_format on the parser's warning_list with the
 * given start, length and diagnostic id, forwarding the variadic arguments.
 */
557#define PM_PARSER_WARN_FORMAT(parser_, start_, length_, diag_id_, ...) \
558 pm_diagnostic_list_append_format(&(parser_)->warning_list, start_, length_, diag_id_, __VA_ARGS__)
/**
 * PM_PARSER_WARN_FORMAT using the start offset and length of the given token
 * as the diagnostic's range.
 */
564#define PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, ...) \
565 PM_PARSER_WARN_FORMAT(parser_, PM_TOKEN_START(parser_, token_), PM_TOKEN_LENGTH(token_), diag_id_, __VA_ARGS__)
/**
 * PM_PARSER_WARN_TOKEN_FORMAT with two format arguments describing the
 * token's source text: its length cast to int, and its `start` pointer as a
 * `const char *`.
 */
571#define PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser_, token_, diag_id_) \
572 PM_PARSER_WARN_TOKEN_FORMAT(parser_, token_, diag_id_, (int) PM_TOKEN_LENGTH(token_), (const char *) (token_)->start)
/**
 * PM_PARSER_WARN_FORMAT using the start and length of the given node as the
 * diagnostic's range.
 */
578#define PM_PARSER_WARN_NODE_FORMAT(parser_, node_, diag_id_, ...) \
579 PM_PARSER_WARN_FORMAT(parser_, PM_NODE_START(node_), PM_NODE_LENGTH(node_), diag_id_, __VA_ARGS__)
587pm_parser_err_heredoc_term(
pm_parser_t *parser,
const uint8_t *ident_start,
size_t ident_length) {
588 PM_PARSER_ERR_FORMAT(
590 U32(ident_start - parser->
start),
594 (
const char *) ident_start
606pm_parser_scope_push(
pm_parser_t *parser,
bool closed) {
608 if (scope == NULL)
return false;
613 .parameters = PM_SCOPE_PARAMETERS_NONE,
614 .implicit_parameters = { 0 },
632 if (scope->
previous == NULL)
return true;
633 if (scope->
closed)
return false;
634 }
while ((scope = scope->
previous) != NULL);
636 assert(
false &&
"unreachable");
644pm_parser_scope_find(
pm_parser_t *parser, uint32_t depth) {
647 while (depth-- > 0) {
648 assert(scope != NULL);
656 PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS,
657 PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT,
658 PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL
659} pm_scope_forwarding_param_check_result_t;
661static pm_scope_forwarding_param_check_result_t
662pm_parser_scope_forwarding_param_check(
pm_parser_t *parser,
const uint8_t mask) {
664 bool conflict =
false;
666 while (scope != NULL) {
670 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT;
672 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS;
683 return PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL;
688 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_BLOCK)) {
689 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
692 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
693 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_AMPERSAND);
695 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
696 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_AMPERSAND);
703 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_POSITIONALS)) {
704 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
707 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
708 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR);
710 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
711 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
718 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_ALL)) {
719 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
722 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
727 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
728 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
735 switch (pm_parser_scope_forwarding_param_check(parser, PM_SCOPE_PARAMETERS_FORWARDING_KEYWORDS)) {
736 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_PASS:
739 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_CONFLICT:
740 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_CONFLICT_STAR_STAR);
742 case PM_SCOPE_FORWARDING_PARAM_CHECK_RESULT_FAIL:
743 pm_parser_err_token(parser, token, PM_ERR_ARGUMENT_NO_FORWARDING_STAR_STAR);
752pm_parser_scope_shareable_constant_get(
pm_parser_t *parser) {
/**
 * Capacity threshold for the locals table. In the code that follows, a table
 * whose capacity is below this value is walked index by index, while larger
 * tables are probed starting at `pm_locals_hash(name) & (capacity - 1)`.
 * NOTE(review): only fragments of those functions are visible in this
 * excerpt -- confirm against the full definitions.
 */
776#define PM_LOCALS_HASH_THRESHOLD 9
791 name = ((name >> 16) ^ name) * 0x45d9f3b;
792 name = ((name >> 16) ^ name) * 0x45d9f3b;
793 name = (name >> 16) ^ name;
803 uint32_t next_capacity = locals->
capacity == 0 ? 4 : (locals->
capacity * 2);
804 assert(next_capacity > locals->
capacity);
807 if (next_locals == NULL) abort();
809 if (next_capacity < PM_LOCALS_HASH_THRESHOLD) {
810 if (locals->
size > 0) {
816 bool hash_needed = (locals->
capacity <= PM_LOCALS_HASH_THRESHOLD);
817 uint32_t mask = next_capacity - 1;
819 for (uint32_t index = 0; index < locals->
capacity; index++) {
823 if (hash_needed) local->
hash = pm_locals_hash(local->
name);
825 uint32_t hash = local->
hash;
827 next_locals[hash & mask] = *local;
832 pm_locals_free(locals);
833 locals->
locals = next_locals;
855 pm_locals_resize(locals);
858 if (locals->
capacity < PM_LOCALS_HASH_THRESHOLD) {
859 for (uint32_t index = 0; index < locals->
capacity; index++) {
865 .location = { .start = start, .length = length },
866 .index = locals->
size++,
871 }
else if (local->
name == name) {
876 uint32_t mask = locals->
capacity - 1;
877 uint32_t hash = pm_locals_hash(name);
878 uint32_t initial_hash = hash;
886 .location = { .start = start, .length = length },
887 .index = locals->
size++,
892 }
else if (local->
name == name) {
897 }
while ((hash & mask) != initial_hash);
900 assert(
false &&
"unreachable");
910 if (locals->
capacity < PM_LOCALS_HASH_THRESHOLD) {
911 for (uint32_t index = 0; index < locals->
size; index++) {
913 if (local->
name == name)
return index;
916 uint32_t mask = locals->
capacity - 1;
917 uint32_t hash = pm_locals_hash(name);
918 uint32_t initial_hash = hash & mask;
925 }
else if (local->
name == name) {
930 }
while ((hash & mask) != initial_hash);
942 uint32_t index = pm_locals_find(locals, name);
943 assert(index != UINT32_MAX);
946 assert(local->
reads < UINT32_MAX);
957 uint32_t index = pm_locals_find(locals, name);
958 assert(index != UINT32_MAX);
961 assert(local->
reads > 0);
971 uint32_t index = pm_locals_find(locals, name);
972 assert(index != UINT32_MAX);
987 pm_constant_id_list_init_capacity(parser->arena, list, locals->
size);
992 uint32_t capacity = locals->
capacity < PM_LOCALS_HASH_THRESHOLD ? locals->
size : locals->
capacity;
996 bool warn_unused = !toplevel || (!parser->parsing_eval && !PM_PARSER_COMMAND_LINE_OPTION_E(parser));
998 for (uint32_t index = 0; index < capacity; index++) {
1002 pm_constant_id_list_insert(list, (
size_t) local->
index, local->
name);
1004 if (warn_unused && local->
reads == 0 && ((parser->start_line >= 0) || (pm_line_offset_list_line(&parser->line_offsets, local->
location.
start, parser->start_line) >= 0))) {
1005 pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, local->
name);
1007 if (constant->
length >= 1 && *constant->
start !=
'_') {
1008 PM_PARSER_WARN_FORMAT(
1012 PM_WARN_UNUSED_LOCAL_VARIABLE,
1014 (
const char *) constant->
start
1030pm_parser_constant_id_raw(
pm_parser_t *parser,
const uint8_t *start,
const uint8_t *end) {
1031 return pm_constant_pool_insert_shared(&parser->
constant_pool, start, (
size_t) (end - start));
1038pm_parser_constant_id_owned(
pm_parser_t *parser, uint8_t *start,
size_t length) {
1039 return pm_constant_pool_insert_owned(&parser->
constant_pool, start, length);
1046pm_parser_constant_id_constant(
pm_parser_t *parser,
const char *start,
size_t length) {
1047 return pm_constant_pool_insert_constant(&parser->
constant_pool, (
const uint8_t *) start, length);
1055 return pm_parser_constant_id_raw(parser, token->start, token->end);
/**
 * A run of switch case labels covering the return, break, next, redo, retry
 * and match-required node types. The expansion omits the leading `case`
 * keyword and the trailing colon, so it is written as
 * `case PM_CASE_VOID_VALUE:` inside a switch.
 */
1062#define PM_CASE_VOID_VALUE PM_RETURN_NODE: case PM_BREAK_NODE: case PM_NEXT_NODE: \
1063 case PM_REDO_NODE: case PM_RETRY_NODE: case PM_MATCH_REQUIRED_NODE
1074 while (node != NULL) {
1075 switch (PM_NODE_TYPE(node)) {
1076 case PM_CASE_VOID_VALUE:
1077 return void_node != NULL ? void_node : node;
1078 case PM_MATCH_PREDICATE_NODE:
1080 case PM_BEGIN_NODE: {
1086 if (vn != NULL)
return vn;
1091 if (vn != NULL)
return vn;
1101 if (vn == NULL)
return NULL;
1102 if (void_node == NULL) void_node = vn;
1106 pm_node_t *vn = pm_check_value_expression(parser, UP(rescue_clause->statements));
1123 pm_node_t *vn = pm_check_value_expression(parser, node);
1124 if (vn != NULL)
return vn;
1135 case PM_CASE_NODE: {
1146 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
1150 if (vn == NULL)
return NULL;
1151 if (void_node == NULL) void_node = vn;
1154 node = UP(cast->else_clause);
1157 case PM_CASE_MATCH_NODE: {
1168 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
1172 if (vn == NULL)
return NULL;
1173 if (void_node == NULL) void_node = vn;
1176 node = UP(cast->else_clause);
1179 case PM_ENSURE_NODE: {
1184 case PM_PARENTHESES_NODE: {
1186 node = UP(cast->
body);
1189 case PM_STATEMENTS_NODE: {
1196 switch (PM_NODE_TYPE(body_part)) {
1197 case PM_CASE_VOID_VALUE:
1198 if (void_node == NULL) {
1199 void_node = body_part;
1219 if (void_node == NULL) {
1225 case PM_UNLESS_NODE: {
1234 if (void_node == NULL) {
1240 case PM_ELSE_NODE: {
1255 case PM_LOCAL_VARIABLE_WRITE_NODE: {
1259 for (uint32_t depth = 0; depth < cast->
depth; depth++) scope = scope->
previous;
1274 pm_node_t *void_node = pm_check_value_expression(parser, node);
1275 if (void_node != NULL) {
1276 pm_parser_err_node(parser, void_node, PM_ERR_VOID_EXPRESSION);
1285 const char *
type = NULL;
1288 switch (PM_NODE_TYPE(node)) {
1289 case PM_BACK_REFERENCE_READ_NODE:
1290 case PM_CLASS_VARIABLE_READ_NODE:
1291 case PM_GLOBAL_VARIABLE_READ_NODE:
1292 case PM_INSTANCE_VARIABLE_READ_NODE:
1293 case PM_LOCAL_VARIABLE_READ_NODE:
1294 case PM_NUMBERED_REFERENCE_READ_NODE:
1295 type =
"a variable";
1298 case PM_CALL_NODE: {
1303 switch (message->
length) {
1305 switch (message->
start[0]) {
1322 switch (message->
start[1]) {
1324 if (message->
start[0] ==
'<' || message->
start[0] ==
'>' || message->
start[0] ==
'!' || message->
start[0] ==
'=') {
1330 if (message->
start[0] ==
'+' || message->
start[0] ==
'-') {
1336 if (message->
start[0] ==
'*') {
1344 if (memcmp(message->
start,
"<=>", 3) == 0) {
1353 case PM_CONSTANT_PATH_NODE:
1357 case PM_CONSTANT_READ_NODE:
1358 type =
"a constant";
1361 case PM_DEFINED_NODE:
1370 case PM_IMAGINARY_NODE:
1371 case PM_INTEGER_NODE:
1372 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1373 case PM_INTERPOLATED_STRING_NODE:
1374 case PM_RATIONAL_NODE:
1375 case PM_REGULAR_EXPRESSION_NODE:
1376 case PM_SOURCE_ENCODING_NODE:
1377 case PM_SOURCE_FILE_NODE:
1378 case PM_SOURCE_LINE_NODE:
1379 case PM_STRING_NODE:
1380 case PM_SYMBOL_NODE:
1388 case PM_RANGE_NODE: {
1391 if (PM_NODE_FLAG_P(cast, PM_RANGE_FLAGS_EXCLUDE_END)) {
1414 PM_PARSER_WARN_NODE_FORMAT(parser, node, PM_WARN_VOID_STATEMENT, length,
type);
1425 const size_t size = node->
body.
size - (last_value ? 1 : 0);
1426 for (
size_t index = 0; index < size; index++) {
1427 pm_void_statement_check(parser, node->
body.
nodes[index]);
1437 PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL,
1438 PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP,
1439 PM_CONDITIONAL_PREDICATE_TYPE_NOT
1440} pm_conditional_predicate_type_t;
1448 case PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL:
1449 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix,
"condition");
1451 case PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP:
1452 PM_PARSER_WARN_NODE_FORMAT(parser, node, diag_id, prefix,
"flip-flop");
1454 case PM_CONDITIONAL_PREDICATE_TYPE_NOT:
1464pm_conditional_predicate_warn_write_literal_p(
const pm_node_t *node) {
1465 switch (PM_NODE_TYPE(node)) {
1466 case PM_ARRAY_NODE: {
1467 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL))
return true;
1470 for (
size_t index = 0; index < cast->
elements.
size; index++) {
1471 if (!pm_conditional_predicate_warn_write_literal_p(cast->
elements.
nodes[index]))
return false;
1476 case PM_HASH_NODE: {
1477 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL))
return true;
1480 for (
size_t index = 0; index < cast->
elements.
size; index++) {
1482 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE))
return false;
1485 if (!pm_conditional_predicate_warn_write_literal_p(assoc->
key) || !pm_conditional_predicate_warn_write_literal_p(assoc->
value))
return false;
1492 case PM_IMAGINARY_NODE:
1493 case PM_INTEGER_NODE:
1495 case PM_RATIONAL_NODE:
1496 case PM_REGULAR_EXPRESSION_NODE:
1497 case PM_SOURCE_ENCODING_NODE:
1498 case PM_SOURCE_FILE_NODE:
1499 case PM_SOURCE_LINE_NODE:
1500 case PM_STRING_NODE:
1501 case PM_SYMBOL_NODE:
1515 if (pm_conditional_predicate_warn_write_literal_p(node)) {
1534 switch (PM_NODE_TYPE(node)) {
1537 pm_conditional_predicate(parser, cast->
left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1538 pm_conditional_predicate(parser, cast->
right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1543 pm_conditional_predicate(parser, cast->
left, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1544 pm_conditional_predicate(parser, cast->
right, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
1547 case PM_PARENTHESES_NODE: {
1550 if ((cast->
body != NULL) && PM_NODE_TYPE_P(cast->
body, PM_STATEMENTS_NODE)) {
1552 if (statements->body.size == 1) pm_conditional_predicate(parser, statements->body.nodes[0],
type);
1557 case PM_BEGIN_NODE: {
1565 case PM_RANGE_NODE: {
1568 if (cast->
left != NULL) pm_conditional_predicate(parser, cast->
left, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1569 if (cast->
right != NULL) pm_conditional_predicate(parser, cast->
right, PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP);
1577 node->
type = PM_FLIP_FLOP_NODE;
1581 case PM_REGULAR_EXPRESSION_NODE:
1586 node->
type = PM_MATCH_LAST_LINE_NODE;
1588 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1589 pm_parser_warn_conditional_predicate_literal(parser, node,
type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT,
"regex ");
1593 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
1598 node->
type = PM_INTERPOLATED_MATCH_LAST_LINE_NODE;
1600 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1601 pm_parser_warn_conditional_predicate_literal(parser, node,
type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE,
"regex ");
1605 case PM_INTEGER_NODE:
1606 if (
type == PM_CONDITIONAL_PREDICATE_TYPE_FLIP_FLOP) {
1607 if (!PM_PARSER_COMMAND_LINE_OPTION_E(parser)) {
1608 pm_parser_warn_node(parser, node, PM_WARN_INTEGER_IN_FLIP_FLOP);
1611 pm_parser_warn_conditional_predicate_literal(parser, node,
type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE,
"");
1614 case PM_STRING_NODE:
1615 case PM_SOURCE_FILE_NODE:
1616 case PM_INTERPOLATED_STRING_NODE:
1617 pm_parser_warn_conditional_predicate_literal(parser, node,
type, PM_WARN_LITERAL_IN_CONDITION_DEFAULT,
"string ");
1619 case PM_SYMBOL_NODE:
1620 case PM_INTERPOLATED_SYMBOL_NODE:
1621 pm_parser_warn_conditional_predicate_literal(parser, node,
type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE,
"symbol ");
1623 case PM_SOURCE_LINE_NODE:
1624 case PM_SOURCE_ENCODING_NODE:
1626 case PM_RATIONAL_NODE:
1627 case PM_IMAGINARY_NODE:
1628 pm_parser_warn_conditional_predicate_literal(parser, node,
type, PM_WARN_LITERAL_IN_CONDITION_VERBOSE,
"");
1630 case PM_CLASS_VARIABLE_WRITE_NODE:
1633 case PM_CONSTANT_WRITE_NODE:
1636 case PM_GLOBAL_VARIABLE_WRITE_NODE:
1639 case PM_INSTANCE_VARIABLE_WRITE_NODE:
1642 case PM_LOCAL_VARIABLE_WRITE_NODE:
1645 case PM_MULTI_WRITE_NODE:
1681 if (arguments->
block != NULL) {
1682 uint32_t end = PM_NODE_END(arguments->
block);
1685 uint32_t arguments_end = PM_LOCATION_END(&arguments->
closing_loc);
1686 if (arguments_end > end) {
1729 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
1743char_is_identifier_start(
const pm_parser_t *parser,
const uint8_t *b, ptrdiff_t n) {
1744 if (n <= 0)
return 0;
1751 }
else if (*b ==
'_') {
1753 }
else if (*b >= 0x80) {
1758 }
else if (*b < 0x80) {
1761 return pm_encoding_utf_8_char_width(b, n);
1770char_is_identifier_utf8(
const uint8_t *b, ptrdiff_t n) {
1773 }
else if (*b < 0x80) {
1776 return pm_encoding_utf_8_char_width(b, n);
// Parser-aware identifier-character check for the bytes at b (n available).
// Visible cases: an underscore, a byte >= 0x80, and a path that defers to
// char_is_identifier_utf8(b, n).
// NOTE(review): the branch bodies are not visible in this excerpt.
1786char_is_identifier(
const pm_parser_t *parser,
const uint8_t *b, ptrdiff_t n) {
1794 }
else if (*b ==
'_') {
1796 }
else if (*b >= 0x80) {
1802 return char_is_identifier_utf8(b, n);
// Bitset of punctuation characters, used by char_is_global_name_punctuation.
//
// BIT(c, idx) yields a word with bit (c % 32) set when (c / 32 - 1) equals
// idx, and 0 otherwise. PUNCT(idx) ORs BIT over a list of punctuation
// characters, producing the idx-th 32-bit word of the set. The table holds
// (0x7e - 0x20 + 31) / 32 == 3 words, filled with PUNCT(0), PUNCT(1), PUNCT(2).
// NOTE(review): the tail of the PUNCT definition is not visible in this excerpt.
1809#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
1810#define PUNCT(idx) ( \
1811 BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
1812 BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
1813 BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
1814 BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
1815 BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
1818const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
// Looks the byte b up in pm_global_name_punctuation_hash and returns the
// matching bit.
1824char_is_global_name_punctuation(const uint8_t b) {
1825 const unsigned int i = (const unsigned int) b;
// Bytes at or below 0x20 and above 0x7e are never in the set.
1826 if (i <= 0x20 || 0x7e < i) return false;
// Select the 32-bit word for this byte, then test bit (i % 32) within it.
1828 return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
// Tests whether a token names a setter. The visible expression accepts the
// PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL token type, or an identifier token at
// least two bytes long whose final byte is '='.
1832token_is_setter_name(pm_token_t *token) {
1834 (token->type == PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL) ||
1835 ((token->type == PM_TOKEN_IDENTIFIER) &&
1836 (token->end - token->start >= 2) &&
1837 (token->end[-1] == '='))
// Tests whether the `length` bytes at `source` spell a keyword.
//
// KEYWORD(name) returns true from the enclosing function when the first
// `length` bytes of `source` equal `name`. Within each group below the
// candidates are narrowed by the first byte before the comparison runs.
// NOTE(review): the code that picks a group from `length` is not visible in
// this excerpt (presumably an outer switch on length) - confirm before relying
// on memcmp never reading past the end of a shorter literal.
1845pm_local_is_keyword(const char *source, size_t length) {
1846#define KEYWORD(name) if (memcmp(source, name, length) == 0) return true
// Two-byte keywords.
1850 switch (source[0]) {
1851 case 'd': KEYWORD("do"); return false;
1852 case 'i': KEYWORD("if"); KEYWORD("in"); return false;
1853 case 'o': KEYWORD("or"); return false;
1854 default: return false;
// Three-byte keywords.
1857 switch (source[0]) {
1858 case 'a': KEYWORD("and"); return false;
1859 case 'd': KEYWORD("def"); return false;
1860 case 'e': KEYWORD("end"); return false;
1861 case 'f': KEYWORD("for"); return false;
1862 case 'n': KEYWORD("nil"); KEYWORD("not"); return false;
1863 default: return false;
// Four-byte keywords.
1866 switch (source[0]) {
1867 case 'c': KEYWORD("case"); return false;
1868 case 'e': KEYWORD("else"); return false;
1869 case 'n': KEYWORD("next"); return false;
1870 case 'r': KEYWORD("redo"); return false;
1871 case 's': KEYWORD("self"); return false;
1872 case 't': KEYWORD("then"); KEYWORD("true"); return false;
1873 case 'w': KEYWORD("when"); return false;
1874 default: return false;
// Five-byte keywords.
1877 switch (source[0]) {
1878 case 'a': KEYWORD("alias"); return false;
1879 case 'b': KEYWORD("begin"); KEYWORD("break"); return false;
1880 case 'c': KEYWORD("class"); return false;
1881 case 'e': KEYWORD("elsif"); return false;
1882 case 'f': KEYWORD("false"); return false;
1883 case 'r': KEYWORD("retry"); return false;
1884 case 's': KEYWORD("super"); return false;
1885 case 'u': KEYWORD("undef"); KEYWORD("until"); return false;
1886 case 'w': KEYWORD("while"); return false;
1887 case 'y': KEYWORD("yield"); return false;
1888 default: return false;
// Six-byte keywords.
1891 switch (source[0]) {
1892 case 'e': KEYWORD("ensure"); return false;
1893 case 'm': KEYWORD("module"); return false;
1894 case 'r': KEYWORD("rescue"); KEYWORD("return"); return false;
1895 case 'u': KEYWORD("unless"); return false;
1896 default: return false;
// Double-underscore pseudo-variables.
1899 KEYWORD("__LINE__");
1900 KEYWORD("__FILE__");
1903 KEYWORD("__ENCODING__");
1912/******************************************************************************/
1913/* Node flag handling functions */
1914/******************************************************************************/
// Turns on every bit of `flag` in node->flags, leaving other bits untouched.
1920pm_node_flag_set(pm_node_t *node, pm_node_flags_t flag) {
1921 node->flags |= flag;
// Clears every bit of `flag` in node->flags, leaving other bits untouched.
1928pm_node_flag_unset(pm_node_t *node, pm_node_flags_t flag) {
// The cast narrows the complement back to the flags type after integer promotion.
1929 node->flags &= (pm_node_flags_t) ~flag;
// Sets PM_PARAMETER_FLAGS_REPEATED_PARAMETER on a node. The assertion limits
// callers to the parameter-like node types listed below.
1936pm_node_flag_set_repeated_parameter(pm_node_t *node) {
1937 assert(PM_NODE_TYPE(node) == PM_BLOCK_LOCAL_VARIABLE_NODE ||
1938 PM_NODE_TYPE(node) == PM_BLOCK_PARAMETER_NODE ||
1939 PM_NODE_TYPE(node) == PM_KEYWORD_REST_PARAMETER_NODE ||
1940 PM_NODE_TYPE(node) == PM_OPTIONAL_KEYWORD_PARAMETER_NODE ||
1941 PM_NODE_TYPE(node) == PM_OPTIONAL_PARAMETER_NODE ||
1942 PM_NODE_TYPE(node) == PM_REQUIRED_KEYWORD_PARAMETER_NODE ||
1943 PM_NODE_TYPE(node) == PM_REQUIRED_PARAMETER_NODE ||
1944 PM_NODE_TYPE(node) == PM_REST_PARAMETER_NODE);
1946 pm_node_flag_set(node, PM_PARAMETER_FLAGS_REPEATED_PARAMETER);
1949/******************************************************************************/
1950/* Node creation functions */
1951/******************************************************************************/
// Mask that clears the four encoding flags (EUC-JP, ASCII-8BIT, Windows-31J,
// UTF-8) while keeping every other regular expression flag.
1958#define PM_REGULAR_EXPRESSION_ENCODING_MASK ~(PM_REGULAR_EXPRESSION_FLAGS_EUC_JP | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J | PM_REGULAR_EXPRESSION_FLAGS_UTF_8)
// Derives node flags from the option bytes of a regular expression's closing
// token. Only tokens of type PM_TOKEN_REGEXP_END are scanned. Option bytes
// that match no case are collected and reported through a single
// PM_ERR_REGEXP_UNKNOWN_OPTIONS error.
1963static inline pm_node_flags_t
1964pm_regular_expression_flags_create(pm_parser_t *parser, const pm_token_t *closing) {
1965 pm_node_flags_t flags = 0;
1967 if (closing->type == PM_TOKEN_REGEXP_END) {
1968 pm_buffer_t unknown_flags = { 0 };
// Scanning starts one byte into the token (presumably skipping the closing
// delimiter - confirm).
1970 for (const uint8_t *flag = closing->start + 1; flag < closing->end; flag++) {
// These options accumulate independently.
1972 case 'i': flags |= PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE; break;
1973 case 'm': flags |= PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE; break;
1974 case 'x': flags |= PM_REGULAR_EXPRESSION_FLAGS_EXTENDED; break;
1975 case 'o': flags |= PM_REGULAR_EXPRESSION_FLAGS_ONCE; break;
// Encoding options first clear any earlier encoding flag, so the last one wins.
1977 case 'e': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_EUC_JP); break;
1978 case 'n': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT); break;
1979 case 's': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J); break;
1980 case 'u': flags = (pm_node_flags_t) (((pm_node_flags_t) (flags & PM_REGULAR_EXPRESSION_ENCODING_MASK)) | PM_REGULAR_EXPRESSION_FLAGS_UTF_8); break;
// Any other byte is remembered for the error message.
1982 default: pm_buffer_append_byte(&unknown_flags, *flag);
1986 size_t unknown_flags_length = pm_buffer_length(&unknown_flags);
1987 if (unknown_flags_length != 0) {
// Singular or plural wording, depending on how many bytes were collected.
1988 const char *word = unknown_flags_length >= 2 ? "options" : "option";
1989 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->previous, PM_ERR_REGEXP_UNKNOWN_OPTIONS, word, unknown_flags_length, pm_buffer_value(&unknown_flags));
1991 pm_buffer_free(&unknown_flags);
1997#undef PM_REGULAR_EXPRESSION_ENCODING_MASK
// Forward declarations for the statements-node helpers: creation, appending a
// statement (with a `newline` flag), and querying the body length.
1999static pm_statements_node_t *
2000pm_statements_node_create(pm_parser_t *parser);
2003pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline);
2006pm_statements_node_body_length(pm_statements_node_t *node);
// Re-homes an integer's value array into the arena. When integer->values is
// set, its length * sizeof(uint32_t) bytes are duplicated with
// pm_arena_memdup and the pointer is replaced by the arena copy.
// NOTE(review): what happens to old_values afterwards is not visible in this
// excerpt.
2013pm_integer_arena_move(pm_arena_t *arena, pm_integer_t *integer) {
2014 if (integer->values != NULL) {
2015 size_t byte_size = integer->length * sizeof(uint32_t);
2016 uint32_t *old_values = integer->values;
2017 integer->values = (uint32_t *) pm_arena_memdup(arena, old_values, byte_size, PRISM_ALIGNOF(uint32_t));
// Creates a missing node whose location is exactly the given start offset and
// length.
2025static pm_missing_node_t *
2026pm_missing_node_create(pm_parser_t *parser, uint32_t start, uint32_t length) {
2027 return pm_missing_node_new(
2031 ((pm_location_t) { .start = start, .length = length })
// Creates an alias-global-variable node. The keyword must be the `alias`
// keyword token; the node location is built from the keyword token and
// old_name, and the keyword's own location is recorded.
2038static pm_alias_global_variable_node_t *
2039pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2040 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2042 return pm_alias_global_variable_node_new(
2046 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
2049 TOK2LOC(parser, keyword)
// Creates an alias-method node. The keyword must be the `alias` keyword token;
// the node location is built from the keyword token and old_name, and the
// keyword's own location is recorded.
2056static pm_alias_method_node_t *
2057pm_alias_method_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *new_name, pm_node_t *old_name) {
2058 assert(keyword->type == PM_TOKEN_KEYWORD_ALIAS);
2060 return pm_alias_method_node_new(
2064 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, old_name),
2067 TOK2LOC(parser, keyword)
// Creates an alternation pattern node whose location is built from the left
// and right nodes; the operator token's location is recorded.
2074static pm_alternation_pattern_node_t *
2075pm_alternation_pattern_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *right, const pm_token_t *operator) {
2076 return pm_alternation_pattern_node_new(
2080 PM_LOCATION_INIT_NODES(left, right),
2083 TOK2LOC(parser, operator)
// Creates an `and` node. The left operand is first passed to
// pm_assert_value_expression; the node location is built from left and right,
// and the operator token's location is recorded.
2090static pm_and_node_t *
2091pm_and_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
2092 pm_assert_value_expression(parser, left);
2094 return pm_and_node_new(
2098 PM_LOCATION_INIT_NODES(left, right),
2101 TOK2LOC(parser, operator)
// Creates an empty arguments node: the location is unset (start 0, length 0)
// and the argument list is zero-initialised.
2108static pm_arguments_node_t *
2109pm_arguments_node_create(pm_parser_t *parser) {
2110 return pm_arguments_node_new(
2114 PM_LOCATION_INIT_UNSET,
2115 ((pm_node_list_t) { 0 })
// Returns how many arguments the node currently holds.
2123pm_arguments_node_size(pm_arguments_node_t *node) {
2124 return node->arguments.size;
// Appends an argument to an arguments node, growing the node's location to
// cover it and tracking splat flags.
2131pm_arguments_node_arguments_append(pm_arena_t *arena, pm_arguments_node_t *node, pm_node_t *argument) {
// The first argument fixes where the node starts.
2132 if (pm_arguments_node_size(node) == 0) {
2133 PM_NODE_START_SET_NODE(node, argument);
// Only extend the node; never shrink it if the argument ends earlier.
2136 if (PM_NODE_END(node) < PM_NODE_END(argument)) {
2137 PM_NODE_LENGTH_SET_NODE(node, argument);
2140 pm_node_list_append(arena, &node->arguments, argument);
// A splat sets CONTAINS_SPLAT; when that flag is already present,
// CONTAINS_MULTIPLE_SPLATS is set as well.
2142 if (PM_NODE_TYPE_P(argument, PM_SPLAT_NODE)) {
2143 if (PM_NODE_FLAG_P(node, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
2144 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_MULTIPLE_SPLATS);
2146 pm_node_flag_set(UP(node), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT);
// Creates an empty array node flagged PM_NODE_FLAG_STATIC_LITERAL.
//
// With no opening token, the node location is unset and both trailing
// locations are zeroed. With an opening token, the node location and both
// trailing locations are initialised from that token.
2154static pm_array_node_t *
2155pm_array_node_create(pm_parser_t *parser, const pm_token_t *opening) {
2156 if (opening == NULL) {
2157 return pm_array_node_new(
2160 PM_NODE_FLAG_STATIC_LITERAL,
2161 PM_LOCATION_INIT_UNSET,
2162 ((pm_node_list_t) { 0 }),
2163 ((pm_location_t) { 0 }),
2164 ((pm_location_t) { 0 })
2167 return pm_array_node_new(
2170 PM_NODE_FLAG_STATIC_LITERAL,
2171 PM_LOCATION_INIT_TOKEN(parser, opening),
2172 ((pm_node_list_t) { 0 }),
2173 TOK2LOC(parser, opening),
2174 TOK2LOC(parser, opening)
// Appends an element to an array node, extending the node's location to the
// element's end and updating the static-literal and splat flags.
2183pm_array_node_elements_append(pm_arena_t *arena, pm_array_node_t *node, pm_node_t *element) {
// With no elements yet and no opening location, the first element decides
// where the array starts.
2184 if (!node->elements.size && !node->opening_loc.length) {
2185 PM_NODE_START_SET_NODE(node, element);
2188 pm_node_list_append(arena, &node->elements, element);
2189 PM_NODE_LENGTH_SET_NODE(node, element);
2191 // If the element is not a static literal, then the array is not a static
2192 // literal. Turn that flag off.
// Array, hash and range elements also clear the flag, whatever their own flags say.
2193 if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || !PM_NODE_FLAG_P(element, PM_NODE_FLAG_STATIC_LITERAL)) {
2194 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
2197 if (PM_NODE_TYPE_P(element, PM_SPLAT_NODE)) {
2198 pm_node_flag_set(UP(node), PM_ARRAY_NODE_FLAGS_CONTAINS_SPLAT);
// Records the closing token of an array node: the node's length is extended to
// the end of the token and closing_loc is set from it. The token type must be
// a right bracket, a string end, or 0.
2206pm_array_node_close_set(const pm_parser_t *parser, pm_array_node_t *node, const pm_token_t *closing) {
2207 assert(closing->type == PM_TOKEN_BRACKET_RIGHT || closing->type == PM_TOKEN_STRING_END || closing->type == 0);
2208 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2209 node->closing_loc = TOK2LOC(parser, closing);
// Creates an array pattern node from a list of nodes. The node location is
// built from the first and last entries of `nodes`, so the list must not be
// empty (nodes->nodes[nodes->size - 1] is read unconditionally).
2216static pm_array_pattern_node_t *
2217pm_array_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *nodes) {
2218 pm_array_pattern_node_t *node = pm_array_pattern_node_new(
2222 PM_LOCATION_INIT_NODES(nodes->nodes[0], nodes->nodes[nodes->size - 1]),
2224 ((pm_node_list_t) { 0 }),
2226 ((pm_node_list_t) { 0 }),
2227 ((pm_location_t) { 0 }),
2228 ((pm_location_t) { 0 })
2231 // For now we're going to just copy over each pointer manually. This could be
2232 // much more efficient, as we could instead resize the node list.
2233 bool found_rest = false;
// The first splat or implicit-rest child gets its own branch (body not visible
// in this excerpt). Once found_rest is set, children go to `posts`; the
// remaining append targets `requireds`.
2236 PM_NODE_LIST_FOREACH(nodes, index, child) {
2237 if (!found_rest && (PM_NODE_TYPE_P(child, PM_SPLAT_NODE) || PM_NODE_TYPE_P(child, PM_IMPLICIT_REST_NODE))) {
2240 } else if (found_rest) {
2241 pm_node_list_append(parser->arena, &node->posts, child);
2243 pm_node_list_append(parser->arena, &node->requireds, child);
// Creates an array pattern node from a single rest node. The node location is
// built from `rest`; both node lists and both trailing locations start
// zero-initialised.
2253static pm_array_pattern_node_t *
2254pm_array_pattern_node_rest_create(pm_parser_t *parser, pm_node_t *rest) {
2255 return pm_array_pattern_node_new(
2259 PM_LOCATION_INIT_NODE(rest),
2261 ((pm_node_list_t) { 0 }),
2263 ((pm_node_list_t) { 0 }),
2264 ((pm_location_t) { 0 }),
2265 ((pm_location_t) { 0 })
// Creates an array pattern node for a constant followed by delimiters. The
// node location is built from the constant node and the closing token; the
// opening and closing token locations are recorded and both node lists start
// empty.
2273static pm_array_pattern_node_t *
2274pm_array_pattern_node_constant_create(pm_parser_t *parser, pm_node_t *constant, const pm_token_t *opening, const pm_token_t *closing) {
2275 return pm_array_pattern_node_new(
2279 PM_LOCATION_INIT_NODE_TOKEN(parser, constant, closing),
2281 ((pm_node_list_t) { 0 }),
2283 ((pm_node_list_t) { 0 }),
2284 TOK2LOC(parser, opening),
2285 TOK2LOC(parser, closing)
// Creates an array pattern node with no elements. The node location is built
// from the opening and closing tokens, whose locations are also recorded; both
// node lists start empty.
2293static pm_array_pattern_node_t *
2294pm_array_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
2295 return pm_array_pattern_node_new(
2299 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
2301 ((pm_node_list_t) { 0 }),
2303 ((pm_node_list_t) { 0 }),
2304 TOK2LOC(parser, opening),
2305 TOK2LOC(parser, closing)
// Appends `inner` to the node's `requireds` list. The node location is not
// adjusted in the visible lines.
2310pm_array_pattern_node_requireds_append(pm_arena_t *arena, pm_array_pattern_node_t *node, pm_node_t *inner) {
2311 pm_node_list_append(arena, &node->requireds, inner);
// Creates an assoc (key/value pair) node. `operator` may be NULL, and `value`
// is NULL-checked before each visible use.
2317static pm_assoc_node_t *
2318pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *operator, pm_node_t *value) {
// Choose the end offset: the value's end when the value ends after the key,
// otherwise the operator's end when there is one, otherwise the key's end.
2321 if (value != NULL && PM_NODE_END(value) > PM_NODE_END(key)) {
2322 end = PM_NODE_END(value);
2323 } else if (operator != NULL) {
2324 end = PM_TOKEN_END(parser, operator);
2326 end = PM_NODE_END(key);
2329 // Hash string keys will be frozen, so we can mark them as frozen here so
2330 // that the compiler picks them up and also when we check for static literal
2331 // on the keys it gets factored in.
2332 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
2333 key->flags |= PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL;
2336 // If the key and value of this assoc node are both static literals, then
2337 // we can mark this node as a static literal.
2338 pm_node_flags_t flags = 0;
// Array, hash and range nodes on either side are excluded from this.
2340 !PM_NODE_TYPE_P(key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(key, PM_HASH_NODE) && !PM_NODE_TYPE_P(key, PM_RANGE_NODE) &&
2341 value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)
2343 flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
2346 return pm_assoc_node_new(
// The location runs from the key's start to the end chosen above.
2350 ((pm_location_t) { .start = PM_NODE_START(key), .length = U32(end - PM_NODE_START(key)) }),
2353 NTOK2LOC(parser, operator)
// Creates an assoc splat node. The operator must be a PM_TOKEN_USTAR_STAR
// token. With no value the location is the operator token alone; otherwise it
// is built from the operator token and the value node.
2360static pm_assoc_splat_node_t *
2361pm_assoc_splat_node_create(pm_parser_t *parser, pm_node_t *value, const pm_token_t *operator) {
2362 assert(operator->type == PM_TOKEN_USTAR_STAR);
2364 return pm_assoc_splat_node_new(
2368 (value == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, value),
2370 TOK2LOC(parser, operator)
// Creates a back-reference read node from a PM_TOKEN_BACK_REFERENCE token. The
// token supplies both the node location and the constant id of the name.
2377static pm_back_reference_read_node_t *
2378pm_back_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
2379 assert(name->type == PM_TOKEN_BACK_REFERENCE);
2381 return pm_back_reference_read_node_new(
2385 PM_LOCATION_INIT_TOKEN(parser, name),
2386 pm_parser_constant_id_token(parser, name)
// Creates a begin node. Both `begin_keyword` and `statements` may be NULL.
2393static pm_begin_node_t *
2394pm_begin_node_create(pm_parser_t *parser, const pm_token_t *begin_keyword, pm_statements_node_t *statements) {
// Start at the keyword when present, otherwise at offset 0.
2395 uint32_t start = begin_keyword == NULL ? 0 : PM_TOKEN_START(parser, begin_keyword);
// End at the statements when present; otherwise at the keyword's end, or 0
// when there is no keyword either.
2396 uint32_t end = statements == NULL ? (begin_keyword == NULL ? 0 : PM_TOKEN_END(parser, begin_keyword)) : PM_NODE_END(statements);
2398 return pm_begin_node_new(
2402 ((pm_location_t) { .start = start, .length = U32(end - start) }),
2403 NTOK2LOC(parser, begin_keyword),
2408 ((pm_location_t) { 0 })
// Attaches a rescue clause to a begin node and extends the node's length to
// the clause's end.
2416pm_begin_node_rescue_clause_set(pm_begin_node_t *node, pm_rescue_node_t *rescue_clause) {
// Without a begin keyword, the node starts where the rescue clause starts.
2417 if (node->begin_keyword_loc.length == 0) {
2418 PM_NODE_START_SET_NODE(node, rescue_clause);
2420 PM_NODE_LENGTH_SET_NODE(node, rescue_clause);
2421 node->rescue_clause = rescue_clause;
// Attaches an else clause to a begin node and extends the node's length to the
// clause's end.
2428pm_begin_node_else_clause_set(pm_begin_node_t *node, pm_else_node_t *else_clause) {
// The start moves to the else clause only when there is no begin keyword and
// the start is still 0.
2429 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2430 PM_NODE_START_SET_NODE(node, else_clause);
2432 PM_NODE_LENGTH_SET_NODE(node, else_clause);
2433 node->else_clause = else_clause;
// Attaches an ensure clause to a begin node and extends the node's length to
// the clause's end.
2440pm_begin_node_ensure_clause_set(pm_begin_node_t *node, pm_ensure_node_t *ensure_clause) {
// The start moves to the ensure clause only when there is no begin keyword and
// the start is still 0.
2441 if ((node->begin_keyword_loc.length == 0) && PM_NODE_START(node) == 0) {
2442 PM_NODE_START_SET_NODE(node, ensure_clause);
2444 PM_NODE_LENGTH_SET_NODE(node, ensure_clause);
2445 node->ensure_clause = ensure_clause;
// Records the `end` keyword of a begin node: the node's length is extended to
// the end of the token and end_keyword_loc is set from it. The token type must
// be PM_TOKEN_KEYWORD_END or 0.
2452pm_begin_node_end_keyword_set(const pm_parser_t *parser, pm_begin_node_t *node, const pm_token_t *end_keyword) {
2453 assert(end_keyword->type == PM_TOKEN_KEYWORD_END || end_keyword->type == 0);
2454 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
2455 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
// Creates a block argument node. The operator must be a PM_TOKEN_UAMPERSAND
// token. With no expression the location is the operator token alone;
// otherwise it is built from the operator token and the expression node.
2461static pm_block_argument_node_t *
2462pm_block_argument_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
2463 assert(operator->type == PM_TOKEN_UAMPERSAND);
2465 return pm_block_argument_node_new(
2469 (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
2471 TOK2LOC(parser, operator)
// Creates a block node. The node location is built from the opening and
// closing tokens, whose individual locations are also recorded.
2478static pm_block_node_t *
2479pm_block_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *opening, pm_node_t *parameters, pm_node_t *body, const pm_token_t *closing) {
2480 return pm_block_node_new(
2484 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
2488 TOK2LOC(parser, opening),
2489 TOK2LOC(parser, closing)
// Creates a block parameter node. The operator must be a PM_TOKEN_UAMPERSAND
// or PM_TOKEN_AMPERSAND token; `name` may be NULL.
2496static pm_block_parameter_node_t *
2497pm_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator) {
2498 assert(operator->type == PM_TOKEN_UAMPERSAND || operator->type == PM_TOKEN_AMPERSAND);
2500 return pm_block_parameter_node_new(
// Without a name the location is the operator alone; with one it is built
// from the operator and name tokens.
2504 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
// A missing name is given constant id 0.
2505 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
2506 NTOK2LOC(parser, name),
2507 TOK2LOC(parser, operator)
// Creates a block parameters node. Both `parameters` and `opening` may be
// NULL.
// NOTE(review): the fallback when both are NULL is not visible in this excerpt.
2514static pm_block_parameters_node_t *
2515pm_block_parameters_node_create(pm_parser_t *parser, pm_parameters_node_t *parameters, const pm_token_t *opening) {
// Start: prefer the opening token, then the parameters node.
2517 if (opening != NULL) {
2518 start = PM_TOKEN_START(parser, opening);
2519 } else if (parameters != NULL) {
2520 start = PM_NODE_START(parameters);
// End: prefer the parameters node, then the end of the opening token.
2526 if (parameters != NULL) {
2527 end = PM_NODE_END(parameters);
2528 } else if (opening != NULL) {
2529 end = PM_TOKEN_END(parser, opening);
2534 return pm_block_parameters_node_new(
2538 ((pm_location_t) { .start = start, .length = U32(end - start) }),
2540 ((pm_node_list_t) { 0 }),
2541 NTOK2LOC(parser, opening),
2542 ((pm_location_t) { 0 })
// Records the closing token of a block parameters node: the node's length is
// extended to the end of the token and closing_loc is set from it. The token
// type must be a pipe, a right parenthesis, or 0.
2550pm_block_parameters_node_closing_set(const pm_parser_t *parser, pm_block_parameters_node_t *node, const pm_token_t *closing) {
2551 assert(closing->type == PM_TOKEN_PIPE || closing->type == PM_TOKEN_PARENTHESIS_RIGHT || closing->type == 0);
2552 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
2553 node->closing_loc = TOK2LOC(parser, closing);
// Creates a block-local variable node from a name token. The token supplies
// both the node location and the constant id of the name.
2559static pm_block_local_variable_node_t *
2560pm_block_local_variable_node_create(pm_parser_t *parser, const pm_token_t *name) {
2561 return pm_block_local_variable_node_new(
2565 PM_LOCATION_INIT_TOKEN(parser, name),
2566 pm_parser_constant_id_token(parser, name)
// Appends a block-local variable to a block parameters node and extends the
// node's length to the local's end.
2574pm_block_parameters_node_append_local(pm_arena_t *arena, pm_block_parameters_node_t *node, const pm_block_local_variable_node_t *local) {
2575 pm_node_list_append(arena, &node->locals, UP(local));
// A zero-length node takes its start from the local being added.
2577 if (PM_NODE_LENGTH(node) == 0) {
2578 PM_NODE_START_SET_NODE(node, local);
2581 PM_NODE_LENGTH_SET_NODE(node, local);
// Creates a break node. The keyword must be the `break` keyword token. With no
// arguments the location is the keyword alone; otherwise it is built from the
// keyword token and the arguments node.
2587static pm_break_node_t *
2588pm_break_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
2589 assert(keyword->type == PM_TOKEN_KEYWORD_BREAK);
2591 return pm_break_node_new(
2595 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
2597 TOK2LOC(parser, keyword)
2601// There are certain flags that we want to use internally but don't want to
2602// expose because they are not relevant beyond parsing. Therefore we'll define
2603// them here and not define them in config.yml/a header file.
2604static const pm_node_flags_t PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY = (1 << 2);
// The call-node flags below shift (PM_CALL_NODE_FLAGS_LAST - 1) left by 1, 2
// and 3 bits.
// NOTE(review): presumably this places them above the public call-node
// flags - confirm against the definition of PM_CALL_NODE_FLAGS_LAST.
2606static const pm_node_flags_t PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY = ((PM_CALL_NODE_FLAGS_LAST - 1) << 1);
2607static const pm_node_flags_t PM_CALL_NODE_FLAGS_COMPARISON = ((PM_CALL_NODE_FLAGS_LAST - 1) << 2);
2608static const pm_node_flags_t PM_CALL_NODE_FLAGS_INDEX = ((PM_CALL_NODE_FLAGS_LAST - 1) << 3);
// Creates a blank call node for the specialised constructors to fill in. The
// node location is unset and every visible location argument is zeroed.
// NOTE(review): how `flags` is passed on is not visible in this excerpt.
2615static pm_call_node_t *
2616pm_call_node_create(pm_parser_t *parser, pm_node_flags_t flags) {
2617 return pm_call_node_new(
2621 PM_LOCATION_INIT_UNSET,
2623 ((pm_location_t) { 0 }),
2625 ((pm_location_t) { 0 }),
2626 ((pm_location_t) { 0 }),
2628 ((pm_location_t) { 0 }),
2629 ((pm_location_t) { 0 }),
// Returns PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY when the receiver is a self
// node, and 0 otherwise. The receiver must not be NULL here.
2638static inline pm_node_flags_t
2639pm_call_node_ignore_visibility_flag(const pm_node_t *receiver) {
2640 return PM_NODE_TYPE_P(receiver, PM_SELF_NODE) ? PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY : 0;
// Creates a call node for an index expression; the method name is "[]".
2647static pm_call_node_t *
2648pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_t *arguments) {
2649 pm_assert_value_expression(parser, receiver);
2651 pm_node_flags_t flags = pm_call_node_ignore_visibility_flag(receiver);
// The internal INDEX flag is set only when there is no block or the block is
// a block argument.
2652 if (arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_ARGUMENT_NODE)) {
2653 flags |= PM_CALL_NODE_FLAGS_INDEX;
2656 pm_call_node_t *node = pm_call_node_create(parser, flags);
// The node runs from the receiver's start to the end reported by
// pm_arguments_end, which is asserted to be non-NULL.
2658 PM_NODE_START_SET_NODE(node, receiver);
2660 const pm_location_t *end = pm_arguments_end(arguments);
2661 assert(end != NULL && "unreachable");
2662 PM_NODE_LENGTH_SET_LOCATION(node, end);
2664 node->receiver = receiver;
// message_loc runs from the start of the opening location to the end of the
// closing location.
2665 node->message_loc.start = arguments->opening_loc.start;
2666 node->message_loc.length = (arguments->closing_loc.start + arguments->closing_loc.length) - arguments->opening_loc.start;
2668 node->opening_loc = arguments->opening_loc;
2669 node->arguments = arguments->arguments;
2670 node->closing_loc = arguments->closing_loc;
2671 node->block = arguments->block;
2673 node->name = pm_parser_constant_id_constant(parser, "[]", 2);
// Creates a call node for a binary operator. The operator token supplies both
// message_loc and the method name, and `argument` becomes the only entry of a
// fresh arguments node.
2680static pm_call_node_t *
2681pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_node_t *argument, pm_node_flags_t flags) {
2682 pm_assert_value_expression(parser, receiver);
2683 pm_assert_value_expression(parser, argument);
2685 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver) | flags);
// Start at whichever operand begins first and end at whichever ends last.
2687 PM_NODE_START_SET_NODE(node, PM_NODE_START(receiver) < PM_NODE_START(argument) ? receiver : argument);
2688 PM_NODE_LENGTH_SET_NODE(node, PM_NODE_END(receiver) > PM_NODE_END(argument) ? receiver : argument);
2690 node->receiver = receiver;
2691 node->message_loc = TOK2LOC(parser, operator);
2693 pm_arguments_node_t *arguments = pm_arguments_node_create(parser);
2694 pm_arguments_node_arguments_append(parser->arena, arguments, argument);
2695 node->arguments = arguments;
2697 node->name = pm_parser_constant_id_token(parser, operator);
// Forward declaration. pm_call_node_call_create passes the returned pointer to
// pm_parser_constant_id_raw as the end of the method-name bytes.
2701static const uint8_t * parse_operator_symbol_name(const pm_token_t *);
// Creates a call node with an explicit receiver, call operator and message
// token.
2706static pm_call_node_t *
2707pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_token_t *message, pm_arguments_t *arguments) {
2708 pm_assert_value_expression(parser, receiver);
2710 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2712 PM_NODE_START_SET_NODE(node, receiver);
2713 const pm_location_t *end = pm_arguments_end(arguments);
// Two alternative endings follow: the message token, or the end of the
// arguments.
// NOTE(review): the condition choosing between them is not visible in this
// excerpt (presumably whether `end` is NULL) - confirm.
2715 PM_NODE_LENGTH_SET_TOKEN(parser, node, message);
2717 PM_NODE_LENGTH_SET_LOCATION(node, end);
2720 node->receiver = receiver;
2721 node->call_operator_loc = TOK2LOC(parser, operator);
2722 node->message_loc = TOK2LOC(parser, message);
2723 node->opening_loc = arguments->opening_loc;
2724 node->arguments = arguments->arguments;
2725 node->closing_loc = arguments->closing_loc;
2726 node->block = arguments->block;
// A PM_TOKEN_AMPERSAND_DOT call operator marks the call as safe navigation.
2728 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2729 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
// The name is the byte range from the start of the message token to the
// pointer returned by parse_operator_symbol_name.
2736 node->name = pm_parser_constant_id_raw(parser, message->start, parse_operator_symbol_name(message));
// Creates a call node for a call that has no tokens of its own. `message`
// must be a NUL-terminated string (strlen is applied to it).
2743static pm_call_node_t *
2744pm_call_node_call_synthesized_create(pm_parser_t *parser, pm_node_t *receiver, const char *message, pm_arguments_node_t *arguments) {
2745 pm_call_node_t *node = pm_call_node_create(parser, 0);
// The location covers offset 0 through the whole parser input.
2746 node->base.location = (pm_location_t) { .start = 0, .length = U32(parser->end - parser->start) };
2748 node->receiver = receiver;
2749 node->arguments = arguments;
2751 node->name = pm_parser_constant_id_constant(parser, message, strlen(message));
// Creates a call node for a call without a receiver. The node is flagged
// PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY, and the message token supplies the
// start, message_loc and the method name.
2759static pm_call_node_t *
2760pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments_t *arguments) {
2761 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2763 PM_NODE_START_SET_TOKEN(parser, node, message);
// The end comes from pm_arguments_end, which is asserted to be non-NULL.
2764 const pm_location_t *end = pm_arguments_end(arguments);
2765 assert(end != NULL && "unreachable");
2766 PM_NODE_LENGTH_SET_LOCATION(node, end);
2768 node->message_loc = TOK2LOC(parser, message);
2769 node->opening_loc = arguments->opening_loc;
2770 node->arguments = arguments->arguments;
2771 node->closing_loc = arguments->closing_loc;
2772 node->block = arguments->block;
2774 node->name = pm_parser_constant_id_token(parser, message);
// Creates a receiver-less call node that has no tokens of its own. The node is
// flagged PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY and its location is zeroed.
// NOTE(review): the use of `name` is not visible in this excerpt.
2782static pm_call_node_t *
2783pm_call_node_fcall_synthesized_create(pm_parser_t *parser, pm_arguments_node_t *arguments, pm_constant_id_t name) {
2784 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2786 node->base.location = (pm_location_t) { 0 };
2787 node->arguments = arguments;
// Creates a call node for a negation; the method name is "!". The receiver is
// NULL-checked before the predicate check and before the flag lookup.
2796static pm_call_node_t *
2797pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *message, pm_arguments_t *arguments) {
2798 pm_assert_value_expression(parser, receiver);
// A present receiver is checked as a conditional predicate of type NOT.
2799 if (receiver != NULL) pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
2801 pm_call_node_t *node = pm_call_node_create(parser, receiver == NULL ? 0 : pm_call_node_ignore_visibility_flag(receiver));
2803 PM_NODE_START_SET_TOKEN(parser, node, message);
// With a closing location the node ends there. The other visible ending
// asserts a non-NULL receiver and ends at the receiver.
2804 if (arguments->closing_loc.length > 0) {
2805 PM_NODE_LENGTH_SET_LOCATION(node, &arguments->closing_loc);
2807 assert(receiver != NULL);
2808 PM_NODE_LENGTH_SET_NODE(node, receiver);
2811 node->receiver = receiver;
2812 node->message_loc = TOK2LOC(parser, message);
2813 node->opening_loc = arguments->opening_loc;
2814 node->arguments = arguments->arguments;
2815 node->closing_loc = arguments->closing_loc;
2817 node->name = pm_parser_constant_id_constant(parser, "!", 1);
// Creates a call node that has a call operator but no message token; the
// method name is the literal "call". No message_loc is assigned in the visible
// lines.
2824static pm_call_node_t *
2825pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *operator, pm_arguments_t *arguments) {
2826 pm_assert_value_expression(parser, receiver);
2828 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
// The node runs from the receiver's start to the end reported by
// pm_arguments_end, which is asserted to be non-NULL.
2830 PM_NODE_START_SET_NODE(node, receiver);
2831 const pm_location_t *end = pm_arguments_end(arguments);
2832 assert(end != NULL && "unreachable");
2833 PM_NODE_LENGTH_SET_LOCATION(node, end);
2835 node->receiver = receiver;
2836 node->call_operator_loc = TOK2LOC(parser, operator);
2837 node->opening_loc = arguments->opening_loc;
2838 node->arguments = arguments->arguments;
2839 node->closing_loc = arguments->closing_loc;
2840 node->block = arguments->block;
// A PM_TOKEN_AMPERSAND_DOT call operator marks the call as safe navigation.
2842 if (operator->type == PM_TOKEN_AMPERSAND_DOT) {
2843 pm_node_flag_set(UP(node), PM_CALL_NODE_FLAGS_SAFE_NAVIGATION);
2846 node->name = pm_parser_constant_id_constant(parser, "call", 4);
// Creates a call node for a unary operator. The node runs from the operator
// token to the end of the receiver. `name` must be a NUL-terminated string
// (strlen is applied to it) and becomes the method name.
2853static pm_call_node_t *
2854pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *receiver, const char *name) {
2855 pm_assert_value_expression(parser, receiver);
2857 pm_call_node_t *node = pm_call_node_create(parser, pm_call_node_ignore_visibility_flag(receiver));
2859 PM_NODE_START_SET_TOKEN(parser, node, operator);
2860 PM_NODE_LENGTH_SET_NODE(node, receiver);
2862 node->receiver = receiver;
2863 node->message_loc = TOK2LOC(parser, operator);
2865 node->name = pm_parser_constant_id_constant(parser, name, strlen(name));
// Creates a call node from a single message token. The token supplies the node
// location, message_loc and the method name; the node is flagged
// PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY.
2873static pm_call_node_t *
2874pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
2875 pm_call_node_t *node = pm_call_node_create(parser, PM_CALL_NODE_FLAGS_IGNORE_VISIBILITY);
2877 node->base.location = TOK2LOC(parser, message);
2878 node->message_loc = TOK2LOC(parser, message);
2880 node->name = pm_parser_constant_id_token(parser, message);
// Tests whether a call node looks like a bare name that could be written to.
// All visible conditions must hold: the message is non-empty, its last byte is
// neither '!' nor '?', it begins with an identifier-start character, and the
// call has no opening location, no arguments and no block.
2889pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
2891 (node->message_loc.length > 0) &&
2892 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '!') &&
2893 (parser->start[node->message_loc.start + node->message_loc.length - 1] != '?') &&
2894 char_is_identifier_start(parser, parser->start + node->message_loc.start, (ptrdiff_t) node->message_loc.length) &&
2895 (node->opening_loc.length == 0) &&
2896 (node->arguments == NULL) &&
2897 (node->block == NULL)
// Derives *read_name from *write_name by dropping the final byte of the write
// name's constant.
2905pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
2906 pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
2908 if (write_constant->length > 0) {
2909 size_t length = write_constant->length - 1;
// The shortened name is copied into fresh memory that is handed to the
// constant pool through its "owned" insert.
// NOTE(review): the xmalloc result is used with no visible NULL check -
// confirm xmalloc cannot return NULL here.
2911 void *memory = xmalloc(length);
2912 memcpy(memory, write_constant->start, length);
2914 *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
2916 // We can get here if the message was missing because of a syntax error.
2917 *read_name = pm_parser_constant_id_constant(parser, "", 0);
// Creates a call-and-write node from an existing call node. The target must
// have no block and the operator must be a PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL
// token. The location is built from target and value, and the target's
// call-operator and message locations are reused.
2924static pm_call_and_write_node_t *
2925pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2926 assert(target->block == NULL);
2927 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2929 pm_call_and_write_node_t *node = pm_call_and_write_node_new(
2933 PM_LOCATION_INIT_NODES(target, value),
2935 target->call_operator_loc,
2936 target->message_loc,
2939 TOK2LOC(parser, operator),
// Fill in read_name from the node's write_name.
2943 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
2945 // The target is no longer necessary because we've reused its children.
2946 // It is arena-allocated so no explicit free is needed.
// Reports errors for index arguments that are not allowed.
// NOTE(review): the closing braces are not visible in this excerpt, so whether
// the block check falls under the version check cannot be confirmed from here.
2956pm_index_arguments_check(pm_parser_t *parser, const pm_arguments_node_t *arguments, const pm_node_t *block) {
2957 if (parser->version >= PM_OPTIONS_VERSION_CRUBY_3_4) {
// When the arguments are flagged as containing keywords, each keyword hash
// node among them gets a PM_ERR_UNEXPECTED_INDEX_KEYWORDS error.
2958 if (arguments != NULL && PM_NODE_FLAG_P(arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS)) {
2960 PM_NODE_LIST_FOREACH(&arguments->arguments, index, node) {
2961 if (PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE)) {
2962 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_INDEX_KEYWORDS);
// A block on an index expression gets a PM_ERR_UNEXPECTED_INDEX_BLOCK error.
2968 if (block != NULL) {
2969 pm_parser_err_node(parser, block, PM_ERR_UNEXPECTED_INDEX_BLOCK);
// Creates an index-and-write node from an existing call node. The operator
// must be a PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL token. The target's arguments
// and block are validated first, and its locations and block are then reused.
2977static pm_index_and_write_node_t *
2978pm_index_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
2979 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
2981 pm_index_arguments_check(parser, target->arguments, target->block);
// The cast below relies on the block, if any, being a block argument node.
2983 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
2985 pm_index_and_write_node_t *node = pm_index_and_write_node_new(
2989 PM_LOCATION_INIT_NODES(target, value),
2991 target->call_operator_loc,
2992 target->opening_loc,
2994 target->closing_loc,
2995 (pm_block_argument_node_t *) target->block,
2996 TOK2LOC(parser, operator),
3000 // The target is no longer necessary because we've reused its children.
3001 // It is arena-allocated so no explicit free is needed.
// Creates a call-operator-write node from an existing call node. The target
// must have no block. The location is built from target and value, and the
// target's call-operator and message locations are reused.
3009static pm_call_operator_write_node_t *
3010pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3011 assert(target->block == NULL);
3013 pm_call_operator_write_node_t *node = pm_call_operator_write_node_new(
3017 PM_LOCATION_INIT_NODES(target, value),
3019 target->call_operator_loc,
3020 target->message_loc,
// The constant is the operator token's bytes minus the last one (presumably
// the trailing '=' - confirm).
3023 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3024 TOK2LOC(parser, operator),
// Fill in read_name from the node's write_name.
3028 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3030 // The target is no longer necessary because we've reused its children.
3031 // It is arena-allocated so no explicit free is needed.
/**
 * Build a pm_index_operator_write_node_t from an index call `target`, the
 * operator token, and the assigned `value`.
 */
3039static pm_index_operator_write_node_t *
3040pm_index_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
// Report disallowed arguments or blocks on the index expression first.
3041 pm_index_arguments_check(parser, target->arguments, target->block);
// The cast of target->block to pm_block_argument_node_t below relies on this.
3043 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3045 pm_index_operator_write_node_t *node = pm_index_operator_write_node_new(
3049 PM_LOCATION_INIT_NODES(target, value),
3051 target->call_operator_loc,
3052 target->opening_loc,
3054 target->closing_loc,
3055 (pm_block_argument_node_t *) target->block,
// The constant is interned from the operator token's bytes minus its last byte.
3056 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
3057 TOK2LOC(parser, operator),
3061 // The target is no longer necessary because we've reused its children.
3062 // It is arena-allocated so no explicit free is needed.
/**
 * Build a pm_call_or_write_node_t from a call `target` (which must not have a
 * block), an operator token of type PM_TOKEN_PIPE_PIPE_EQUAL, and `value`.
 */
3070static pm_call_or_write_node_t *
3071pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3072 assert(target->block == NULL);
3073 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3075 pm_call_or_write_node_t *node = pm_call_or_write_node_new(
3079 PM_LOCATION_INIT_NODES(target, value),
3081 target->call_operator_loc,
3082 target->message_loc,
3085 TOK2LOC(parser, operator),
// Fill in the node's read_name and write_name fields in place.
3089 pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
3091 // The target is no longer necessary because we've reused its children.
3092 // It is arena-allocated so no explicit free is needed.
/**
 * Build a pm_index_or_write_node_t from an index call `target`, an operator
 * token of type PM_TOKEN_PIPE_PIPE_EQUAL, and the assigned `value`.
 */
3100static pm_index_or_write_node_t *
3101pm_index_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3102 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
// Report disallowed arguments or blocks on the index expression first.
3104 pm_index_arguments_check(parser, target->arguments, target->block);
// The cast of target->block to pm_block_argument_node_t below relies on this.
3106 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3108 pm_index_or_write_node_t *node = pm_index_or_write_node_new(
3112 PM_LOCATION_INIT_NODES(target, value),
3114 target->call_operator_loc,
3115 target->opening_loc,
3117 target->closing_loc,
3118 (pm_block_argument_node_t *) target->block,
3119 TOK2LOC(parser, operator),
3123 // The target is no longer necessary because we've reused its children.
3124 // It is arena-allocated so no explicit free is needed.
/**
 * Build a pm_call_target_node_t from an existing call node, reusing the
 * call's location and call operator location.
 */
3133static pm_call_target_node_t *
3134pm_call_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
3135 pm_call_target_node_t *node = pm_call_target_node_new(
3139 PM_LOCATION_INIT_NODE(target),
3141 target->call_operator_loc,
3146 /* It is possible to get here where we have parsed an invalid syntax tree
3147 * where the call operator was not present. In that case we will have a
3148 * problem because it is a required location. In this case we need to fill
3149 * it in with a fake location so that the syntax tree remains valid. */
// A zero-length location is treated as "absent"; the whole target location
// is substituted as the placeholder.
3150 if (node->call_operator_loc.length == 0) {
3151 node->call_operator_loc = target->base.location;
3154 // The target is no longer necessary because we've reused its children.
3155 // It is arena-allocated so no explicit free is needed.
/**
 * Build a pm_index_target_node_t from an existing index call node.
 */
3164static pm_index_target_node_t *
3165pm_index_target_node_create(pm_parser_t *parser, pm_call_node_t *target) {
// Report disallowed arguments or blocks on the index expression first.
3166 pm_index_arguments_check(parser, target->arguments, target->block);
// The cast of target->block to pm_block_argument_node_t below relies on this.
3167 assert(!target->block || PM_NODE_TYPE_P(target->block, PM_BLOCK_ARGUMENT_NODE));
3169 pm_index_target_node_t *node = pm_index_target_node_new(
// Carry over the call's flags and additionally mark it as an attribute write.
3172 FL(target) | PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE,
3173 PM_LOCATION_INIT_NODE(target),
3175 target->opening_loc,
3177 target->closing_loc,
3178 (pm_block_argument_node_t *) target->block
3181 // The target is no longer necessary because we've reused its children.
3182 // It is arena-allocated so no explicit free is needed.
/**
 * Build a pm_capture_pattern_node_t. The location is initialized from `value`
 * (first) and `target` (second); `operator` supplies the operator location.
 */
3190static pm_capture_pattern_node_t *
3191pm_capture_pattern_node_create(pm_parser_t *parser, pm_node_t *value, pm_local_variable_target_node_t *target, const pm_token_t *operator) {
3192 return pm_capture_pattern_node_new(
3196 PM_LOCATION_INIT_NODES(value, target),
3199 TOK2LOC(parser, operator)
/**
 * Build a pm_case_node_t with an empty node list. `end_keyword` may be NULL.
 */
3206static pm_case_node_t *
3207pm_case_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate, const pm_token_t *end_keyword) {
3208 return pm_case_node_new(
// Without an end keyword the case keyword serves as both ends of the span.
3212 PM_LOCATION_INIT_TOKENS(parser, case_keyword, end_keyword == NULL ? case_keyword : end_keyword),
3214 ((pm_node_list_t) { 0 }),
3216 TOK2LOC(parser, case_keyword),
3217 NTOK2LOC(parser, end_keyword)
/**
 * Append a condition (asserted to be a PM_WHEN_NODE) to a case node and
 * stretch the case node's length so that it ends where the condition ends.
 */
3225pm_case_node_condition_append(pm_arena_t *arena, pm_case_node_t *node, pm_node_t *condition) {
3226 assert(PM_NODE_TYPE_P(condition, PM_WHEN_NODE));
3228 pm_node_list_append(arena, &node->conditions, condition);
3229 PM_NODE_LENGTH_SET_NODE(node, condition);
/**
 * Attach an else clause to a case node and stretch the case node's length so
 * that it ends where the else clause ends. The length macro reads the else
 * clause's location, so `else_clause` must not be NULL.
 */
3236pm_case_node_else_clause_set(pm_case_node_t *node, pm_else_node_t *else_clause) {
3237 node->else_clause = else_clause;
3238 PM_NODE_LENGTH_SET_NODE(node, else_clause);
/**
 * Record the end keyword of a case node: the node's length is extended to the
 * end of the token and end_keyword_loc is set from the token.
 */
3245pm_case_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_node_t *node, const pm_token_t *end_keyword) {
3246 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3247 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
/**
 * Build a pm_case_match_node_t with an empty node list. The initial location
 * covers only the case keyword token; a zeroed location is passed as the last
 * argument.
 */
3253static pm_case_match_node_t *
3254pm_case_match_node_create(pm_parser_t *parser, const pm_token_t *case_keyword, pm_node_t *predicate) {
3255 return pm_case_match_node_new(
3259 PM_LOCATION_INIT_TOKEN(parser, case_keyword),
3261 ((pm_node_list_t) { 0 }),
3263 TOK2LOC(parser, case_keyword),
3264 ((pm_location_t) { 0 })
/**
 * Append a condition (asserted to be a PM_IN_NODE) to a case-match node and
 * stretch the node's length so that it ends where the condition ends.
 */
3272pm_case_match_node_condition_append(pm_arena_t *arena, pm_case_match_node_t *node, pm_node_t *condition) {
3273 assert(PM_NODE_TYPE_P(condition, PM_IN_NODE));
3275 pm_node_list_append(arena, &node->conditions, condition);
3276 PM_NODE_LENGTH_SET_NODE(node, condition);
/**
 * Attach an else clause to a case-match node and stretch the node's length so
 * that it ends where the else clause ends. The length macro reads the else
 * clause's location, so `else_clause` must not be NULL.
 */
3283pm_case_match_node_else_clause_set(pm_case_match_node_t *node, pm_else_node_t *else_clause) {
3284 node->else_clause = else_clause;
3285 PM_NODE_LENGTH_SET_NODE(node, else_clause);
/**
 * Record the end keyword of a case-match node: the node's length is extended
 * to the end of the token and end_keyword_loc is set from the token.
 */
3292pm_case_match_node_end_keyword_loc_set(const pm_parser_t *parser, pm_case_match_node_t *node, const pm_token_t *end_keyword) {
3293 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
3294 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
/**
 * Build a pm_class_node_t whose location is initialized from the class
 * keyword and end keyword tokens. The class name constant is interned from
 * the `name` token.
 */
3300static pm_class_node_t *
3301pm_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, pm_node_t *constant_path, const pm_token_t *name, const pm_token_t *inheritance_operator, pm_node_t *superclass, pm_node_t *body, const pm_token_t *end_keyword) {
3302 return pm_class_node_new(
3306 PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
3308 TOK2LOC(parser, class_keyword),
// NOTE(review): only this token goes through NTOK2LOC; presumably that
// variant accepts a NULL token -- confirm against the macro definition.
3310 NTOK2LOC(parser, inheritance_operator),
3313 TOK2LOC(parser, end_keyword),
3314 pm_parser_constant_id_token(parser, name)
/**
 * Build a pm_class_variable_and_write_node_t from a class variable read
 * `target`, an operator token of type PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL, and
 * `value`. The target's own location is passed through as a separate field.
 */
3321static pm_class_variable_and_write_node_t *
3322pm_class_variable_and_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3323 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3325 return pm_class_variable_and_write_node_new(
3329 PM_LOCATION_INIT_NODES(target, value),
3331 target->base.location,
3332 TOK2LOC(parser, operator),
/**
 * Build a pm_class_variable_operator_write_node_t from a class variable read
 * `target`, the operator token, and `value`.
 */
3340static pm_class_variable_operator_write_node_t *
3341pm_class_variable_operator_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3342 return pm_class_variable_operator_write_node_new(
3346 PM_LOCATION_INIT_NODES(target, value),
3348 target->base.location,
3349 TOK2LOC(parser, operator),
// The constant is interned from the operator token's bytes minus its last byte.
3351 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
/**
 * Build a pm_class_variable_or_write_node_t from a class variable read
 * `target`, an operator token of type PM_TOKEN_PIPE_PIPE_EQUAL, and `value`.
 */
3358static pm_class_variable_or_write_node_t *
3359pm_class_variable_or_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3360 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3362 return pm_class_variable_or_write_node_new(
3366 PM_LOCATION_INIT_NODES(target, value),
3368 target->base.location,
3369 TOK2LOC(parser, operator),
/**
 * Build a pm_class_variable_read_node_t from a PM_TOKEN_CLASS_VARIABLE token.
 * Both the location and the interned name come from that token.
 */
3377static pm_class_variable_read_node_t *
3378pm_class_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
3379 assert(token->type == PM_TOKEN_CLASS_VARIABLE);
3381 return pm_class_variable_read_node_new(
3385 PM_LOCATION_INIT_TOKEN(parser, token),
3386 pm_parser_constant_id_token(parser, token)
/**
 * Decide which flags a write node should receive based on the value node.
 * The test below detects an array node whose opening location has zero length.
 * NOTE(review): the return statements are not visible here; presumably
 * `flags` is returned when the test holds -- confirm.
 */
3396static inline pm_node_flags_t
3397pm_implicit_array_write_flags(const pm_node_t *node, pm_node_flags_t flags) {
3398 if (PM_NODE_TYPE_P(node, PM_ARRAY_NODE) && ((const pm_array_node_t *) node)->opening_loc.length == 0) {
/**
 * Build a pm_class_variable_write_node_t from a class variable read node, the
 * operator token, and `value`.
 */
3407static pm_class_variable_write_node_t *
3408pm_class_variable_write_node_create(pm_parser_t *parser, pm_class_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
3409 return pm_class_variable_write_node_new(
// Flags are derived from the value node via pm_implicit_array_write_flags.
3412 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3413 PM_LOCATION_INIT_NODES(read_node, value),
3415 read_node->base.location,
3417 TOK2LOC(parser, operator)
/**
 * Build a pm_constant_path_and_write_node_t from a constant path `target`, an
 * operator token of type PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL, and `value`.
 */
3424static pm_constant_path_and_write_node_t *
3425pm_constant_path_and_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3426 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3428 return pm_constant_path_and_write_node_new(
3432 PM_LOCATION_INIT_NODES(target, value),
3434 TOK2LOC(parser, operator),
/**
 * Build a pm_constant_path_operator_write_node_t from a constant path
 * `target`, the operator token, and `value`.
 */
3442static pm_constant_path_operator_write_node_t *
3443pm_constant_path_operator_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3444 return pm_constant_path_operator_write_node_new(
3448 PM_LOCATION_INIT_NODES(target, value),
3450 TOK2LOC(parser, operator),
// The constant is interned from the operator token's bytes minus its last byte.
3452 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
/**
 * Build a pm_constant_path_or_write_node_t from a constant path `target`, an
 * operator token of type PM_TOKEN_PIPE_PIPE_EQUAL, and `value`.
 */
3459static pm_constant_path_or_write_node_t *
3460pm_constant_path_or_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3461 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3463 return pm_constant_path_or_write_node_new(
3467 PM_LOCATION_INIT_NODES(target, value),
3469 TOK2LOC(parser, operator),
/**
 * Build a pm_constant_path_node_t from an optional `parent` node, the
 * delimiter token, and the name token.
 */
3477static pm_constant_path_node_t *
3478pm_constant_path_node_create(pm_parser_t *parser, pm_node_t *parent, const pm_token_t *delimiter, const pm_token_t *name_token) {
// NOTE(review): parent is tested against NULL further down, yet it is passed
// here unconditionally -- confirm pm_assert_value_expression accepts NULL.
3479 pm_assert_value_expression(parser, parent);
// The name stays unset unless the name token is a PM_TOKEN_CONSTANT.
3481 pm_constant_id_t name = PM_CONSTANT_ID_UNSET;
3482 if (name_token->type == PM_TOKEN_CONSTANT) {
3483 name = pm_parser_constant_id_token(parser, name_token);
3486 return pm_constant_path_node_new(
// Without a parent the location is built from delimiter and name token;
// otherwise from the parent node and the name token.
3490 (parent == NULL) ? PM_LOCATION_INIT_TOKENS(parser, delimiter, name_token) : PM_LOCATION_INIT_NODE_TOKEN(parser, parent, name_token),
3493 TOK2LOC(parser, delimiter),
3494 TOK2LOC(parser, name_token)
/**
 * Build a pm_constant_path_write_node_t from a constant path `target`, the
 * operator token, and `value`.
 */
3501static pm_constant_path_write_node_t *
3502pm_constant_path_write_node_create(pm_parser_t *parser, pm_constant_path_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3503 return pm_constant_path_write_node_new(
// Flags are derived from the value node via pm_implicit_array_write_flags.
3506 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3507 PM_LOCATION_INIT_NODES(target, value),
3509 TOK2LOC(parser, operator),
/**
 * Build a pm_constant_and_write_node_t from a constant read `target`, an
 * operator token of type PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL, and `value`.
 * The target's own location is passed through as a separate field.
 */
3517static pm_constant_and_write_node_t *
3518pm_constant_and_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3519 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
3521 return pm_constant_and_write_node_new(
3525 PM_LOCATION_INIT_NODES(target, value),
3527 target->base.location,
3528 TOK2LOC(parser, operator),
/**
 * Build a pm_constant_operator_write_node_t from a constant read `target`,
 * the operator token, and `value`.
 */
3536static pm_constant_operator_write_node_t *
3537pm_constant_operator_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3538 return pm_constant_operator_write_node_new(
3542 PM_LOCATION_INIT_NODES(target, value),
3544 target->base.location,
3545 TOK2LOC(parser, operator),
// The constant is interned from the operator token's bytes minus its last byte.
3547 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
/**
 * Build a pm_constant_or_write_node_t from a constant read `target`, an
 * operator token of type PM_TOKEN_PIPE_PIPE_EQUAL, and `value`.
 */
3554static pm_constant_or_write_node_t *
3555pm_constant_or_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3556 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
3558 return pm_constant_or_write_node_new(
3562 PM_LOCATION_INIT_NODES(target, value),
3564 target->base.location,
3565 TOK2LOC(parser, operator),
/**
 * Build a pm_constant_read_node_t from a name token. Both the location and
 * the interned name come from that token.
 */
3573static pm_constant_read_node_t *
3574pm_constant_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
// NOTE(review): besides PM_TOKEN_CONSTANT a token type of literal 0 is
// accepted; which token kind 0 denotes is not visible here -- confirm.
3575 assert(name->type == PM_TOKEN_CONSTANT || name->type == 0);
3577 return pm_constant_read_node_new(
3581 PM_LOCATION_INIT_TOKEN(parser, name),
3582 pm_parser_constant_id_token(parser, name)
/**
 * Build a pm_constant_write_node_t from a constant read `target`, the
 * operator token, and `value`.
 */
3589static pm_constant_write_node_t *
3590pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
3591 return pm_constant_write_node_new(
// Flags are derived from the value node via pm_implicit_array_write_flags.
3594 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
3595 PM_LOCATION_INIT_NODES(target, value),
3597 target->base.location,
3599 TOK2LOC(parser, operator)
/**
 * Inspect a node and report PM_ERR_SINGLETON_FOR_LITERALS when it is one of
 * the literal node types listed below. Wrapper nodes (begin, parentheses,
 * statements) are looked through recursively.
 */
3607pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
3608 switch (PM_NODE_TYPE(node)) {
3609 case PM_BEGIN_NODE: {
3610 const pm_begin_node_t *cast = (pm_begin_node_t *) node;
3611 if (cast->statements != NULL) pm_def_node_receiver_check(parser, UP(cast->statements));
3614 case PM_PARENTHESES_NODE: {
3615 const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
3616 if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
3619 case PM_STATEMENTS_NODE: {
3620 const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
// Only the last statement is checked.
// NOTE(review): indexes body.size - 1 without an emptiness check; presumably
// a statements node always holds at least one node -- confirm.
3621 pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
3626 case PM_IMAGINARY_NODE:
3627 case PM_INTEGER_NODE:
3628 case PM_INTERPOLATED_REGULAR_EXPRESSION_NODE:
3629 case PM_INTERPOLATED_STRING_NODE:
3630 case PM_INTERPOLATED_SYMBOL_NODE:
3631 case PM_INTERPOLATED_X_STRING_NODE:
3632 case PM_RATIONAL_NODE:
3633 case PM_REGULAR_EXPRESSION_NODE:
3634 case PM_SOURCE_ENCODING_NODE:
3635 case PM_SOURCE_FILE_NODE:
3636 case PM_SOURCE_LINE_NODE:
3637 case PM_STRING_NODE:
3638 case PM_SYMBOL_NODE:
3639 case PM_X_STRING_NODE:
3640 pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
/**
 * Build a pm_def_node_t from the pieces of a method definition. The operator,
 * lparen, rparen, equal and end keyword tokens are converted with NTOK2LOC,
 * while the name and def keyword tokens use TOK2LOC.
 */
3650static pm_def_node_t *
3652 pm_parser_t *parser,
3653 pm_constant_id_t name,
3654 const pm_token_t *name_loc,
3655 pm_node_t *receiver,
3656 pm_parameters_node_t *parameters,
3658 pm_constant_id_list_t *locals,
3659 const pm_token_t *def_keyword,
3660 const pm_token_t *operator,
3661 const pm_token_t *lparen,
3662 const pm_token_t *rparen,
3663 const pm_token_t *equal,
3664 const pm_token_t *end_keyword
// A receiver, when given, is checked for literal node types.
3666 if (receiver != NULL) {
3667 pm_def_node_receiver_check(parser, receiver);
3670 return pm_def_node_new(
// Without an end keyword the location runs from the def keyword to the body
// node; otherwise it is built from the def and end keyword tokens.
3674 (end_keyword == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, def_keyword, body) : PM_LOCATION_INIT_TOKENS(parser, def_keyword, end_keyword),
3676 TOK2LOC(parser, name_loc),
3681 TOK2LOC(parser, def_keyword),
3682 NTOK2LOC(parser, operator),
3683 NTOK2LOC(parser, lparen),
3684 NTOK2LOC(parser, rparen),
3685 NTOK2LOC(parser, equal),
3686 NTOK2LOC(parser, end_keyword)
/**
 * Build a pm_defined_node_t. `lparen` and `rparen` are converted with
 * NTOK2LOC; `rparen` may be NULL.
 */
3693static pm_defined_node_t *
3694pm_defined_node_create(pm_parser_t *parser, const pm_token_t *lparen, pm_node_t *value, const pm_token_t *rparen, const pm_token_t *keyword) {
3695 return pm_defined_node_new(
// Without a closing parenthesis the location runs from the keyword to the
// value node; otherwise it is built from the keyword and rparen tokens.
3699 (rparen == NULL) ? PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, value) : PM_LOCATION_INIT_TOKENS(parser, keyword, rparen),
3700 NTOK2LOC(parser, lparen),
3702 NTOK2LOC(parser, rparen),
3703 TOK2LOC(parser, keyword)
/**
 * Build a pm_else_node_t from the else keyword, optional statements, and an
 * optional end keyword.
 */
3710static pm_else_node_t *
3711pm_else_node_create(pm_parser_t *parser, const pm_token_t *else_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3712 return pm_else_node_new(
// With no end keyword but with statements, the location runs from the else
// keyword to the statements node; every other case uses the two tokens.
// NOTE(review): when end_keyword and statements are both NULL the token-pair
// branch receives a NULL end_keyword -- confirm callers rule this out.
3716 ((end_keyword == NULL) && (statements != NULL)) ? PM_LOCATION_INIT_TOKEN_NODE(parser, else_keyword, statements) : PM_LOCATION_INIT_TOKENS(parser, else_keyword, end_keyword),
3717 TOK2LOC(parser, else_keyword),
3719 NTOK2LOC(parser, end_keyword)
/**
 * Build a pm_embedded_statements_node_t whose location is built from the
 * opening and closing tokens.
 */
3726static pm_embedded_statements_node_t *
3727pm_embedded_statements_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
3728 return pm_embedded_statements_node_new(
3732 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
3733 TOK2LOC(parser, opening),
3735 TOK2LOC(parser, closing)
/**
 * Build a pm_embedded_variable_node_t whose location is built from the
 * operator token and the variable node.
 */
3742static pm_embedded_variable_node_t *
3743pm_embedded_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
3744 return pm_embedded_variable_node_new(
3748 PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
3749 TOK2LOC(parser, operator),
/**
 * Build a pm_ensure_node_t whose location is built from the ensure keyword
 * and end keyword tokens. Both tokens are converted with TOK2LOC.
 */
3757static pm_ensure_node_t *
3758pm_ensure_node_create(pm_parser_t *parser, const pm_token_t *ensure_keyword, pm_statements_node_t *statements, const pm_token_t *end_keyword) {
3759 return pm_ensure_node_new(
3763 PM_LOCATION_INIT_TOKENS(parser, ensure_keyword, end_keyword),
3764 TOK2LOC(parser, ensure_keyword),
3766 TOK2LOC(parser, end_keyword)
/**
 * Build a pm_false_node_t from a PM_TOKEN_KEYWORD_FALSE token. The node is
 * flagged as a static literal and located at the token.
 */
3773static pm_false_node_t *
3774pm_false_node_create(pm_parser_t *parser, const pm_token_t *token) {
3775 assert(token->type == PM_TOKEN_KEYWORD_FALSE);
3777 return pm_false_node_new(
3780 PM_NODE_FLAG_STATIC_LITERAL,
3781 PM_LOCATION_INIT_TOKEN(parser, token)
/**
 * Build a pm_find_pattern_node_t from a list of pattern nodes. The first
 * entry must be a splat; the last entry is the right-hand splat, and the
 * entries in between are appended to the node's `requireds` list.
 * `nodes` must contain at least one entry because nodes[0] is read
 * unconditionally.
 */
3789static pm_find_pattern_node_t *
3790pm_find_pattern_node_create(pm_parser_t *parser, pm_node_list_t *nodes) {
3791 pm_node_t *left = nodes->nodes[0];
3792 assert(PM_NODE_TYPE_P(left, PM_SPLAT_NODE));
3793 pm_splat_node_t *left_splat_node = (pm_splat_node_t *) left;
// With a single entry there is no right-hand splat, so a zero-length missing
// node positioned at the end of the left splat stands in for it.
3797 if (nodes->size == 1) {
3798 right = UP(pm_missing_node_create(parser, PM_NODE_END(left), 0));
3800 right = nodes->nodes[nodes->size - 1];
3801 assert(PM_NODE_TYPE_P(right, PM_SPLAT_NODE));
3804#if PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS
3805 // FindPatternNode#right is typed as SplatNode in this case, so replace the potential MissingNode with a SplatNode.
3806 // The resulting AST will anyway be ignored, but this file still needs to compile.
3807 pm_splat_node_t *right_splat_node = PM_NODE_TYPE_P(right, PM_SPLAT_NODE) ? (pm_splat_node_t *) right : left_splat_node;
3809 pm_node_t *right_splat_node = right;
3812 pm_find_pattern_node_t *node = pm_find_pattern_node_new(
3816 PM_LOCATION_INIT_NODES(left, right),
3819 ((pm_node_list_t) { 0 }),
3821 ((pm_location_t) { 0 }),
3822 ((pm_location_t) { 0 })
3825 // For now we're going to just copy over each pointer manually. This could be
3826 // much more efficient, as we could instead resize the node list to only point
// Indices 1 .. size - 2 are copied, i.e. everything between the two splats.
3828 for (size_t index = 1; index < nodes->size - 1; index++) {
3829 pm_node_list_append(parser->arena, &node->requireds, nodes->nodes[index]);
/**
 * Convert the text of a float token into a double. The token text is copied
 * into a temporary heap buffer, adjusted for the current locale's decimal
 * point, stripped of underscores, and handed to strtod. A parse failure is
 * reported as PM_ERR_FLOAT_PARSE; an out-of-range infinite result produces a
 * PM_WARN_FLOAT_OUT_OF_RANGE warning. An empty token yields 0.0.
 */
3840pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
3841 ptrdiff_t diff = token->end - token->start;
3842 if (diff <= 0) return 0.0;
3844 // First, get a buffer of the content.
3845 size_t length = (size_t) diff;
// One extra byte is reserved for the terminating NUL written below.
3846 const size_t buffer_size = sizeof(char) * (length + 1);
// NOTE(review): no NULL check is visible between this allocation and the
// memcpy, unlike pm_float_node_rational_create -- confirm xmalloc cannot
// return NULL here.
3847 char *buffer = xmalloc(buffer_size);
3848 memcpy((void *) buffer, token->start, length);
3850 // Next, determine if we need to replace the decimal point because of
3851 // locale-specific options, and then normalize them if we have to.
3852 char decimal_point = *localeconv()->decimal_point;
3853 if (decimal_point != '.') {
3854 for (size_t index = 0; index < length; index++) {
3855 if (buffer[index] == '.') buffer[index] = decimal_point;
3859 // Next, handle underscores by removing them from the buffer.
3860 for (size_t index = 0; index < length; index++) {
3861 if (buffer[index] == '_') {
// Shift the tail left by one. The source range ends at buffer[length], the
// spare byte reserved above, so the move stays inside the allocation.
3862 memmove((void *) (buffer + index), (void *) (buffer + index + 1), length - index);
3867 // Null-terminate the buffer so that strtod cannot read off the end.
3868 buffer[length] = '\0';
3870 // Now, call strtod to parse the value. Note that CRuby has their own
3871 // version of strtod which avoids locales. We're okay using the locale-aware
3872 // version because we've already validated through the parser that the token
3873 // is in a valid format.
3876 double value = strtod(buffer, &eptr);
3878 // This should never happen, because we've already checked that the token
3879 // is in a valid format. However it's good to be safe.
// Failure means strtod did not consume the whole buffer, or errno holds
// something other than ERANGE.
3880 if ((eptr != buffer + length) || (errno != 0 && errno != ERANGE)) {
3881 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, PM_ERR_FLOAT_PARSE);
3882 xfree_sized(buffer, buffer_size);
3886 // If errno is set, then it should only be ERANGE. At this point we need to
3887 // check if it's infinity (it should be).
3888 if (errno == ERANGE && PRISM_ISINF(value)) {
3890 const char *ellipsis;
3896 warn_width = (int) length;
// The warning quotes the original token text (not the cleaned buffer),
// limited to warn_width characters.
3900 pm_diagnostic_list_append_format(&parser->warning_list, PM_TOKEN_START(parser, token), PM_TOKEN_LENGTH(token), PM_WARN_FLOAT_OUT_OF_RANGE, warn_width, (const char *) token->start, ellipsis);
// Normalize to a signed HUGE_VAL.
3901 value = (value < 0.0) ? -HUGE_VAL : HUGE_VAL;
3904 // Finally we can free the buffer and return the value.
3905 xfree_sized(buffer, buffer_size);
/**
 * Build a pm_float_node_t from a PM_TOKEN_FLOAT token. The node is flagged as
 * a static literal and its value is computed by pm_double_parse.
 */
3912static pm_float_node_t *
3913pm_float_node_create(pm_parser_t *parser, const pm_token_t *token) {
3914 assert(token->type == PM_TOKEN_FLOAT);
3916 return pm_float_node_new(
3919 PM_NODE_FLAG_STATIC_LITERAL,
3920 PM_LOCATION_INIT_TOKEN(parser, token),
3921 pm_double_parse(parser, token)
/**
 * Build a pm_imaginary_node_t from a PM_TOKEN_FLOAT_IMAGINARY token. The
 * wrapped numeric node is a float node created from the same text with the
 * final byte dropped (presumably the imaginary suffix).
 */
3928static pm_imaginary_node_t *
3929pm_float_node_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
3930 assert(token->type == PM_TOKEN_FLOAT_IMAGINARY);
3932 return pm_imaginary_node_new(
3935 PM_NODE_FLAG_STATIC_LITERAL,
3936 PM_LOCATION_INIT_TOKEN(parser, token),
// A temporary token re-labels the shortened text as a plain float.
3937 UP(pm_float_node_create(parser, &((pm_token_t) {
3938 .type = PM_TOKEN_FLOAT,
3939 .start = token->start,
3940 .end = token->end - 1
/**
 * Build a pm_rational_node_t from a PM_TOKEN_FLOAT_RATIONAL token. The
 * numerator is parsed from the literal's digits with the decimal point
 * removed; numerator and denominator are then reduced and moved into the
 * parser's arena.
 */
3948static pm_rational_node_t *
3949pm_float_node_rational_create(pm_parser_t *parser, const pm_token_t *token) {
3950 assert(token->type == PM_TOKEN_FLOAT_RATIONAL);
3952 pm_rational_node_t *node = pm_rational_node_new(
3955 PM_INTEGER_BASE_FLAGS_DECIMAL | PM_NODE_FLAG_STATIC_LITERAL,
3956 PM_LOCATION_INIT_TOKEN(parser, token),
3957 ((pm_integer_t) { 0 }),
3958 ((pm_integer_t) { 0 })
3961 const uint8_t *start = token->start;
3962 const uint8_t *end = token->end - 1; // r
3964 while (start < end && *start == '0') start++; // 0.1 -> .1
3965 while (end > start && end[-1] == '0') end--; // 1.0 -> 1.
3967 size_t length = (size_t) (end - start);
3969 node->denominator.value = 1;
3973 const uint8_t *point = memchr(start, '.', length);
3974 assert(point && "should have a decimal point");
// Scratch buffer, released below once both integers have been parsed.
3976 uint8_t *digits = xmalloc(length);
3977 if (digits == NULL) {
3978 fputs("[pm_float_node_rational_create] Failed to allocate memory", stderr);
// Copy the part before the point, then the part after it, so that digits
// holds the literal without the '.' (length - 1 bytes).
3982 memcpy(digits, start, (unsigned long) (point - start));
3983 memcpy(digits + (point - start), point + 1, (unsigned long) (end - point - 1));
3984 pm_integer_parse(&node->numerator, PM_INTEGER_BASE_DEFAULT, digits, digits + length - 1);
// Count the non-underscore bytes from the point (inclusive) to the end.
3986 size_t fract_length = 0;
3987 for (const uint8_t *fract = point; fract < end; ++fract) {
3988 if (*fract != '_') ++fract_length;
// The buffer is reused for the denominator: positions 1 .. fract_length - 1
// become '0'. NOTE(review): digits[0] is presumably set to '1' on a line not
// visible here, giving a power of ten -- confirm.
3991 if (fract_length > 1) memset(digits + 1, '0', fract_length - 1);
3992 pm_integer_parse(&node->denominator, PM_INTEGER_BASE_DEFAULT, digits, digits + fract_length);
3993 xfree_sized(digits, length);
3995 pm_integers_reduce(&node->numerator, &node->denominator);
3996 pm_integer_arena_move(parser->arena, &node->numerator);
3997 pm_integer_arena_move(parser->arena, &node->denominator);
/**
 * Build a pm_imaginary_node_t from a PM_TOKEN_FLOAT_RATIONAL_IMAGINARY token.
 * The wrapped numeric node is a rational node created from the same text with
 * the final byte dropped (presumably the imaginary suffix).
 */
4005static pm_imaginary_node_t *
4006pm_float_node_rational_imaginary_create(pm_parser_t *parser, const pm_token_t *token) {
4007 assert(token->type == PM_TOKEN_FLOAT_RATIONAL_IMAGINARY);
4009 return pm_imaginary_node_new(
4012 PM_NODE_FLAG_STATIC_LITERAL,
4013 PM_LOCATION_INIT_TOKEN(parser, token),
// A temporary token re-labels the shortened text as a float rational.
4014 UP(pm_float_node_rational_create(parser, &((pm_token_t) {
4015 .type = PM_TOKEN_FLOAT_RATIONAL,
4016 .start = token->start,
4017 .end = token->end - 1
/**
 * Build a pm_for_node_t whose location is built from the for keyword and end
 * keyword tokens. Only the do keyword is converted with NTOK2LOC; the for,
 * in, and end keywords use TOK2LOC.
 */
4025static pm_for_node_t *
4027 pm_parser_t *parser,
4029 pm_node_t *collection,
4030 pm_statements_node_t *statements,
4031 const pm_token_t *for_keyword,
4032 const pm_token_t *in_keyword,
4033 const pm_token_t *do_keyword,
4034 const pm_token_t *end_keyword
4036 return pm_for_node_new(
4040 PM_LOCATION_INIT_TOKENS(parser, for_keyword, end_keyword),
4044 TOK2LOC(parser, for_keyword),
4045 TOK2LOC(parser, in_keyword),
4046 NTOK2LOC(parser, do_keyword),
4047 TOK2LOC(parser, end_keyword)
/**
 * Build a pm_forwarding_arguments_node_t located at a PM_TOKEN_UDOT_DOT_DOT
 * token.
 */
4054static pm_forwarding_arguments_node_t *
4055pm_forwarding_arguments_node_create(pm_parser_t *parser, const pm_token_t *token) {
4056 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4058 return pm_forwarding_arguments_node_new(
4062 PM_LOCATION_INIT_TOKEN(parser, token)
/**
 * Build a pm_forwarding_parameter_node_t located at a PM_TOKEN_UDOT_DOT_DOT
 * token.
 */
4069static pm_forwarding_parameter_node_t *
4070pm_forwarding_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
4071 assert(token->type == PM_TOKEN_UDOT_DOT_DOT);
4073 return pm_forwarding_parameter_node_new(
4077 PM_LOCATION_INIT_TOKEN(parser, token)
/**
 * Build a pm_forwarding_super_node_t from a PM_TOKEN_KEYWORD_SUPER token and
 * parsed arguments. Any block in `arguments` must be a PM_BLOCK_NODE.
 */
4084static pm_forwarding_super_node_t *
4085pm_forwarding_super_node_create(pm_parser_t *parser, const pm_token_t *token, pm_arguments_t *arguments) {
4086 assert(arguments->block == NULL || PM_NODE_TYPE_P(arguments->block, PM_BLOCK_NODE));
4087 assert(token->type == PM_TOKEN_KEYWORD_SUPER);
4089 pm_block_node_t *block = NULL;
4090 if (arguments->block != NULL) {
4091 block = (pm_block_node_t *) arguments->block;
4094 return pm_forwarding_super_node_new(
// Without a block the location is just the keyword; with one it is built
// from the keyword token and the block node.
4098 (block == NULL) ? PM_LOCATION_INIT_TOKEN(parser, token) : PM_LOCATION_INIT_TOKEN_NODE(parser, token, block),
/**
 * Build a pm_hash_pattern_node_t with an empty node list, located by its
 * opening and closing tokens.
 */
4107static pm_hash_pattern_node_t *
4108pm_hash_pattern_node_empty_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4109 return pm_hash_pattern_node_new(
4113 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
4115 ((pm_node_list_t) { 0 }),
4117 TOK2LOC(parser, opening),
4118 TOK2LOC(parser, closing)
/**
 * Build a pm_hash_pattern_node_t from a list of element nodes and an optional
 * rest node. The node's location is computed from the elements and/or the
 * rest node, and the elements are concatenated into the new node's list.
 */
4125static pm_hash_pattern_node_t *
4126pm_hash_pattern_node_node_list_create(pm_parser_t *parser, pm_node_list_t *elements, pm_node_t *rest) {
4130 if (elements->size > 0) {
// Span covering both rest and the elements; MIN/MAX cope with rest lying on
// either side of them.
4132 start = MIN(PM_NODE_START(rest), PM_NODE_START(elements->nodes[0]));
4133 end = MAX(PM_NODE_END(rest), PM_NODE_END(elements->nodes[elements->size - 1]));
// Span of the elements alone: first element's start to last element's end.
4135 start = PM_NODE_START(elements->nodes[0]);
4136 end = PM_NODE_END(elements->nodes[elements->size - 1]);
// Here the span comes from rest alone, which is asserted to be non-NULL.
4139 assert(rest != NULL);
4140 start = PM_NODE_START(rest);
4141 end = PM_NODE_END(rest);
4144 pm_hash_pattern_node_t *node = pm_hash_pattern_node_new(
4148 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4150 ((pm_node_list_t) { 0 }),
4152 ((pm_location_t) { 0 }),
4153 ((pm_location_t) { 0 })
4156 pm_node_list_concat(parser->arena, &node->elements, elements);
/**
 * Return the constant id to use as the name when writing to `target`, which
 * is expected to be a global variable read, back reference read, or numbered
 * reference read node.
 */
4163static pm_constant_id_t
4164pm_global_variable_write_name(pm_parser_t *parser, const pm_node_t *target) {
4165 switch (PM_NODE_TYPE(target)) {
4166 case PM_GLOBAL_VARIABLE_READ_NODE:
4167 return ((pm_global_variable_read_node_t *) target)->name;
4168 case PM_BACK_REFERENCE_READ_NODE:
4169 return ((pm_back_reference_read_node_t *) target)->name;
4170 case PM_NUMBERED_REFERENCE_READ_NODE:
4171 // This will only ever happen in the event of a syntax error, but we
4172 // still need to provide something for the node.
// The name is interned from the source bytes covered by the target's location.
4173 return pm_parser_constant_id_raw(parser, parser->start + PM_NODE_START(target), parser->start + PM_NODE_END(target));
// Any other node type is a programming error; the fallback value is -1 cast
// to pm_constant_id_t.
4175 assert(false && "unreachable");
4176 return (pm_constant_id_t) -1;
/**
 * Build a pm_global_variable_and_write_node_t from `target`, an operator
 * token of type PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL, and `value`. The name is
 * resolved by pm_global_variable_write_name.
 */
4183static pm_global_variable_and_write_node_t *
4184pm_global_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4185 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4187 return pm_global_variable_and_write_node_new(
4191 PM_LOCATION_INIT_NODES(target, value),
4192 pm_global_variable_write_name(parser, target),
4194 TOK2LOC(parser, operator),
/**
 * Build a pm_global_variable_operator_write_node_t from `target`, the
 * operator token, and `value`. The name is resolved by
 * pm_global_variable_write_name.
 */
4202static pm_global_variable_operator_write_node_t *
4203pm_global_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4204 return pm_global_variable_operator_write_node_new(
4208 PM_LOCATION_INIT_NODES(target, value),
4209 pm_global_variable_write_name(parser, target),
4211 TOK2LOC(parser, operator),
// The constant is interned from the operator token's bytes minus its last byte.
4213 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
/**
 * Build a pm_global_variable_or_write_node_t from `target`, an operator token
 * of type PM_TOKEN_PIPE_PIPE_EQUAL, and `value`. The name is resolved by
 * pm_global_variable_write_name.
 */
4220static pm_global_variable_or_write_node_t *
4221pm_global_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4222 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4224 return pm_global_variable_or_write_node_new(
4228 PM_LOCATION_INIT_NODES(target, value),
4229 pm_global_variable_write_name(parser, target),
4231 TOK2LOC(parser, operator),
/**
 * Build a pm_global_variable_read_node_t from a name token. Both the location
 * and the interned name come from that token.
 */
4239static pm_global_variable_read_node_t *
4240pm_global_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
4241 return pm_global_variable_read_node_new(
4245 PM_LOCATION_INIT_TOKEN(parser, name),
4246 pm_parser_constant_id_token(parser, name)
/**
 * Build a pm_global_variable_read_node_t for an already-interned name with no
 * corresponding source token; its location is the unset (0, 0) location.
 */
4253static pm_global_variable_read_node_t *
4254pm_global_variable_read_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name) {
4255 return pm_global_variable_read_node_new(
4259 PM_LOCATION_INIT_UNSET,
/**
 * Build a pm_global_variable_write_node_t from `target`, the operator token,
 * and `value`. The name is resolved by pm_global_variable_write_name.
 */
4267static pm_global_variable_write_node_t *
4268pm_global_variable_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4269 return pm_global_variable_write_node_new(
// Flags are derived from the value node via pm_implicit_array_write_flags.
4272 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4273 PM_LOCATION_INIT_NODES(target, value),
4274 pm_global_variable_write_name(parser, target),
4277 TOK2LOC(parser, operator)
/**
 * Build a pm_global_variable_write_node_t for an already-interned name with
 * no corresponding source tokens. The node location is the unset (0, 0)
 * location and the two location fields passed below are zeroed.
 */
4284static pm_global_variable_write_node_t *
4285pm_global_variable_write_node_synthesized_create(pm_parser_t *parser, pm_constant_id_t name, pm_node_t *value) {
4286 return pm_global_variable_write_node_new(
4290 PM_LOCATION_INIT_UNSET,
4292 ((pm_location_t) { 0 }),
4294 ((pm_location_t) { 0 })
/**
 * Build an empty pm_hash_node_t from its opening token. The node starts out
 * flagged as a static literal, located at the opening token only, with an
 * empty node list and a zeroed final location.
 */
4301static pm_hash_node_t *
4302pm_hash_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4303 assert(opening != NULL);
4305 return pm_hash_node_new(
4308 PM_NODE_FLAG_STATIC_LITERAL,
4309 PM_LOCATION_INIT_TOKEN(parser, opening),
4310 TOK2LOC(parser, opening),
4311 ((pm_node_list_t) { 0 }),
4312 ((pm_location_t) { 0 })
/**
 * Append an element to a hash node and clear the hash's static-literal flag
 * unless the element qualifies as a static literal entry.
 */
4320pm_hash_node_elements_append(pm_arena_t *arena, pm_hash_node_t *hash, pm_node_t *element) {
4321 pm_node_list_append(arena, &hash->elements, element);
// An element qualifies only if it is an assoc node whose key is not an
// array, hash, or range node, and both the key and the assoc itself carry
// PM_NODE_FLAG_STATIC_LITERAL.
4323 bool static_literal = PM_NODE_TYPE_P(element, PM_ASSOC_NODE);
4324 if (static_literal) {
4325 pm_assoc_node_t *assoc = (pm_assoc_node_t *) element;
4326 static_literal = !PM_NODE_TYPE_P(assoc->key, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_HASH_NODE) && !PM_NODE_TYPE_P(assoc->key, PM_RANGE_NODE);
4327 static_literal = static_literal && PM_NODE_FLAG_P(assoc->key, PM_NODE_FLAG_STATIC_LITERAL);
4328 static_literal = static_literal && PM_NODE_FLAG_P(assoc, PM_NODE_FLAG_STATIC_LITERAL);
4331 if (!static_literal) {
4332 pm_node_flag_unset(UP(hash), PM_NODE_FLAG_STATIC_LITERAL);
/**
 * Record the closing token of a hash node: the node's length is extended to
 * the end of the token and closing_loc is set from the token.
 */
4337pm_hash_node_closing_loc_set(const pm_parser_t *parser, pm_hash_node_t *hash, pm_token_t *token) {
4338 PM_NODE_LENGTH_SET_TOKEN(parser, hash, token);
4339 hash->closing_loc = TOK2LOC(parser, token);
/**
 * Build a pm_if_node_t flagged with PM_NODE_FLAG_NEWLINE. The node starts at
 * the if keyword; its end is taken from the first available of: the end
 * keyword, the subsequent node, a non-empty statements node, or the
 * predicate.
 */
4345static pm_if_node_t *
4346pm_if_node_create(pm_parser_t *parser,
4347 const pm_token_t *if_keyword,
4348 pm_node_t *predicate,
4349 const pm_token_t *then_keyword,
4350 pm_statements_node_t *statements,
4351 pm_node_t *subsequent,
4352 const pm_token_t *end_keyword
// Process the predicate as a conditional predicate before building the node.
4354 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4356 uint32_t start = PM_TOKEN_START(parser, if_keyword);
4359 if (end_keyword != NULL) {
4360 end = PM_TOKEN_END(parser, end_keyword);
4361 } else if (subsequent != NULL) {
4362 end = PM_NODE_END(subsequent);
4363 } else if (pm_statements_node_body_length(statements) != 0) {
4364 end = PM_NODE_END(statements);
4366 end = PM_NODE_END(predicate);
4369 return pm_if_node_new(
4372 PM_NODE_FLAG_NEWLINE,
4373 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4374 TOK2LOC(parser, if_keyword),
4376 NTOK2LOC(parser, then_keyword),
4379 NTOK2LOC(parser, end_keyword)
/**
 * Build an if node for the modifier form (`statement if predicate`). The
 * single statement is wrapped in a fresh statements node, and the node spans
 * from the statement to the predicate. Two location arguments are zeroed;
 * presumably the then/end keyword locations, which this form lacks.
 */
4386static pm_if_node_t *
4387pm_if_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *if_keyword, pm_node_t *predicate) {
4388 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4390 pm_statements_node_t *statements = pm_statements_node_create(parser);
4391 pm_statements_node_body_append(parser, statements, statement, true);
4393 return pm_if_node_new(
4396 PM_NODE_FLAG_NEWLINE,
4397 PM_LOCATION_INIT_NODES(statement, predicate),
4398 TOK2LOC(parser, if_keyword),
4400 ((pm_location_t) { 0 }),
4403 ((pm_location_t) { 0 })
/**
 * Build an if node for a ternary expression (`predicate ? a : b`). Each branch
 * expression is wrapped in its own statements node. The false branch becomes
 * an else node created from the colon token. The node spans from the predicate
 * to the false expression.
 */
4410static pm_if_node_t *
4411pm_if_node_ternary_create(pm_parser_t *parser, pm_node_t *predicate, const pm_token_t *qmark, pm_node_t *true_expression, const pm_token_t *colon, pm_node_t *false_expression) {
4412 pm_assert_value_expression(parser, predicate);
4413 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
4415 pm_statements_node_t *if_statements = pm_statements_node_create(parser);
4416 pm_statements_node_body_append(parser, if_statements, true_expression, true);
4418 pm_statements_node_t *else_statements = pm_statements_node_create(parser);
4419 pm_statements_node_body_append(parser, else_statements, false_expression, true);
4421 pm_else_node_t *else_node = pm_else_node_create(parser, colon, else_statements, NULL);
4422 return pm_if_node_new(
4425 PM_NODE_FLAG_NEWLINE,
4426 PM_LOCATION_INIT_NODES(predicate, false_expression),
// A ternary has no keyword tokens: only the `?` location is recorded, and
// the other two location arguments visible here are zeroed.
4427 ((pm_location_t) { 0 }),
4429 TOK2LOC(parser, qmark),
4432 ((pm_location_t) { 0 })
/**
 * Record the end keyword of an if node: the node's length is set so that it
 * ends at the end of the keyword token, and the token's location is stored in
 * end_keyword_loc.
 */
4437pm_if_node_end_keyword_loc_set(const pm_parser_t *parser, pm_if_node_t *node, const pm_token_t *keyword) {
4438 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4439 node->end_keyword_loc = TOK2LOC(parser, keyword);
/**
 * Record the end keyword of an else node: the node's length is set so that it
 * ends at the end of the keyword token, and the token's location is stored in
 * end_keyword_loc.
 */
4443pm_else_node_end_keyword_loc_set(const pm_parser_t *parser, pm_else_node_t *node, const pm_token_t *keyword) {
4444 PM_NODE_LENGTH_SET_TOKEN(parser, node, keyword);
4445 node->end_keyword_loc = TOK2LOC(parser, keyword);
/**
 * Build an implicit node whose location is copied from the given value node.
 * NOTE(review): the remaining constructor arguments are not visible here.
 */
4451static pm_implicit_node_t *
4452pm_implicit_node_create(pm_parser_t *parser, pm_node_t *value) {
4453 return pm_implicit_node_new(
4457 PM_LOCATION_INIT_NODE(value),
/**
 * Build an implicit rest node located at the given token, which must be a
 * comma (checked by the assertion).
 */
4465static pm_implicit_rest_node_t *
4466pm_implicit_rest_node_create(pm_parser_t *parser, const pm_token_t *token) {
4467 assert(token->type == PM_TOKEN_COMMA);
4469 return pm_implicit_rest_node_new(
4473 PM_LOCATION_INIT_TOKEN(parser, token)
/**
 * Build an integer node from an INTEGER token. The node carries the base flag
 * plus the static-literal flag and is located at the token. Its value starts
 * zeroed and is then parsed from the token's bytes.
 */
4480static pm_integer_node_t *
4481pm_integer_node_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4482 assert(token->type == PM_TOKEN_INTEGER);
4484 pm_integer_node_t *node = pm_integer_node_new(
4487 base | PM_NODE_FLAG_STATIC_LITERAL,
4488 PM_LOCATION_INIT_TOKEN(parser, token),
4489 ((pm_integer_t) { 0 })
// Translate the base flag into the pm_integer_base_t understood by the
// integer parser; decimal is the default.
// NOTE(review): the switch header is not visible; presumably it switches on `base`.
4492 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4494 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4495 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4496 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4497 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4498 default: assert(false && "unreachable"); break;
// Parse the whole token into the node's value, then hand the value to the
// parser's arena.
4501 pm_integer_parse(&node->value, integer_base, token->start, token->end);
4502 pm_integer_arena_move(parser->arena, &node->value);
/**
 * Build an imaginary node from an INTEGER_IMAGINARY token. The numeric part is
 * an integer node created from a temporary INTEGER token that covers the same
 * bytes minus the last one (presumably the imaginary suffix).
 */
4510static pm_imaginary_node_t *
4511pm_integer_node_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4512 assert(token->type == PM_TOKEN_INTEGER_IMAGINARY);
4514 return pm_imaginary_node_new(
4517 PM_NODE_FLAG_STATIC_LITERAL,
4518 PM_LOCATION_INIT_TOKEN(parser, token),
4519 UP(pm_integer_node_create(parser, base, &((pm_token_t) {
4520 .type = PM_TOKEN_INTEGER,
4521 .start = token->start,
4522 .end = token->end - 1
/**
 * Build a rational node from an INTEGER_RATIONAL token. The node is created
 * with a zeroed first integer and a second integer whose value is 1. The
 * numerator is then parsed from the token's bytes.
 */
4531static pm_rational_node_t *
4532pm_integer_node_rational_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4533 assert(token->type == PM_TOKEN_INTEGER_RATIONAL);
4535 pm_rational_node_t *node = pm_rational_node_new(
4538 base | PM_NODE_FLAG_STATIC_LITERAL,
4539 PM_LOCATION_INIT_TOKEN(parser, token),
4540 ((pm_integer_t) { 0 }),
4541 ((pm_integer_t) { .value = 1 })
// Translate the base flag into a pm_integer_base_t; decimal is the default.
// NOTE(review): the switch header is not visible; presumably it switches on `base`.
4544 pm_integer_base_t integer_base = PM_INTEGER_BASE_DECIMAL;
4546 case PM_INTEGER_BASE_FLAGS_BINARY: integer_base = PM_INTEGER_BASE_BINARY; break;
4547 case PM_INTEGER_BASE_FLAGS_OCTAL: integer_base = PM_INTEGER_BASE_OCTAL; break;
4548 case PM_INTEGER_BASE_FLAGS_DECIMAL: break;
4549 case PM_INTEGER_BASE_FLAGS_HEXADECIMAL: integer_base = PM_INTEGER_BASE_HEXADECIMAL; break;
4550 default: assert(false && "unreachable"); break;
// The last byte of the token is excluded from the parse (presumably the
// rational suffix); the parsed numerator is then handed to the parser's arena.
4553 pm_integer_parse(&node->numerator, integer_base, token->start, token->end - 1);
4554 pm_integer_arena_move(parser->arena, &node->numerator);
/**
 * Build an imaginary node from an INTEGER_RATIONAL_IMAGINARY token. The
 * numeric part is a rational node created from a temporary INTEGER_RATIONAL
 * token covering the same bytes minus the last one (presumably the imaginary
 * suffix).
 */
4563static pm_imaginary_node_t *
4564pm_integer_node_rational_imaginary_create(pm_parser_t *parser, pm_node_flags_t base, const pm_token_t *token) {
4565 assert(token->type == PM_TOKEN_INTEGER_RATIONAL_IMAGINARY);
4567 return pm_imaginary_node_new(
4570 PM_NODE_FLAG_STATIC_LITERAL,
4571 PM_LOCATION_INIT_TOKEN(parser, token),
4572 UP(pm_integer_node_rational_create(parser, base, &((pm_token_t) {
4573 .type = PM_TOKEN_INTEGER_RATIONAL,
4574 .start = token->start,
4575 .end = token->end - 1
/**
 * Build an in node (a pattern-matching clause). The node starts at the `in`
 * keyword. The then keyword may be NULL, since it is checked against NULL and
 * its location goes through NTOK2LOC.
 */
4583static pm_in_node_t *
4584pm_in_node_create(pm_parser_t *parser, pm_node_t *pattern, pm_statements_node_t *statements, const pm_token_t *in_keyword, const pm_token_t *then_keyword) {
4585 uint32_t start = PM_TOKEN_START(parser, in_keyword);
// The end offset comes from the statements if present, otherwise the then
// keyword, otherwise the pattern.
// NOTE(review): the `else` line guarding the last assignment is not visible.
4588 if (statements != NULL) {
4589 end = PM_NODE_END(statements);
4590 } else if (then_keyword != NULL) {
4591 end = PM_TOKEN_END(parser, then_keyword);
4593 end = PM_NODE_END(pattern);
4596 return pm_in_node_new(
4600 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4603 TOK2LOC(parser, in_keyword),
4604 NTOK2LOC(parser, then_keyword)
/**
 * Build an instance variable and-write node (`@a &&= value`). The node spans
 * from the target to the value. The target's own location and the operator
 * token's location are passed to the constructor.
 */
4611static pm_instance_variable_and_write_node_t *
4612pm_instance_variable_and_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4613 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
4615 return pm_instance_variable_and_write_node_new(
4619 PM_LOCATION_INIT_NODES(target, value),
4621 target->base.location,
4622 TOK2LOC(parser, operator),
/**
 * Build an instance variable operator-write node (a compound assignment). The
 * node spans from the target to the value.
 */
4630static pm_instance_variable_operator_write_node_t *
4631pm_instance_variable_operator_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4632 return pm_instance_variable_operator_write_node_new(
4636 PM_LOCATION_INIT_NODES(target, value),
4638 target->base.location,
4639 TOK2LOC(parser, operator),
// The constant is interned from the operator token without its final byte
// (presumably the trailing `=`).
4641 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1)
/**
 * Build an instance variable or-write node (`@a ||= value`). The node spans
 * from the target to the value. The target's own location and the operator
 * token's location are passed to the constructor.
 */
4648static pm_instance_variable_or_write_node_t *
4649pm_instance_variable_or_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *target, const pm_token_t *operator, pm_node_t *value) {
4650 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
4652 return pm_instance_variable_or_write_node_new(
4656 PM_LOCATION_INIT_NODES(target, value),
4658 target->base.location,
4659 TOK2LOC(parser, operator),
/**
 * Build an instance variable read node from an INSTANCE_VARIABLE token. The
 * node is located at the token and the constant id is derived from the token.
 */
4667static pm_instance_variable_read_node_t *
4668pm_instance_variable_read_node_create(pm_parser_t *parser, const pm_token_t *token) {
4669 assert(token->type == PM_TOKEN_INSTANCE_VARIABLE);
4671 return pm_instance_variable_read_node_new(
4675 PM_LOCATION_INIT_TOKEN(parser, token),
4676 pm_parser_constant_id_token(parser, token)
/**
 * Build an instance variable write node from a previously parsed read node.
 * The flags come from pm_implicit_array_write_flags applied to the value. The
 * node spans from the read node to the value.
 */
4684static pm_instance_variable_write_node_t *
4685pm_instance_variable_write_node_create(pm_parser_t *parser, pm_instance_variable_read_node_t *read_node, pm_token_t *operator, pm_node_t *value) {
4686 return pm_instance_variable_write_node_new(
4689 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
4690 PM_LOCATION_INIT_NODES(read_node, value),
4692 read_node->base.location,
4694 TOK2LOC(parser, operator)
/**
 * Append a part to the parts list of an interpolated node, updating
 * static-literal flags according to the kind of part being added.
 * NOTE(review): `break;`, `else` and closing-brace lines are not visible in
 * this listing; the branch structure is inferred from the retained comments.
 */
4704pm_interpolated_node_append(pm_arena_t *arena, pm_node_t *node, pm_node_list_t *parts, pm_node_t *part) {
4705 switch (PM_NODE_TYPE(part)) {
4706 case PM_STRING_NODE:
// Plain string parts are marked static literal and frozen.
4707 pm_node_flag_set(part, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4709 case PM_EMBEDDED_STATEMENTS_NODE: {
4710 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
// `embedded` is the sole statement when there is exactly one, else NULL.
4711 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4713 if (embedded == NULL) {
4714 // If there are no statements or more than one statement, then
4715 // we lose the static literal flag.
4716 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4717 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4718 // If the embedded statement is a string, then we can keep the
4719 // static literal flag and mark the string as frozen.
4720 pm_node_flag_set(embedded, PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN);
4721 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4722 // If the embedded statement is an interpolated string and it's
4723 // a static literal, then we can keep the static literal flag.
4725 // Otherwise we lose the static literal flag.
4726 pm_node_flag_unset(node, PM_NODE_FLAG_STATIC_LITERAL);
4731 case PM_EMBEDDED_VARIABLE_NODE:
// An embedded variable always clears the static literal flag.
4732 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
4735 assert(false && "unexpected node type");
4739 pm_node_list_append(arena, parts, part);
/**
 * Build an interpolated regular expression node from its opening token. The
 * node starts flagged static literal, located at the opening token, with an
 * empty parts list. The opening token's location is passed for both location
 * arguments; the closing location is replaced later by
 * pm_interpolated_regular_expression_node_closing_set.
 */
4745static pm_interpolated_regular_expression_node_t *
4746pm_interpolated_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening) {
4747 return pm_interpolated_regular_expression_node_new(
4750 PM_NODE_FLAG_STATIC_LITERAL,
4751 PM_LOCATION_INIT_TOKEN(parser, opening),
4752 TOK2LOC(parser, opening),
4753 ((pm_node_list_t) { 0 }),
4754 TOK2LOC(parser, opening)
4759pm_interpolated_regular_expression_node_append(pm_arena_t *arena, pm_interpolated_regular_expression_node_t *node, pm_node_t *part) {
4760 if (PM_NODE_START(node) > PM_NODE_START(part)) {
4761 PM_NODE_START_SET_NODE(node, part);
4763 if (PM_NODE_END(node) < PM_NODE_END(part)) {
4764 PM_NODE_LENGTH_SET_NODE(node, part);
4767 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
/**
 * Record the closing token of an interpolated regular expression: store its
 * location, set the node's length so that it ends at the token's end, and set
 * the flags produced by pm_regular_expression_flags_create for that token.
 */
4771pm_interpolated_regular_expression_node_closing_set(pm_parser_t *parser, pm_interpolated_regular_expression_node_t *node, const pm_token_t *closing) {
4772 node->closing_loc = TOK2LOC(parser, closing);
4773 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
4774 pm_node_flag_set(UP(node), pm_regular_expression_flags_create(parser, closing));
/**
 * Append a part to an interpolated string node. The node's location is grown
 * to cover the part, and the node's static-literal, frozen and mutable flags
 * are adjusted according to the kind of part.
 * NOTE(review): several short lines (`else`, `break;`, closing braces, and the
 * statements that follow some of the retained comments) are not visible in
 * this listing; comments that describe an action with no statement after them
 * refer to such missing lines.
 */
4801pm_interpolated_string_node_append(pm_arena_t *arena, pm_interpolated_string_node_t *node, pm_node_t *part) {
// CLEAR_FLAGS removes the static literal, frozen and mutable flags.
4802#define CLEAR_FLAGS(node) \
4803 node->base.flags = (pm_node_flags_t) (FL(node) & ~(PM_NODE_FLAG_STATIC_LITERAL | PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE))
// MUTABLE_FLAGS sets the mutable flag and removes the frozen flag. The macro
// body already ends with a semicolon.
4805#define MUTABLE_FLAGS(node) \
4806 node->base.flags = (pm_node_flags_t) ((FL(node) | PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE) & ~PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN);
// With no parts yet and no opening location, the node starts where the
// first part starts.
4808 if (node->parts.size == 0 && node->opening_loc.length == 0) {
4809 PM_NODE_START_SET_NODE(node, part);
// Grow the node so it ends no earlier than the part.
4812 if (PM_NODE_END(part) > PM_NODE_END(node)) {
4813 PM_NODE_LENGTH_SET_NODE(node, part);
4816 switch (PM_NODE_TYPE(part)) {
4817 case PM_STRING_NODE:
4818 // If inner string is not frozen, it stops being a static literal. We should *not* clear other flags,
4819 // because concatenating two frozen strings (`'foo' 'bar'`) is still frozen. This holds true for
4820 // as long as this interpolation only consists of other string literals.
4821 if (!PM_NODE_FLAG_P(part, PM_STRING_FLAGS_FROZEN)) {
4822 pm_node_flag_unset(UP(node), PM_NODE_FLAG_STATIC_LITERAL);
// The part itself is marked static literal and frozen, and loses its
// mutable flag.
4824 part->flags = (pm_node_flags_t) ((part->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4826 case PM_INTERPOLATED_STRING_NODE:
4827 if (PM_NODE_FLAG_P(part, PM_NODE_FLAG_STATIC_LITERAL)) {
4828 // If the string that we're concatenating is a static literal,
4829 // then we can keep the static literal flag for this string.
4831 // Otherwise, we lose the static literal flag here and we should
4832 // also clear the mutability flags.
4836 case PM_EMBEDDED_STATEMENTS_NODE: {
4837 pm_embedded_statements_node_t *cast = (pm_embedded_statements_node_t *) part;
// `embedded` is the sole statement when there is exactly one, else NULL.
4838 pm_node_t *embedded = (cast->statements != NULL && cast->statements->body.size == 1) ? cast->statements->body.nodes[0] : NULL;
4840 if (embedded == NULL) {
4841 // If we're embedding multiple statements or no statements, then
4842 // the string is not longer a static literal.
4844 } else if (PM_NODE_TYPE_P(embedded, PM_STRING_NODE)) {
4845 // If the embedded statement is a string, then we can make that
4846 // string as frozen and static literal, and not touch the static
4847 // literal status of this string.
4848 embedded->flags = (pm_node_flags_t) ((embedded->flags | PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN) & ~PM_STRING_FLAGS_MUTABLE);
4850 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4851 MUTABLE_FLAGS(node);
4853 } else if (PM_NODE_TYPE_P(embedded, PM_INTERPOLATED_STRING_NODE) && PM_NODE_FLAG_P(embedded, PM_NODE_FLAG_STATIC_LITERAL)) {
4854 // If the embedded statement is an interpolated string, but that
4855 // string is marked as static literal, then we can keep our
4856 // static literal status for this string.
4857 if (PM_NODE_FLAG_P(node, PM_NODE_FLAG_STATIC_LITERAL)) {
4858 MUTABLE_FLAGS(node);
4861 // In all other cases, we lose the static literal flag here and
4868 case PM_EMBEDDED_VARIABLE_NODE:
4869 // Embedded variables clear static literal, which means we also
4870 // should clear the mutability flags.
4873 case PM_X_STRING_NODE:
4874 case PM_INTERPOLATED_X_STRING_NODE:
4875 case PM_SYMBOL_NODE:
4876 case PM_INTERPOLATED_SYMBOL_NODE:
4877 // These will only happen in error cases. But we want to handle it
4878 // here so that we don't fail the assertion.
4882 assert(false && "unexpected node type");
4886 pm_node_list_append(arena, &node->parts, part);
/**
 * Build an interpolated string node. `opening`, `closing` and `parts` may each
 * be NULL. Any given parts are appended one by one through
 * pm_interpolated_string_node_append, so flags and location are updated per
 * part.
 */
4895static pm_interpolated_string_node_t *
4896pm_interpolated_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
4897 pm_node_flags_t flags = PM_NODE_FLAG_STATIC_LITERAL;
// The parser's frozen_string_literal setting selects the initial mutable or
// frozen flag.
4899 switch (parser->frozen_string_literal) {
4900 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
4901 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_MUTABLE;
4903 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
4904 flags |= PM_INTERPOLATED_STRING_NODE_FLAGS_FROZEN;
// A missing token contributes offset 0 to the initial location.
4908 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
4909 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
4911 pm_interpolated_string_node_t *node = pm_interpolated_string_node_new(
4915 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4916 NTOK2LOC(parser, opening),
4917 ((pm_node_list_t) { 0 }),
4918 NTOK2LOC(parser, closing)
4921 if (parts != NULL) {
4923 PM_NODE_LIST_FOREACH(parts, index, part) {
4924 pm_interpolated_string_node_append(parser->arena, node, part);
/**
 * Record the closing token of an interpolated string: store its location and
 * set the node's length so that it ends at the token's end.
 */
4935pm_interpolated_string_node_closing_set(const pm_parser_t *parser, pm_interpolated_string_node_t *node, const pm_token_t *closing) {
4936 node->closing_loc = TOK2LOC(parser, closing);
4937 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
4941pm_interpolated_symbol_node_append(pm_arena_t *arena, pm_interpolated_symbol_node_t *node, pm_node_t *part) {
4942 if (node->parts.size == 0 && node->opening_loc.length == 0) {
4943 PM_NODE_START_SET_NODE(node, part);
4946 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
4948 if (PM_NODE_END(part) > PM_NODE_END(node)) {
4949 PM_NODE_LENGTH_SET_NODE(node, part);
/**
 * Record the closing token of an interpolated symbol: store its location and
 * set the node's length so that it ends at the token's end.
 */
4954pm_interpolated_symbol_node_closing_loc_set(const pm_parser_t *parser, pm_interpolated_symbol_node_t *node, const pm_token_t *closing) {
4955 node->closing_loc = TOK2LOC(parser, closing);
4956 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
/**
 * Build an interpolated symbol node flagged as a static literal. `opening`,
 * `closing` and `parts` may each be NULL. Any given parts are appended one by
 * one through pm_interpolated_symbol_node_append.
 */
4962static pm_interpolated_symbol_node_t *
4963pm_interpolated_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_node_list_t *parts, const pm_token_t *closing) {
// A missing token contributes offset 0 to the initial location.
4964 uint32_t start = opening == NULL ? 0 : PM_TOKEN_START(parser, opening);
4965 uint32_t end = closing == NULL ? 0 : PM_TOKEN_END(parser, closing);
4967 pm_interpolated_symbol_node_t *node = pm_interpolated_symbol_node_new(
4970 PM_NODE_FLAG_STATIC_LITERAL,
4971 ((pm_location_t) { .start = start, .length = U32(end - start) }),
4972 NTOK2LOC(parser, opening),
4973 ((pm_node_list_t) { 0 }),
4974 NTOK2LOC(parser, closing)
4977 if (parts != NULL) {
4979 PM_NODE_LIST_FOREACH(parts, index, part) {
4980 pm_interpolated_symbol_node_append(parser->arena, node, part);
/**
 * Build an interpolated x-string node spanning from the opening token to the
 * closing token, with an empty parts list and both token locations recorded.
 */
4990static pm_interpolated_x_string_node_t *
4991pm_interpolated_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
4992 return pm_interpolated_x_string_node_new(
4996 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
4997 TOK2LOC(parser, opening),
4998 ((pm_node_list_t) { 0 }),
4999 TOK2LOC(parser, closing)
/**
 * Append a part to an interpolated x-string node. The node's length is then
 * set unconditionally so that the node ends where the part ends.
 */
5004pm_interpolated_xstring_node_append(pm_arena_t *arena, pm_interpolated_x_string_node_t *node, pm_node_t *part) {
5005 pm_interpolated_node_append(arena, UP(node), &node->parts, part);
5006 PM_NODE_LENGTH_SET_NODE(node, part);
/**
 * Record the closing token of an interpolated x-string: store its location
 * and set the node's length so that it ends at the token's end.
 */
5010pm_interpolated_xstring_node_closing_set(const pm_parser_t *parser, pm_interpolated_x_string_node_t *node, const pm_token_t *closing) {
5011 node->closing_loc = TOK2LOC(parser, closing);
5012 PM_NODE_LENGTH_SET_TOKEN(parser, node, closing);
/**
 * Build an `it` local variable read node located at the given name token.
 */
5018static pm_it_local_variable_read_node_t *
5019pm_it_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5020 return pm_it_local_variable_read_node_new(
5024 PM_LOCATION_INIT_TOKEN(parser, name)
/**
 * Build an `it` parameters node spanning from the opening token to the closing
 * token.
 */
5031static pm_it_parameters_node_t *
5032pm_it_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing) {
5033 return pm_it_parameters_node_new(
5037 PM_LOCATION_INIT_TOKENS(parser, opening, closing)
/**
 * Build an empty keyword hash node. It starts with the SYMBOL_KEYS flag set,
 * an unset location and an empty element list; both are updated as elements
 * are appended by pm_keyword_hash_node_elements_append.
 */
5044static pm_keyword_hash_node_t *
5045pm_keyword_hash_node_create(pm_parser_t *parser) {
5046 return pm_keyword_hash_node_new(
5049 PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS,
5050 PM_LOCATION_INIT_UNSET,
5051 ((pm_node_list_t) { 0 })
5059pm_keyword_hash_node_elements_append(pm_arena_t *arena, pm_keyword_hash_node_t *hash, pm_node_t *element) {
5060 // If the element being added is not an AssocNode or does not have a symbol
5061 // key, then we want to turn the SYMBOL_KEYS flag off.
5062 if (!PM_NODE_TYPE_P(element, PM_ASSOC_NODE) || !PM_NODE_TYPE_P(((pm_assoc_node_t *) element)->key, PM_SYMBOL_NODE)) {
5063 pm_node_flag_unset(UP(hash), PM_KEYWORD_HASH_NODE_FLAGS_SYMBOL_KEYS);
5066 pm_node_list_append(arena, &hash->elements, element);
5067 if (PM_NODE_LENGTH(hash) == 0) {
5068 PM_NODE_START_SET_NODE(hash, element);
5070 PM_NODE_LENGTH_SET_NODE(hash, element);
/**
 * Build a required keyword parameter node located at the name token. The
 * constant is interned from the token's bytes without the last one
 * (presumably the trailing `:` of the label).
 */
5076static pm_required_keyword_parameter_node_t *
5077pm_required_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name) {
5078 return pm_required_keyword_parameter_node_new(
5082 PM_LOCATION_INIT_TOKEN(parser, name),
5083 pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5084 TOK2LOC(parser, name)
/**
 * Build an optional keyword parameter node spanning from the name token to
 * the default value node. The constant is interned from the token's bytes
 * without the last one (presumably the trailing `:` of the label).
 */
5091static pm_optional_keyword_parameter_node_t *
5092pm_optional_keyword_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, pm_node_t *value) {
5093 return pm_optional_keyword_parameter_node_new(
5097 PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
5098 pm_parser_constant_id_raw(parser, name->start, name->end - 1),
5099 TOK2LOC(parser, name),
/**
 * Build a keyword rest parameter node. `name` may be NULL. In that case the
 * node covers only the operator token, the constant id is 0, and the name
 * location goes through NTOK2LOC. Otherwise the node spans from the operator
 * to the name.
 */
5107static pm_keyword_rest_parameter_node_t *
5108pm_keyword_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
5109 return pm_keyword_rest_parameter_node_new(
5113 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
5114 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
5115 NTOK2LOC(parser, name),
5116 TOK2LOC(parser, operator)
/**
 * Build a lambda node spanning from the operator token to the closing token,
 * recording the operator, opening and closing token locations.
 * NOTE(review): the end of the parameter list and some constructor arguments
 * are not visible in this listing.
 */
5123static pm_lambda_node_t *
5124pm_lambda_node_create(
5125 pm_parser_t *parser,
5126 pm_constant_id_list_t *locals,
5127 const pm_token_t *operator,
5128 const pm_token_t *opening,
5129 const pm_token_t *closing,
5130 pm_node_t *parameters,
5133 return pm_lambda_node_new(
5137 PM_LOCATION_INIT_TOKENS(parser, operator, closing),
5139 TOK2LOC(parser, operator),
5140 TOK2LOC(parser, opening),
5141 TOK2LOC(parser, closing),
/**
 * Build a local variable and-write node (`a &&= value`) spanning from the
 * target to the value. The target must be a local variable read, an `it`
 * local variable read, or a call node, and the operator must be `&&=`; both
 * are asserted.
 */
5150static pm_local_variable_and_write_node_t *
5151pm_local_variable_and_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5152 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5153 assert(operator->type == PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
5155 return pm_local_variable_and_write_node_new(
5159 PM_LOCATION_INIT_NODES(target, value),
5161 TOK2LOC(parser, operator),
/**
 * Build a local variable operator-write node (a compound assignment) spanning
 * from the target to the value.
 */
5171static pm_local_variable_operator_write_node_t *
5172pm_local_variable_operator_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5173 return pm_local_variable_operator_write_node_new(
5177 PM_LOCATION_INIT_NODES(target, value),
5179 TOK2LOC(parser, operator),
// The constant is interned from the operator token without its final byte
// (presumably the trailing `=`).
5182 pm_parser_constant_id_raw(parser, operator->start, operator->end - 1),
/**
 * Build a local variable or-write node (`a ||= value`) spanning from the
 * target to the value. The target must be a local variable read, an `it`
 * local variable read, or a call node, and the operator must be `||=`; both
 * are asserted.
 */
5190static pm_local_variable_or_write_node_t *
5191pm_local_variable_or_write_node_create(pm_parser_t *parser, pm_node_t *target, const pm_token_t *operator, pm_node_t *value, pm_constant_id_t name, uint32_t depth) {
5192 assert(PM_NODE_TYPE_P(target, PM_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_IT_LOCAL_VARIABLE_READ_NODE) || PM_NODE_TYPE_P(target, PM_CALL_NODE));
5193 assert(operator->type == PM_TOKEN_PIPE_PIPE_EQUAL);
5195 return pm_local_variable_or_write_node_new(
5199 PM_LOCATION_INIT_NODES(target, value),
5201 TOK2LOC(parser, operator),
/**
 * Build a local variable read node located at the name token, using an
 * already-resolved constant id. Unless `missing` is true, the read is first
 * registered with pm_locals_read on the locals of the scope found at `depth`.
 */
5211static pm_local_variable_read_node_t *
5212pm_local_variable_read_node_create_constant_id(pm_parser_t *parser, const pm_token_t *name, pm_constant_id_t name_id, uint32_t depth, bool missing) {
5213 if (!missing) pm_locals_read(&pm_parser_scope_find(parser, depth)->locals, name_id);
5215 return pm_local_variable_read_node_new(
5219 PM_LOCATION_INIT_TOKEN(parser, name),
5228static pm_local_variable_read_node_t *
5229pm_local_variable_read_node_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5230 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5231 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, false);
5238static pm_local_variable_read_node_t *
5239pm_local_variable_read_node_missing_create(pm_parser_t *parser, const pm_token_t *name, uint32_t depth) {
5240 pm_constant_id_t name_id = pm_parser_constant_id_token(parser, name);
5241 return pm_local_variable_read_node_create_constant_id(parser, name, name_id, depth, true);
/**
 * Build a local variable write node. The flags come from
 * pm_implicit_array_write_flags applied to the value. The node starts at
 * name_loc and ends where the value ends.
 */
5247static pm_local_variable_write_node_t *
5248pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name, uint32_t depth, pm_node_t *value, const pm_location_t *name_loc, const pm_token_t *operator) {
5249 return pm_local_variable_write_node_new(
5252 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5253 ((pm_location_t) { .start = name_loc->start, .length = PM_NODE_END(value) - name_loc->start }),
5258 TOK2LOC(parser, operator)
/**
 * Returns true if the byte range [start, end) is exactly the two bytes `it`.
 */
static inline bool
pm_token_is_it(const uint8_t *start, const uint8_t *end) {
    // Check the length first so the byte reads below stay inside the range.
    if (end - start != 2) {
        return false;
    }

    return start[0] == 'i' && start[1] == 't';
}
/**
 * Check whether the source bytes at offset `start` look like a numbered
 * parameter: an underscore followed by a decimal digit other than '0'.
 * NOTE(review): `length` is not used on any line visible here; a length check
 * presumably sits on a line missing from this listing. Confirm.
 */
5275pm_token_is_numbered_parameter(const pm_parser_t *parser, uint32_t start, uint32_t length) {
5278 (parser->start[start] == '_') &&
5279 (parser->start[start + 1] != '0') &&
5280 pm_char_is_decimal_digit(parser->start[start + 1])
/**
 * Report PM_ERR_PARAMETER_NUMBERED_RESERVED on the given source range when it
 * looks like a numbered parameter. The error message is formatted with a
 * pointer to the start of that range.
 */
5289pm_refute_numbered_parameter(pm_parser_t *parser, uint32_t start, uint32_t length) {
5290 if (pm_token_is_numbered_parameter(parser, start, length)) {
5291 PM_PARSER_ERR_FORMAT(parser, start, length, PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->start + start);
/**
 * Build a local variable target node at the given location. The location is
 * first checked with pm_refute_numbered_parameter, which reports an error if
 * the name looks like a numbered parameter.
 */
5299static pm_local_variable_target_node_t *
5300pm_local_variable_target_node_create(pm_parser_t *parser, const pm_location_t *location, pm_constant_id_t name, uint32_t depth) {
5301 pm_refute_numbered_parameter(parser, location->start, location->length);
5303 return pm_local_variable_target_node_new(
5307 ((pm_location_t) { .start = location->start, .length = location->length }),
/**
 * Build a match predicate node spanning from the value to the pattern. The
 * value is first checked with pm_assert_value_expression.
 */
5316static pm_match_predicate_node_t *
5317pm_match_predicate_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5318 pm_assert_value_expression(parser, value);
5320 return pm_match_predicate_node_new(
5324 PM_LOCATION_INIT_NODES(value, pattern),
5327 TOK2LOC(parser, operator)
/**
 * Build a match required node spanning from the value to the pattern. The
 * value is first checked with pm_assert_value_expression.
 */
5334static pm_match_required_node_t *
5335pm_match_required_node_create(pm_parser_t *parser, pm_node_t *value, pm_node_t *pattern, const pm_token_t *operator) {
5336 pm_assert_value_expression(parser, value);
5338 return pm_match_required_node_new(
5342 PM_LOCATION_INIT_NODES(value, pattern),
5345 TOK2LOC(parser, operator)
/**
 * Build a match write node whose location is copied from the given call node
 * and whose node list starts empty.
 */
5352static pm_match_write_node_t *
5353pm_match_write_node_create(pm_parser_t *parser, pm_call_node_t *call) {
5354 return pm_match_write_node_new(
5358 PM_LOCATION_INIT_NODE(call),
5360 ((pm_node_list_t) { 0 })
/**
 * Build a module node spanning from the `module` keyword to the `end` keyword.
 * `locals` may be NULL, in which case an empty constant id list is used. The
 * module's constant id is derived from the name token.
 */
5367static pm_module_node_t *
5368pm_module_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *module_keyword, pm_node_t *constant_path, const pm_token_t *name, pm_node_t *body, const pm_token_t *end_keyword) {
5369 return pm_module_node_new(
5373 PM_LOCATION_INIT_TOKENS(parser, module_keyword, end_keyword),
5374 (locals == NULL ? ((pm_constant_id_list_t) { .ids = NULL, .size = 0, .capacity = 0 }) : *locals),
5375 TOK2LOC(parser, module_keyword),
5378 TOK2LOC(parser, end_keyword),
5379 pm_parser_constant_id_token(parser, name)
/**
 * Build an empty multi target node with an unset location, two empty node
 * lists, and two zeroed locations. The lists are populated by
 * pm_multi_target_node_targets_append; the parenthesis locations are assigned
 * by the opening/closing setters.
 */
5386static pm_multi_target_node_t *
5387pm_multi_target_node_create(pm_parser_t *parser) {
5388 return pm_multi_target_node_new(
5392 PM_LOCATION_INIT_UNSET,
5393 ((pm_node_list_t) { 0 }),
5395 ((pm_node_list_t) { 0 }),
5396 ((pm_location_t) { 0 }),
5397 ((pm_location_t) { 0 })
5405pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
5406 if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
5407 if (node->rest == NULL) {
5408 node->rest = target;
5410 pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
5411 pm_node_list_append(parser->arena, &node->rights, target);
5413 } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
5414 if (node->rest == NULL) {
5415 node->rest = target;
5417 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
5418 pm_node_list_append(parser->arena, &node->rights, target);
5420 } else if (node->rest == NULL) {
5421 pm_node_list_append(parser->arena, &node->lefts, target);
5423 pm_node_list_append(parser->arena, &node->rights, target);
5426 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_START(node) > PM_NODE_START(target))) {
5427 PM_NODE_START_SET_NODE(node, target);
5430 if (PM_NODE_LENGTH(node) == 0 || (PM_NODE_END(node) < PM_NODE_END(target))) {
5431 PM_NODE_LENGTH_SET_NODE(node, target);
/**
 * Record the opening parenthesis of a multi target node. The node's start
 * moves to the token's start and its length is set so that it ends at the
 * token's end, so after this call the node covers exactly the token. The
 * token's location is stored in lparen_loc.
 */
5439pm_multi_target_node_opening_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *lparen) {
5440 PM_NODE_START_SET_TOKEN(parser, node, lparen);
5441 PM_NODE_LENGTH_SET_TOKEN(parser, node, lparen);
5442 node->lparen_loc = TOK2LOC(parser, lparen);
/**
 * Record the closing parenthesis of a multi target node: set the node's
 * length so that it ends at the token's end and store the token's location in
 * rparen_loc.
 */
5449pm_multi_target_node_closing_set(const pm_parser_t *parser, pm_multi_target_node_t *node, const pm_token_t *rparen) {
5450 PM_NODE_LENGTH_SET_TOKEN(parser, node, rparen);
5451 node->rparen_loc = TOK2LOC(parser, rparen);
/**
 * Build a multi write node from a multi target node and a value. The flags
 * come from pm_implicit_array_write_flags applied to the value, and the node
 * spans from the target to the value.
 */
5457static pm_multi_write_node_t *
5458pm_multi_write_node_create(pm_parser_t *parser, pm_multi_target_node_t *target, const pm_token_t *operator, pm_node_t *value) {
5459 /* The target is no longer necessary because we have reused its children. It
5460 * is arena-allocated so no explicit free is needed. */
5461 return pm_multi_write_node_new(
5464 pm_implicit_array_write_flags(value, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY),
5465 PM_LOCATION_INIT_NODES(target, value),
5471 TOK2LOC(parser, operator),
/**
 * Build a next node from a `next` keyword token. `arguments` may be NULL. In
 * that case the node covers only the keyword; otherwise it spans from the
 * keyword to the arguments.
 */
5479static pm_next_node_t *
5480pm_next_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
5481 assert(keyword->type == PM_TOKEN_KEYWORD_NEXT);
5483 return pm_next_node_new(
5487 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
5489 TOK2LOC(parser, keyword)
/**
 * Build a nil node from a `nil` keyword token, flagged as a static literal
 * and located at the token.
 */
5496static pm_nil_node_t *
5497pm_nil_node_create(pm_parser_t *parser, const pm_token_t *token) {
5498 assert(token->type == PM_TOKEN_KEYWORD_NIL);
5500 return pm_nil_node_new(
5503 PM_NODE_FLAG_STATIC_LITERAL,
5504 PM_LOCATION_INIT_TOKEN(parser, token)
/**
 * Build a no-block parameter node (`&nil`) spanning from the operator token to
 * the `nil` keyword token. The operator must be one of the two ampersand token
 * types and the keyword must be `nil`; both are asserted.
 */
5511static pm_no_block_parameter_node_t *
5512pm_no_block_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5513 assert(operator->type == PM_TOKEN_AMPERSAND || operator->type == PM_TOKEN_UAMPERSAND);
5514 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5516 return pm_no_block_parameter_node_new(
5520 PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
5521 TOK2LOC(parser, operator),
5522 TOK2LOC(parser, keyword)
/**
 * Build a no-keywords parameter node (`**nil`) spanning from the operator
 * token to the `nil` keyword token. The operator must be one of the two
 * double-star token types and the keyword must be `nil`; both are asserted.
 */
5529static pm_no_keywords_parameter_node_t *
5530pm_no_keywords_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *keyword) {
5531 assert(operator->type == PM_TOKEN_USTAR_STAR || operator->type == PM_TOKEN_STAR_STAR);
5532 assert(keyword->type == PM_TOKEN_KEYWORD_NIL);
5534 return pm_no_keywords_parameter_node_new(
5538 PM_LOCATION_INIT_TOKENS(parser, operator, keyword),
5539 TOK2LOC(parser, operator),
5540 TOK2LOC(parser, keyword)
/**
 * Build a numbered parameters node spanning from the opening token to the
 * closing token.
 * NOTE(review): how `maximum` is passed on is not visible in this listing.
 */
5547static pm_numbered_parameters_node_t *
5548pm_numbered_parameters_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *closing, uint8_t maximum) {
5549 return pm_numbered_parameters_node_new(
5553 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
// Upper bound on a numbered reference value: half of INT_MAX, as a uint32_t.
5562#define NTH_REF_MAX ((uint32_t) (INT_MAX >> 1))
/**
 * Parse the number out of a numbered reference token and return it as a
 * uint32_t. Digits that do not parse cleanly produce
 * PM_ERR_INVALID_NUMBER_DECIMAL. A value that overflows or exceeds
 * NTH_REF_MAX produces PM_WARN_INVALID_NUMBERED_REFERENCE.
 * NOTE(review): the declaration of `endptr`, any reset of errno, and whatever
 * is assigned to `value` on the error/warning paths sit on lines missing from
 * this listing.
 */
5571pm_numbered_reference_read_node_number(pm_parser_t *parser, const pm_token_t *token) {
// Skip the first byte of the token (presumably the `$` sigil).
5572 const uint8_t *start = token->start + 1;
5573 const uint8_t *end = token->end;
5575 ptrdiff_t diff = end - start;
5577#if PTRDIFF_MAX > SIZE_MAX
5578 assert(diff < (ptrdiff_t) SIZE_MAX);
5580 size_t length = (size_t) diff;
// Copy the digits into a NUL-terminated buffer, as strtoul requires.
5582 char *digits = xcalloc(length + 1, sizeof(char));
5583 memcpy(digits, start, length);
5584 digits[length] = '\0';
5588 unsigned long value = strtoul(digits, &endptr, 10);
// Nothing consumed, or trailing bytes left over: not a valid decimal number.
5590 if ((digits == endptr) || (*endptr != '\0')) {
5591 pm_parser_err(parser, U32(start - parser->start), U32(length), PM_ERR_INVALID_NUMBER_DECIMAL);
5595 xfree_sized(digits, sizeof(char) * (length + 1));
// The warning text is formatted from the original token, first byte included.
5597 if ((errno == ERANGE) || (value > NTH_REF_MAX)) {
5598 PM_PARSER_WARN_FORMAT(parser, U32(start - parser->start), U32(length), PM_WARN_INVALID_NUMBERED_REFERENCE, (int) (length + 1), (const char *) token->start);
5602 return (uint32_t) value;
/**
 * Build a numbered reference read node from a NUMBERED_REFERENCE token,
 * located at the token, with the number parsed by
 * pm_numbered_reference_read_node_number.
 */
5610static pm_numbered_reference_read_node_t *
5611pm_numbered_reference_read_node_create(pm_parser_t *parser, const pm_token_t *name) {
5612 assert(name->type == PM_TOKEN_NUMBERED_REFERENCE);
5614 return pm_numbered_reference_read_node_new(
5618 PM_LOCATION_INIT_TOKEN(parser, name),
5619 pm_numbered_reference_read_node_number(parser, name)
/**
 * Create an OptionalParameterNode by calling pm_optional_parameter_node_new.
 *
 * The node location is built from the `name` token and the `value` node with
 * PM_LOCATION_INIT_TOKEN_NODE. The parameter's constant id comes from
 * pm_parser_constant_id_token(parser, name), and the `name` and `operator`
 * tokens are passed as locations through TOK2LOC.
 */
5626static pm_optional_parameter_node_t *
5627pm_optional_parameter_node_create(pm_parser_t *parser, const pm_token_t *name, const pm_token_t *operator, pm_node_t *value) {
5628 return pm_optional_parameter_node_new(
5632 PM_LOCATION_INIT_TOKEN_NODE(parser, name, value),
5633 pm_parser_constant_id_token(parser, name),
5634 TOK2LOC(parser, name),
5635 TOK2LOC(parser, operator),
/**
 * Create an OrNode by calling pm_or_node_new.
 *
 * The left operand is first checked with pm_assert_value_expression. The
 * node location is built from `left` and `right` with
 * PM_LOCATION_INIT_NODES, and the operator token is passed as a location.
 */
5643static pm_or_node_t *
5644pm_or_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
5645 pm_assert_value_expression(parser, left);
5647 return pm_or_node_new(
5651 PM_LOCATION_INIT_NODES(left, right),
5654 TOK2LOC(parser, operator)
/**
 * Create an empty ParametersNode by calling pm_parameters_node_new.
 *
 * The node starts with an unset location (start 0, length 0) and
 * zero-initialized node lists. The location is widened later, as parameters
 * are attached, by pm_parameters_node_location_set.
 */
5661static pm_parameters_node_t *
5662pm_parameters_node_create(pm_parser_t *parser) {
5663 return pm_parameters_node_new(
5667 PM_LOCATION_INIT_UNSET,
5668 ((pm_node_list_t) { 0 }),
5669 ((pm_node_list_t) { 0 }),
5671 ((pm_node_list_t) { 0 }),
5672 ((pm_node_list_t) { 0 }),
/**
 * Widen the location of a ParametersNode so that it covers `param`.
 *
 * A length of 0 is treated as "location not yet set": in that case both the
 * start and the length are taken from `param`. Otherwise the start is moved
 * back if `param` starts earlier, and the length is extended if `param` ends
 * later.
 *
 * NOTE(review): PM_NODE_END is start + length, so when the first branch
 * lowers the start of an already-populated node, the end used by the second
 * comparison shifts down by the same amount. This is harmless if parameters
 * are always attached in source order -- confirm.
 */
5682pm_parameters_node_location_set(pm_parameters_node_t *params, pm_node_t *param) {
5683 if ((params->base.location.length == 0) || PM_NODE_START(params) > PM_NODE_START(param)) {
5684 PM_NODE_START_SET_NODE(params, param);
5687 if ((params->base.location.length == 0) || (PM_NODE_END(params) < PM_NODE_END(param))) {
5688 PM_NODE_LENGTH_SET_NODE(params, param);
/**
 * Append a required parameter to a ParametersNode.
 *
 * Widens the location of `params` to cover `param` through
 * pm_parameters_node_location_set, then appends `param` to the `requireds`
 * list, passing `arena` to pm_node_list_append.
 */
5696pm_parameters_node_requireds_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
5697 pm_parameters_node_location_set(params, param);
5698 pm_node_list_append(arena, &params->requireds, param);
/**
 * Append an optional parameter to a ParametersNode.
 *
 * Widens the location of `params` to cover `param` through
 * pm_parameters_node_location_set, then appends the upcast node to the
 * `optionals` list, passing `arena` to pm_node_list_append.
 */
5705pm_parameters_node_optionals_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_optional_parameter_node_t *param) {
5706 pm_parameters_node_location_set(params, UP(param));
5707 pm_node_list_append(arena, &params->optionals, UP(param));
/**
 * Append a parameter to the `posts` list of a ParametersNode.
 *
 * Widens the location of `params` to cover `param` through
 * pm_parameters_node_location_set, then appends `param` to the `posts`
 * list, passing `arena` to pm_node_list_append.
 */
5714pm_parameters_node_posts_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
5715 pm_parameters_node_location_set(params, param);
5716 pm_node_list_append(arena, &params->posts, param);
/**
 * Set the `rest` parameter of a ParametersNode.
 *
 * Widens the location of `params` to cover `param`, then stores `param` in
 * the `rest` field. An existing value is overwritten without any check.
 */
5723pm_parameters_node_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5724 pm_parameters_node_location_set(params, param);
5725 params->rest = param;
/**
 * Append a keyword parameter to a ParametersNode.
 *
 * Widens the location of `params` to cover `param` through
 * pm_parameters_node_location_set, then appends `param` to the `keywords`
 * list, passing `arena` to pm_node_list_append.
 */
5732pm_parameters_node_keywords_append(pm_arena_t *arena, pm_parameters_node_t *params, pm_node_t *param) {
5733 pm_parameters_node_location_set(params, param);
5734 pm_node_list_append(arena, &params->keywords, param);
/**
 * Set the `keyword_rest` parameter of a ParametersNode.
 *
 * Asserts that no keyword-rest parameter has been set yet, widens the
 * location of `params` to cover `param`, then stores `param`.
 */
5741pm_parameters_node_keyword_rest_set(pm_parameters_node_t *params, pm_node_t *param) {
5742 assert(params->keyword_rest == NULL);
5743 pm_parameters_node_location_set(params, param);
5744 params->keyword_rest = param;
/**
 * Set the `block` parameter of a ParametersNode.
 *
 * Asserts that no block parameter has been set yet, widens the location of
 * `params` to cover `param`, then stores `param`.
 */
5751pm_parameters_node_block_set(pm_parameters_node_t *params, pm_node_t *param) {
5752 assert(params->block == NULL);
5753 pm_parameters_node_location_set(params, param);
5754 params->block = param;
/**
 * Create a ProgramNode by calling pm_program_node_new.
 *
 * The node location is derived from the `statements` node with
 * PM_LOCATION_INIT_NODE, so `statements` must not be NULL.
 */
5760static pm_program_node_t *
5761pm_program_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, pm_statements_node_t *statements) {
5762 return pm_program_node_new(
5766 PM_LOCATION_INIT_NODE(statements),
/**
 * Create a ParenthesesNode by calling pm_parentheses_node_new.
 *
 * The node location is built from the `opening` and `closing` tokens with
 * PM_LOCATION_INIT_TOKENS, and both tokens are also passed as individual
 * locations through TOK2LOC.
 */
5775static pm_parentheses_node_t *
5776pm_parentheses_node_create(pm_parser_t *parser, const pm_token_t *opening, pm_node_t *body, const pm_token_t *closing, pm_node_flags_t flags) {
5777 return pm_parentheses_node_new(
5781 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5783 TOK2LOC(parser, opening),
5784 TOK2LOC(parser, closing)
/**
 * Create a PinnedExpressionNode by calling pm_pinned_expression_node_new.
 *
 * The node location is built from the `operator` and `rparen` tokens with
 * PM_LOCATION_INIT_TOKENS. The operator and both parenthesis tokens are
 * passed as individual locations through TOK2LOC.
 */
5791static pm_pinned_expression_node_t *
5792pm_pinned_expression_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *operator, const pm_token_t *lparen, const pm_token_t *rparen) {
5793 return pm_pinned_expression_node_new(
5797 PM_LOCATION_INIT_TOKENS(parser, operator, rparen),
5799 TOK2LOC(parser, operator),
5800 TOK2LOC(parser, lparen),
5801 TOK2LOC(parser, rparen)
/**
 * Create a PinnedVariableNode by calling pm_pinned_variable_node_new.
 *
 * The node location is built from the `operator` token and the `variable`
 * node with PM_LOCATION_INIT_TOKEN_NODE, and the operator token is passed as
 * a location.
 */
5808static pm_pinned_variable_node_t *
5809pm_pinned_variable_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *variable) {
5810 return pm_pinned_variable_node_new(
5814 PM_LOCATION_INIT_TOKEN_NODE(parser, operator, variable),
5816 TOK2LOC(parser, operator)
/**
 * Create a PostExecutionNode by calling pm_post_execution_node_new.
 *
 * The node location is built from the `keyword` and `closing` tokens with
 * PM_LOCATION_INIT_TOKENS. The keyword, opening and closing tokens are
 * passed as individual locations through TOK2LOC.
 */
5823static pm_post_execution_node_t *
5824pm_post_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5825 return pm_post_execution_node_new(
5829 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
5831 TOK2LOC(parser, keyword),
5832 TOK2LOC(parser, opening),
5833 TOK2LOC(parser, closing)
/**
 * Create a PreExecutionNode by calling pm_pre_execution_node_new.
 *
 * The node location is built from the `keyword` and `closing` tokens with
 * PM_LOCATION_INIT_TOKENS. The keyword, opening and closing tokens are
 * passed as individual locations through TOK2LOC.
 */
5840static pm_pre_execution_node_t *
5841pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *opening, pm_statements_node_t *statements, const pm_token_t *closing) {
5842 return pm_pre_execution_node_new(
5846 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
5848 TOK2LOC(parser, keyword),
5849 TOK2LOC(parser, opening),
5850 TOK2LOC(parser, closing)
/**
 * Create a RangeNode by calling pm_range_node_new.
 *
 * Either bound may be NULL, as the NULL checks below show. The node location
 * starts at `left`, or at the operator when `left` is NULL, and ends at
 * `right`, or at the operator when `right` is NULL.
 *
 * Flags computed here:
 * - PM_RANGE_FLAGS_EXCLUDE_END when the operator is one of the two
 *   three-dot token types.
 * - PM_NODE_FLAG_STATIC_LITERAL when each bound is NULL, a NilNode, or an
 *   IntegerNode.
 */
5857static pm_range_node_t *
5858pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
// NOTE(review): both calls run before the NULL checks further down, so
// pm_assert_value_expression presumably tolerates NULL -- confirm.
5859 pm_assert_value_expression(parser, left);
5860 pm_assert_value_expression(parser, right);
5861 pm_node_flags_t flags = 0;
5863 // Indicate that this node is an exclusive range if the operator is `...`.
5864 if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
5865 flags |= PM_RANGE_FLAGS_EXCLUDE_END;
5868 // Indicate that this node is a static literal (i.e., can be compiled with
5869 // a putobject in CRuby) if the left and right are implicit nil, explicit
5870 // nil, or integers.
5872 (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
5873 (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
5875 flags |= PM_NODE_FLAG_STATIC_LITERAL;
// Fall back to the operator token for whichever bound is missing.
5878 uint32_t start = left == NULL ? PM_TOKEN_START(parser, operator) : PM_NODE_START(left);
5879 uint32_t end = right == NULL ? PM_TOKEN_END(parser, operator) : PM_NODE_END(right);
5881 return pm_range_node_new(
5885 ((pm_location_t) { .start = start, .length = U32(end - start) }),
5888 TOK2LOC(parser, operator)
/**
 * Create a RedoNode by calling pm_redo_node_new.
 *
 * Asserts that `token` is the `redo` keyword token. The node location covers
 * exactly that token.
 */
5895static pm_redo_node_t *
5896pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
5897 assert(token->type == PM_TOKEN_KEYWORD_REDO);
5899 return pm_redo_node_new(
5903 PM_LOCATION_INIT_TOKEN(parser, token)
/**
 * Create a RegularExpressionNode by calling pm_regular_expression_node_new,
 * using the caller-supplied unescaped string.
 *
 * The node flags are the result of pm_regular_expression_flags_create on the
 * closing token, combined with PM_NODE_FLAG_STATIC_LITERAL. The node
 * location is built from the `opening` and `closing` tokens, and the
 * opening, content and closing tokens are passed as individual locations.
 */
5911static pm_regular_expression_node_t *
5912pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
5913 return pm_regular_expression_node_new(
5916 pm_regular_expression_flags_create(parser, closing) | PM_NODE_FLAG_STATIC_LITERAL,
5917 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
5918 TOK2LOC(parser, opening),
5919 TOK2LOC(parser, content),
5920 TOK2LOC(parser, closing),
/**
 * Create a RegularExpressionNode whose unescaped string is PM_STRING_EMPTY.
 * Thin wrapper around pm_regular_expression_node_create_unescaped.
 */
5928static inline pm_regular_expression_node_t *
5929pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
5930 return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
/**
 * Create a RequiredParameterNode by calling pm_required_parameter_node_new.
 *
 * The node location covers `token`, and the parameter's constant id comes
 * from pm_parser_constant_id_token(parser, token).
 */
5936static pm_required_parameter_node_t *
5937pm_required_parameter_node_create(pm_parser_t *parser, const pm_token_t *token) {
5938 return pm_required_parameter_node_new(
5942 PM_LOCATION_INIT_TOKEN(parser, token),
5943 pm_parser_constant_id_token(parser, token)
/**
 * Create a RescueModifierNode by calling pm_rescue_modifier_node_new.
 *
 * The node location is built from `expression` and `rescue_expression` with
 * PM_LOCATION_INIT_NODES, and the keyword token is passed as a location.
 */
5950static pm_rescue_modifier_node_t *
5951pm_rescue_modifier_node_create(pm_parser_t *parser, pm_node_t *expression, const pm_token_t *keyword, pm_node_t *rescue_expression) {
5952 return pm_rescue_modifier_node_new(
5956 PM_LOCATION_INIT_NODES(expression, rescue_expression),
5958 TOK2LOC(parser, keyword),
/**
 * Create a RescueNode by calling pm_rescue_node_new.
 *
 * The node initially covers only the keyword token, with an empty exceptions
 * list and zeroed optional locations. The pm_rescue_node_* setters extend
 * the node as further parts are attached.
 */
5966static pm_rescue_node_t *
5967pm_rescue_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
5968 return pm_rescue_node_new(
5972 PM_LOCATION_INIT_TOKEN(parser, keyword),
5973 TOK2LOC(parser, keyword),
5974 ((pm_node_list_t) { 0 }),
5975 ((pm_location_t) { 0 }),
5977 ((pm_location_t) { 0 }),
/**
 * Record the location of the operator token on a RescueNode. The node's own
 * location is not changed.
 */
5984pm_rescue_node_operator_set(const pm_parser_t *parser, pm_rescue_node_t *node, const pm_token_t *operator) {
5985 node->operator_loc = TOK2LOC(parser, operator);
/**
 * Store the reference node on a RescueNode and set the node's length so that
 * it ends where `reference` ends.
 */
5992pm_rescue_node_reference_set(pm_rescue_node_t *node, pm_node_t *reference) {
5993 node->reference = reference;
5994 PM_NODE_LENGTH_SET_NODE(node, reference);
/**
 * Store the statements on a RescueNode.
 *
 * The node's length is updated to end where `statements` ends only when
 * pm_statements_node_body_length reports a non-empty body.
 */
6001pm_rescue_node_statements_set(pm_rescue_node_t *node, pm_statements_node_t *statements) {
6002 node->statements = statements;
6003 if (pm_statements_node_body_length(statements) > 0) {
6004 PM_NODE_LENGTH_SET_NODE(node, statements);
/**
 * Store the subsequent RescueNode on a RescueNode and set the node's length
 * so that it ends where `subsequent` ends.
 */
6012pm_rescue_node_subsequent_set(pm_rescue_node_t *node, pm_rescue_node_t *subsequent) {
6013 node->subsequent = subsequent;
6014 PM_NODE_LENGTH_SET_NODE(node, subsequent);
/**
 * Append an exception node to a RescueNode's exceptions list, passing
 * `arena` to pm_node_list_append, and set the node's length so that it ends
 * where `exception` ends.
 */
6021pm_rescue_node_exceptions_append(pm_arena_t *arena, pm_rescue_node_t *node, pm_node_t *exception) {
6022 pm_node_list_append(arena, &node->exceptions, exception);
6023 PM_NODE_LENGTH_SET_NODE(node, exception);
/**
 * Create a RestParameterNode by calling pm_rest_parameter_node_new.
 *
 * `name` may be NULL. In that case the node covers only the operator token
 * and the constant id passed is 0. Otherwise the node location is built from
 * the `operator` and `name` tokens, and the constant id comes from
 * pm_parser_constant_id_token.
 */
6029static pm_rest_parameter_node_t *
6030pm_rest_parameter_node_create(pm_parser_t *parser, const pm_token_t *operator, const pm_token_t *name) {
6031 return pm_rest_parameter_node_new(
6035 (name == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKENS(parser, operator, name),
6036 name == NULL ? 0 : pm_parser_constant_id_token(parser, name),
// NTOK2LOC is used here because `name` may be NULL.
6037 NTOK2LOC(parser, name),
6038 TOK2LOC(parser, operator)
/**
 * Create a RetryNode by calling pm_retry_node_new.
 *
 * Asserts that `token` is the `retry` keyword token. The node location
 * covers exactly that token.
 */
6045static pm_retry_node_t *
6046pm_retry_node_create(pm_parser_t *parser, const pm_token_t *token) {
6047 assert(token->type == PM_TOKEN_KEYWORD_RETRY);
6049 return pm_retry_node_new(
6053 PM_LOCATION_INIT_TOKEN(parser, token)
/**
 * Create a ReturnNode by calling pm_return_node_new.
 *
 * `arguments` may be NULL. In that case the node covers only the keyword
 * token. Otherwise the node location is built from the keyword token and the
 * `arguments` node.
 */
6060static pm_return_node_t *
6061pm_return_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_node_t *arguments) {
6062 return pm_return_node_new(
6066 (arguments == NULL) ? PM_LOCATION_INIT_TOKEN(parser, keyword) : PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, arguments),
6067 TOK2LOC(parser, keyword),
/**
 * Create a SelfNode by calling pm_self_node_new.
 *
 * Asserts that `token` is the `self` keyword token. The node location covers
 * exactly that token.
 */
6075static pm_self_node_t *
6076pm_self_node_create(pm_parser_t *parser, const pm_token_t *token) {
6077 assert(token->type == PM_TOKEN_KEYWORD_SELF);
6079 return pm_self_node_new(
6083 PM_LOCATION_INIT_TOKEN(parser, token)
/**
 * Create a ShareableConstantNode by calling pm_shareable_constant_node_new.
 *
 * The shareable-constant `value` is cast to pm_node_flags_t and passed as
 * the node's flags. The node location is derived from the `write` node with
 * PM_LOCATION_INIT_NODE.
 */
6090static pm_shareable_constant_node_t *
6091pm_shareable_constant_node_create(pm_parser_t *parser, pm_node_t *write, pm_shareable_constant_value_t value) {
6092 return pm_shareable_constant_node_new(
6095 (pm_node_flags_t) value,
6096 PM_LOCATION_INIT_NODE(write),
/**
 * Create a SingletonClassNode by calling pm_singleton_class_node_new.
 *
 * The node location is built from the `class_keyword` and `end_keyword`
 * tokens with PM_LOCATION_INIT_TOKENS. The class keyword, operator and end
 * keyword tokens are passed as individual locations through TOK2LOC.
 */
6104static pm_singleton_class_node_t *
6105pm_singleton_class_node_create(pm_parser_t *parser, pm_constant_id_list_t *locals, const pm_token_t *class_keyword, const pm_token_t *operator, pm_node_t *expression, pm_node_t *body, const pm_token_t *end_keyword) {
6106 return pm_singleton_class_node_new(
6110 PM_LOCATION_INIT_TOKENS(parser, class_keyword, end_keyword),
6112 TOK2LOC(parser, class_keyword),
6113 TOK2LOC(parser, operator),
6116 TOK2LOC(parser, end_keyword)
/**
 * Create a SourceEncodingNode by calling pm_source_encoding_node_new.
 *
 * Asserts that `token` is the `__ENCODING__` keyword token. The node is
 * flagged as a static literal and covers exactly that token.
 */
6123static pm_source_encoding_node_t *
6124pm_source_encoding_node_create(pm_parser_t *parser, const pm_token_t *token) {
6125 assert(token->type == PM_TOKEN_KEYWORD___ENCODING__);
6127 return pm_source_encoding_node_new(
6130 PM_NODE_FLAG_STATIC_LITERAL,
6131 PM_LOCATION_INIT_TOKEN(parser, token)
/**
 * Create a SourceFileNode by calling pm_source_file_node_new.
 *
 * Asserts that `file_keyword` is the `__FILE__` keyword token. The flags
 * start at 0 and are adjusted from the parser's frozen_string_literal
 * setting:
 * - DISABLED adds PM_STRING_FLAGS_MUTABLE.
 * - ENABLED adds PM_NODE_FLAG_STATIC_LITERAL and PM_STRING_FLAGS_FROZEN.
 *
 * The node location covers exactly the keyword token.
 */
6138static pm_source_file_node_t*
6139pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword) {
6140 assert(file_keyword->type == PM_TOKEN_KEYWORD___FILE__);
6142 pm_node_flags_t flags = 0;
6144 switch (parser->frozen_string_literal) {
6145 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6146 flags |= PM_STRING_FLAGS_MUTABLE;
6148 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6149 flags |= PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6153 return pm_source_file_node_new(
6157 PM_LOCATION_INIT_TOKEN(parser, file_keyword),
/**
 * Create a SourceLineNode by calling pm_source_line_node_new.
 *
 * Asserts that `token` is the `__LINE__` keyword token. The node is flagged
 * as a static literal and covers exactly that token.
 */
6165static pm_source_line_node_t *
6166pm_source_line_node_create(pm_parser_t *parser, const pm_token_t *token) {
6167 assert(token->type == PM_TOKEN_KEYWORD___LINE__);
6169 return pm_source_line_node_new(
6172 PM_NODE_FLAG_STATIC_LITERAL,
6173 PM_LOCATION_INIT_TOKEN(parser, token)
/**
 * Create a SplatNode by calling pm_splat_node_new.
 *
 * `expression` may be NULL. In that case the node covers only the operator
 * token. Otherwise the node location is built from the operator token and
 * the `expression` node.
 */
6180static pm_splat_node_t *
6181pm_splat_node_create(pm_parser_t *parser, const pm_token_t *operator, pm_node_t *expression) {
6182 return pm_splat_node_new(
6186 (expression == NULL) ? PM_LOCATION_INIT_TOKEN(parser, operator) : PM_LOCATION_INIT_TOKEN_NODE(parser, operator, expression),
6187 TOK2LOC(parser, operator),
/**
 * Create an empty StatementsNode by calling pm_statements_node_new.
 *
 * The node starts with an unset location (start 0, length 0) and a
 * zero-initialized body list.
 */
6195static pm_statements_node_t *
6196pm_statements_node_create(pm_parser_t *parser) {
6197 return pm_statements_node_new(
6201 PM_LOCATION_INIT_UNSET,
6202 ((pm_node_list_t) { 0 })
/**
 * Report whether the given StatementsNode has any statements in its body.
 * A NULL `node` is accepted and yields 0.
 *
 * NOTE(review): despite the name, the expression `node && node->body.size`
 * evaluates to 0 or 1 rather than to the element count. The callers in this
 * part of the file only compare the result against 0.
 */
6210pm_statements_node_body_length(pm_statements_node_t *node) {
6211 return node && node->body.size;
/**
 * Widen the location of a StatementsNode so that it covers `statement`.
 *
 * The start is taken from `statement` when the body is still empty or when
 * `statement` starts earlier than the node. The length is extended when
 * `statement` ends after the node's current end.
 *
 * NOTE(review): PM_NODE_END is start + length, so when the first branch
 * lowers the start of a node that already has a length, the end used by the
 * second comparison moves down by the same amount. Confirm that this is
 * intended for the prepend case.
 */
6219pm_statements_node_body_update(pm_statements_node_t *node, pm_node_t *statement) {
6220 if (pm_statements_node_body_length(node) == 0 || PM_NODE_START(statement) < PM_NODE_START(node)) {
6221 PM_NODE_START_SET_NODE(node, statement);
6224 if (PM_NODE_END(statement) > PM_NODE_END(node)) {
6225 PM_NODE_LENGTH_SET_NODE(node, statement);
/**
 * Append a statement to the body of a StatementsNode.
 *
 * The node location is widened first. If the body already has statements,
 * the type of the last one is inspected and, for types such as
 * PM_RETURN_NODE, a PM_WARN_UNREACHABLE_STATEMENT warning is attached to the
 * new statement. The statement is then appended using the parser's arena.
 * When `newline` is true, the statement is flagged with
 * PM_NODE_FLAG_NEWLINE.
 */
6233pm_statements_node_body_append(pm_parser_t *parser, pm_statements_node_t *node, pm_node_t *statement, bool newline) {
// Runs before the append, so the emptiness check inside sees the old body.
6234 pm_statements_node_body_update(node, statement);
6236 if (node->body.size > 0) {
6237 const pm_node_t *previous = node->body.nodes[node->body.size - 1];
6239 switch (PM_NODE_TYPE(previous)) {
6244 case PM_RETURN_NODE:
6245 pm_parser_warn_node(parser, statement, PM_WARN_UNREACHABLE_STATEMENT);
6252 pm_node_list_append(parser->arena, &node->body, statement);
6253 if (newline) pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
/**
 * Prepend a statement to the body of a StatementsNode.
 *
 * The node location is widened first, the statement is inserted at the front
 * of the body list, and the statement is always flagged with
 * PM_NODE_FLAG_NEWLINE.
 */
6260pm_statements_node_body_prepend(pm_arena_t *arena, pm_statements_node_t *node, pm_node_t *statement) {
6261 pm_statements_node_body_update(node, statement);
6262 pm_node_list_prepend(arena, &node->body, statement);
6263 pm_node_flag_set(statement, PM_NODE_FLAG_NEWLINE);
/**
 * Create a StringNode by calling pm_string_node_new, using the
 * caller-supplied string.
 *
 * The flags are chosen from the parser's frozen_string_literal setting:
 * - DISABLED yields PM_STRING_FLAGS_MUTABLE.
 * - ENABLED yields PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN.
 *
 * `opening` and `closing` may be NULL. The node then starts and/or ends at
 * the `content` token instead.
 */
6269static inline pm_string_node_t *
6270pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
6271 pm_node_flags_t flags = 0;
6273 switch (parser->frozen_string_literal) {
6274 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6275 flags = PM_STRING_FLAGS_MUTABLE;
6277 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6278 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
// Fall back to the content token when a delimiter is absent.
6282 uint32_t start = PM_TOKEN_START(parser, opening == NULL ? content : opening);
6283 uint32_t end = PM_TOKEN_END(parser, closing == NULL ? content : closing);
6285 return pm_string_node_new(
6289 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6290 NTOK2LOC(parser, opening),
6291 TOK2LOC(parser, content),
6292 NTOK2LOC(parser, closing),
/**
 * Create a StringNode whose string is PM_STRING_EMPTY. Thin wrapper around
 * pm_string_node_create_unescaped.
 */
6300static pm_string_node_t *
6301pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6302 return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
/**
 * Create a StringNode from the parser's current_string, then reset
 * parser->current_string to PM_STRING_EMPTY.
 */
6309static pm_string_node_t *
6310pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
6311 pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
6312 parser->current_string = PM_STRING_EMPTY;
/**
 * Create a SuperNode by calling pm_super_node_new.
 *
 * Asserts that `keyword` is the `super` keyword token and that
 * pm_arguments_end returns a non-NULL location for `arguments`. The node
 * starts at the keyword and ends at that location. The opening location,
 * argument list and closing location are copied from `arguments`.
 */
6319static pm_super_node_t *
6320pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
6321 assert(keyword->type == PM_TOKEN_KEYWORD_SUPER);
6323 const pm_location_t *end = pm_arguments_end(arguments);
6324 assert(end != NULL && "unreachable");
6326 return pm_super_node_new(
6330 ((pm_location_t) { .start = PM_TOKEN_START(parser, keyword), .length = PM_LOCATION_END(end) - PM_TOKEN_START(parser, keyword) }),
6331 TOK2LOC(parser, keyword),
6332 arguments->opening_loc,
6333 arguments->arguments,
6334 arguments->closing_loc,
/**
 * Scan the bytes of `contents` and report false as soon as a byte with the
 * high bit (0x80) set is found.
 */
6344pm_ascii_only_p(const pm_string_t *contents) {
6345 const size_t length = pm_string_length(contents);
6346 const uint8_t *source = pm_string_source(contents);
6348 for (size_t index = 0; index < length; index++) {
6349 if (source[index] & 0x80) return false;
/**
 * Walk the bytes of `contents`, measuring each character with
 * pm_encoding_utf_8_char_width. PM_ERR_INVALID_SYMBOL, when reported, is
 * attached to the span of the `location` token.
 */
6359parse_symbol_encoding_validate_utf8(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6360 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6361 size_t width = pm_encoding_utf_8_char_width(cursor, end - cursor);
6364 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
/**
 * Walk the bytes of `contents`, measuring each character with the
 * char_width callback of the parser's source encoding.
 * PM_ERR_INVALID_SYMBOL, when reported, is attached to the span of the
 * `location` token.
 */
6377parse_symbol_encoding_validate_other(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents) {
6378 const pm_encoding_t *encoding = parser->encoding;
6380 for (const uint8_t *cursor = pm_string_source(contents), *end = cursor + pm_string_length(contents); cursor < end;) {
6381 size_t width = encoding->char_width(cursor, end - cursor);
6384 pm_parser_err(parser, PM_TOKEN_START(parser, location), PM_TOKEN_LENGTH(location), PM_ERR_INVALID_SYMBOL);
/**
 * Work out the encoding flag for a symbol with the given contents,
 * optionally validating the contents along the way.
 *
 * With an explicit encoding set on the parser:
 * - explicit UTF-8: validate as UTF-8 if requested, then return
 *   PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING.
 * - source encoding US-ASCII: return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING.
 * - otherwise: validate against the source encoding if requested.
 *
 * With no explicit encoding:
 * - ASCII-only contents: return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING.
 * - otherwise: validate against the source encoding if requested.
 *
 * Validation errors are reported against the `location` token.
 */
6401static inline pm_node_flags_t
6402parse_symbol_encoding(pm_parser_t *parser, const pm_token_t *location, const pm_string_t *contents, bool validate) {
6403 if (parser->explicit_encoding != NULL) {
6404 // A Symbol may optionally have its encoding explicitly set. This will
6405 // happen if an escape sequence results in a non-ASCII code point.
6406 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6407 if (validate) parse_symbol_encoding_validate_utf8(parser, location, contents);
6408 return PM_SYMBOL_FLAGS_FORCED_UTF8_ENCODING;
6409 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6410 return PM_SYMBOL_FLAGS_FORCED_BINARY_ENCODING;
6411 } else if (validate) {
6412 parse_symbol_encoding_validate_other(parser, location, contents);
6414 } else if (pm_ascii_only_p(contents)) {
6415 // Ruby stipulates that all source files must use an ASCII-compatible
6416 // encoding. Thus, all symbols appearing in source are eligible for
6417 // "downgrading" to US-ASCII.
6418 return PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING;
6419 } else if (validate) {
6420 parse_symbol_encoding_validate_other(parser, location, contents);
/**
 * Validate a regular expression that carries an encoding modifier and work
 * out the resulting flags.
 *
 * `modifier` is one of 'n', 'u', 'e' or 's' and must be paired with the
 * matching `modifier_encoding`, as the assertion checks. Errors are reported
 * against the parser's current token. The branches taken depend on whether
 * the parser has an explicit encoding, on the source encoding, and on
 * `ascii_only`.
 */
6426static pm_node_flags_t
6427parse_and_validate_regular_expression_encoding_modifier(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags, char modifier, const pm_encoding_t *modifier_encoding) {
6428 assert ((modifier == 'n' && modifier_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) ||
6429 (modifier == 'u' && modifier_encoding == PM_ENCODING_UTF_8_ENTRY) ||
6430 (modifier == 'e' && modifier_encoding == PM_ENCODING_EUC_JP_ENTRY) ||
6431 (modifier == 's' && modifier_encoding == PM_ENCODING_WINDOWS_31J_ENTRY));
6433 // There's special validation logic used if a string does not contain any character escape sequences.
6434 if (parser->explicit_encoding == NULL) {
6435 // If an ASCII-only string without character escapes is used with an encoding modifier, then resulting Regexp
6436 // has the modifier encoding, unless the ASCII-8BIT modifier is used, in which case the Regexp "downgrades" to
6437 // the US-ASCII encoding.
6439 return modifier == 'n' ? PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING : flags;
6442 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6444 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
6446 } else if (parser->encoding != modifier_encoding) {
6447 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_ENCODING_OPTION_MISMATCH, modifier, parser->encoding->name);
6449 if (modifier == 'n' && !ascii_only) {
6450 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_NON_ESCAPED_MBC, (int) pm_string_length(source), (const char *) pm_string_source(source));
6457 // TODO (nirvdrum 21-Feb-2024): To validate regexp sources with character escape sequences we need to know whether hex or Unicode escape sequences were used and Prism doesn't currently provide that data. We handle a subset of unambiguous cases in the meanwhile.
// Placeholder: initialized to false, so the mixed-encoding error below does
// not fire until the TODO above is resolved.
6458 bool mixed_encoding = false;
6460 if (mixed_encoding) {
6461 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6462 } else if (modifier != 'n' && parser->explicit_encoding == PM_ENCODING_ASCII_8BIT_ENTRY) {
6463 // TODO (nirvdrum 21-Feb-2024): Validate the content is valid in the modifier encoding. Do this on-demand so we don't pay the cost of computation unnecessarily.
// Placeholder: initialized to true, so the error below does not fire yet.
6464 bool valid_string_in_modifier_encoding = true;
6466 if (!valid_string_in_modifier_encoding) {
6467 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_ESCAPE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6469 } else if (modifier != 'u' && parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6470 // TODO (nirvdrum 21-Feb-2024): There's currently no way to tell if the source used hex or Unicode character escapes from `explicit_encoding` alone. If the source encoding was already UTF-8, both character escape types would set `explicit_encoding` to UTF-8, but need to be processed differently. Skip for now.
6471 if (parser->encoding != PM_ENCODING_UTF_8_ENTRY) {
6472 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_INCOMPAT_CHAR_ENCODING, (int) pm_string_length(source), (const char *) pm_string_source(source));
6476 // We've determined the encoding would naturally be EUC-JP and there is no need to force the encoding to anything else.
/**
 * Validate the encoding of a regular expression source and work out the
 * encoding-related flags for it.
 *
 * When one of the encoding modifier flags is present in `flags`
 * (ASCII_8BIT, UTF_8, EUC_JP, WINDOWS_31J, checked in that order), the work
 * is delegated to parse_and_validate_regular_expression_encoding_modifier
 * with the matching modifier character and encoding. Without a modifier,
 * the result depends on `ascii_only` and on the parser's explicit and source
 * encodings. Errors are reported against the parser's current token.
 */
6486static pm_node_flags_t
6487parse_and_validate_regular_expression_encoding(pm_parser_t *parser, const pm_string_t *source, bool ascii_only, pm_node_flags_t flags) {
6488 // TODO (nirvdrum 22-Feb-2024): CRuby reports a special Regexp-specific error for invalid Unicode ranges. We either need to scan again or modify the "invalid Unicode escape sequence" message we already report.
// Placeholder: initialized to true, so the error below does not fire yet.
6489 bool valid_unicode_range = true;
6490 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY && !valid_unicode_range) {
6491 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_REGEXP_INVALID_UNICODE_RANGE, (int) pm_string_length(source), (const char *) pm_string_source(source));
6495 // US-ASCII strings do not admit multi-byte character literals. However, character escape sequences corresponding
6496 // to multi-byte characters are allowed.
6497 if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY && parser->explicit_encoding == NULL && !ascii_only) {
6498 // CRuby will continue processing even though a SyntaxError has already been detected. It may result in the
6499 // following error message appearing twice. We do the same for compatibility.
6500 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->current, PM_ERR_INVALID_MULTIBYTE_CHAR, parser->encoding->name);
6511 if (flags & PM_REGULAR_EXPRESSION_FLAGS_ASCII_8BIT) {
6512 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'n', PM_ENCODING_ASCII_8BIT_ENTRY);
6515 if (flags & PM_REGULAR_EXPRESSION_FLAGS_UTF_8) {
6516 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'u', PM_ENCODING_UTF_8_ENTRY);
6519 if (flags & PM_REGULAR_EXPRESSION_FLAGS_EUC_JP) {
6520 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 'e', PM_ENCODING_EUC_JP_ENTRY);
6523 if (flags & PM_REGULAR_EXPRESSION_FLAGS_WINDOWS_31J) {
6524 return parse_and_validate_regular_expression_encoding_modifier(parser, source, ascii_only, flags, 's', PM_ENCODING_WINDOWS_31J_ENTRY);
6527 // At this point no encoding modifiers will be present on the regular expression as they would have already
6528 // been processed. Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all
6529 // regular expressions without an encoding modifier appearing in source are eligible for "downgrading" to US-ASCII.
6531 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
6534 // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
6535 // or by specifying a modifier.
6537 // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US ASCII code points.
6538 if (parser->explicit_encoding != NULL) {
6539 if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
6540 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
6541 } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
6542 return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
/**
 * Create a SymbolNode by calling pm_symbol_node_new, using the
 * caller-supplied unescaped string and extra flags.
 *
 * PM_NODE_FLAG_STATIC_LITERAL is always set in addition to `flags`.
 * `opening` and `closing` may be NULL. The node then starts and/or ends at
 * the `value` token instead. All three token locations go through NTOK2LOC.
 */
6553static pm_symbol_node_t *
6554pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped, pm_node_flags_t flags) {
// Fall back to the value token when a delimiter is absent.
6555 uint32_t start = opening == NULL ? PM_TOKEN_START(parser, value) : PM_TOKEN_START(parser, opening);
6556 uint32_t end = closing == NULL ? PM_TOKEN_END(parser, value) : PM_TOKEN_END(parser, closing);
6558 return pm_symbol_node_new(
6561 PM_NODE_FLAG_STATIC_LITERAL | flags,
6562 ((pm_location_t) { .start = start, .length = U32(end - start) }),
6563 NTOK2LOC(parser, opening),
6564 NTOK2LOC(parser, value),
6565 NTOK2LOC(parser, closing),
/**
 * Create a SymbolNode whose unescaped string is PM_STRING_EMPTY and which
 * has no extra flags. Thin wrapper around pm_symbol_node_create_unescaped.
 */
6573static inline pm_symbol_node_t *
6574pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6575 return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_STRING_EMPTY, 0);
/**
 * Create a SymbolNode from the parser's current_string, then reset
 * parser->current_string to PM_STRING_EMPTY.
 *
 * The encoding flags come from parse_symbol_encoding, called with
 * validation disabled.
 */
6581static pm_symbol_node_t *
6582pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
6583 pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string, parse_symbol_encoding(parser, value, &parser->current_string, false));
6584 parser->current_string = PM_STRING_EMPTY;
/**
 * Create a SymbolNode from a PM_TOKEN_LABEL token.
 *
 * The token is split into two synthetic tokens: a closing token covering its
 * last byte, and a label token covering everything before that byte. The
 * symbol is created with no opening token. Its unescaped string is a shared
 * view over the label bytes, and its encoding flags come from
 * parse_symbol_encoding, called with validation disabled.
 */
6591static pm_symbol_node_t *
6592pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
6593 assert(token->type == PM_TOKEN_LABEL);
6595 pm_token_t closing = { .type = PM_TOKEN_LABEL_END, .start = token->end - 1, .end = token->end };
6596 pm_token_t label = { .type = PM_TOKEN_LABEL, .start = token->start, .end = token->end - 1 };
6597 pm_symbol_node_t *node = pm_symbol_node_create(parser, NULL, &label, &closing);
6599 assert((label.end - label.start) >= 0);
6600 pm_string_shared_init(&node->unescaped, label.start, label.end);
6601 pm_node_flag_set(UP(node), parse_symbol_encoding(parser, &label, &node->unescaped, false));
/**
 * Create a SymbolNode that does not correspond to any source text.
 *
 * The node has an unset location, zeroed sub-locations, and the flags
 * PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING.
 * Its unescaped string is initialized from `content` with
 * pm_string_constant_init, using strlen for the length, so `content` must be
 * NUL-terminated.
 */
6609static pm_symbol_node_t *
6610pm_symbol_node_synthesized_create(pm_parser_t *parser, const char *content) {
6611 pm_symbol_node_t *node = pm_symbol_node_new(
6614 PM_NODE_FLAG_STATIC_LITERAL | PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING,
6615 PM_LOCATION_INIT_UNSET,
6616 ((pm_location_t) { 0 }),
6617 ((pm_location_t) { 0 }),
6618 ((pm_location_t) { 0 }),
6619 ((pm_string_t) { 0 })
6622 pm_string_constant_init(&node->unescaped, content, strlen(content));
/**
 * Check whether a symbol node was written as a label.
 *
 * For a SymbolNode or an InterpolatedSymbolNode with a non-empty
 * closing_loc, the last source byte of that closing location is compared
 * with ':'. When no such closing location is found, the result is false.
 */
6630pm_symbol_node_label_p(const pm_parser_t *parser, const pm_node_t *node) {
6631 const pm_location_t *location = NULL;
6633 switch (PM_NODE_TYPE(node)) {
6634 case PM_SYMBOL_NODE: {
6635 const pm_symbol_node_t *cast = (pm_symbol_node_t *) node;
6636 if (cast->closing_loc.length > 0) {
6637 location = &cast->closing_loc;
6641 case PM_INTERPOLATED_SYMBOL_NODE: {
6642 const pm_interpolated_symbol_node_t *cast = (pm_interpolated_symbol_node_t *) node;
6643 if (cast->closing_loc.length > 0) {
6644 location = &cast->closing_loc;
// PM_LOCATION_END is one past the last byte, hence the - 1.
6652 return (location != NULL) && (parser->start[PM_LOCATION_END(location) - 1] == ':');
/**
 * Build a SymbolNode from an existing StringNode and the given delimiters.
 *
 * The new node is flagged as a static literal. Its location is built from
 * the `opening` and `closing` tokens. A temporary identifier token is
 * rebuilt from the string's content_loc so that parse_symbol_encoding can
 * validate the string's unescaped contents and report errors against the
 * content span. The resulting encoding flags are set on the new node.
 */
6658static pm_symbol_node_t *
6659pm_string_node_to_symbol_node(pm_parser_t *parser, pm_string_node_t *node, const pm_token_t *opening, const pm_token_t *closing) {
6660 pm_symbol_node_t *new_node = pm_symbol_node_new(
6663 PM_NODE_FLAG_STATIC_LITERAL,
6664 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
6665 TOK2LOC(parser, opening),
6667 TOK2LOC(parser, closing),
// Convert the content location back into pointers into the source buffer.
6671 pm_token_t content = {
6672 .type = PM_TOKEN_IDENTIFIER,
6673 .start = parser->start + node->content_loc.start,
6674 .end = parser->start + node->content_loc.start + node->content_loc.length
6677 pm_node_flag_set(UP(new_node), parse_symbol_encoding(parser, &content, &node->unescaped, true));
6679 /* The old node is arena-allocated so no explicit free is needed. */
/**
 * Build a StringNode from an existing SymbolNode.
 *
 * The flags are chosen from the parser's frozen_string_literal setting:
 * - DISABLED yields PM_STRING_FLAGS_MUTABLE.
 * - ENABLED yields PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN.
 *
 * The new node takes its location from the symbol node.
 */
6686static pm_string_node_t *
6687pm_symbol_node_to_string_node(pm_parser_t *parser, pm_symbol_node_t *node) {
6688 pm_node_flags_t flags = 0;
6690 switch (parser->frozen_string_literal) {
6691 case PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED:
6692 flags = PM_STRING_FLAGS_MUTABLE;
6694 case PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED:
6695 flags = PM_NODE_FLAG_STATIC_LITERAL | PM_STRING_FLAGS_FROZEN;
6699 pm_string_node_t *new_node = pm_string_node_new(
6703 PM_LOCATION_INIT_NODE(node),
6710 /* The old node is arena-allocated so no explicit free is needed. */
/**
 * Create a TrueNode by calling pm_true_node_new.
 *
 * Asserts that `token` is the `true` keyword token. The node is flagged as a
 * static literal and covers exactly that token.
 */
6717static pm_true_node_t *
6718pm_true_node_create(pm_parser_t *parser, const pm_token_t *token) {
6719 assert(token->type == PM_TOKEN_KEYWORD_TRUE);
6721 return pm_true_node_new(
6724 PM_NODE_FLAG_STATIC_LITERAL,
6725 PM_LOCATION_INIT_TOKEN(parser, token)
/**
 * Create a TrueNode that does not correspond to any source text. The node is
 * flagged as a static literal and has an unset location (start 0, length 0).
 */
6732static pm_true_node_t *
6733pm_true_node_synthesized_create(pm_parser_t *parser) {
6734 return pm_true_node_new(
6737 PM_NODE_FLAG_STATIC_LITERAL,
6738 PM_LOCATION_INIT_UNSET
/**
 * Create an UndefNode by calling pm_undef_node_new.
 *
 * Asserts that `token` is the `undef` keyword token. The node initially
 * covers only the keyword and has an empty names list. pm_undef_node_append
 * extends it as names are added.
 */
6745static pm_undef_node_t *
6746pm_undef_node_create(pm_parser_t *parser, const pm_token_t *token) {
6747 assert(token->type == PM_TOKEN_KEYWORD_UNDEF);
6749 return pm_undef_node_new(
6753 PM_LOCATION_INIT_TOKEN(parser, token),
6754 ((pm_node_list_t) { 0 }),
6755 TOK2LOC(parser, token)
/**
 * Append a name to an UndefNode: set the node's length so that it ends where
 * `name` ends, then append `name` to the names list, passing `arena` to
 * pm_node_list_append.
 */
6763pm_undef_node_append(pm_arena_t *arena, pm_undef_node_t *node, pm_node_t *name) {
6764 PM_NODE_LENGTH_SET_NODE(node, name);
6765 pm_node_list_append(arena, &node->names, name);
/**
 * Create an UnlessNode for the keyword form by calling pm_unless_node_new.
 *
 * The predicate is first processed by pm_conditional_predicate. The node is
 * flagged with PM_NODE_FLAG_NEWLINE. Its location is built from the keyword
 * token and either `statements` or, when `statements` is NULL, the
 * predicate. `then_keyword` goes through NTOK2LOC, which suggests it may be
 * NULL -- confirm. The last location passed is zeroed.
 */
6771static pm_unless_node_t *
6772pm_unless_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, const pm_token_t *then_keyword, pm_statements_node_t *statements) {
6773 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
// The node ends at the statements when present, otherwise at the predicate.
6774 pm_node_t *end = statements == NULL ? predicate : UP(statements);
6776 return pm_unless_node_new(
6779 PM_NODE_FLAG_NEWLINE,
6780 PM_LOCATION_INIT_TOKEN_NODE(parser, keyword, end),
6781 TOK2LOC(parser, keyword),
6783 NTOK2LOC(parser, then_keyword),
6786 ((pm_location_t) { 0 })
/**
 * Create an UnlessNode for the modifier form by calling pm_unless_node_new.
 *
 * The predicate is first processed by pm_conditional_predicate. `statement`
 * is wrapped in a fresh StatementsNode and appended with the newline flag
 * requested. The node is flagged with PM_NODE_FLAG_NEWLINE, and its location
 * is built from `statement` and `predicate` with PM_LOCATION_INIT_NODES.
 * Two of the locations passed are zeroed.
 */
6793static pm_unless_node_t *
6794pm_unless_node_modifier_create(pm_parser_t *parser, pm_node_t *statement, const pm_token_t *unless_keyword, pm_node_t *predicate) {
6795 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6797 pm_statements_node_t *statements = pm_statements_node_create(parser);
6798 pm_statements_node_body_append(parser, statements, statement, true);
6800 return pm_unless_node_new(
6803 PM_NODE_FLAG_NEWLINE,
6804 PM_LOCATION_INIT_NODES(statement, predicate),
6805 TOK2LOC(parser, unless_keyword),
6807 ((pm_location_t) { 0 }),
6810 ((pm_location_t) { 0 })
/**
 * Record the location of the end keyword on an UnlessNode and set the node's
 * length so that it ends where that token ends.
 */
6815pm_unless_node_end_keyword_loc_set(const pm_parser_t *parser, pm_unless_node_t *node, const pm_token_t *end_keyword) {
6816 node->end_keyword_loc = TOK2LOC(parser, end_keyword);
6817 PM_NODE_LENGTH_SET_TOKEN(parser, node, end_keyword);
/**
 * Drop from the parser's current_block_exits list the trailing entries that
 * start at or after the start of `statements`.
 *
 * The list is walked from the back. The walk stops at the first entry that
 * starts before `statements`. Entries are removed by decrementing the list
 * size. Asserts that parser->current_block_exits is not NULL.
 */
6826pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statements) {
6827 assert(parser->current_block_exits != NULL);
6829 // All of the block exits that we want to remove should be within the
6830 // statements, and since we are modifying the statements, we shouldn't have
6831 // to check the end location.
6832 uint32_t start = statements->base.location.start;
// Counts down from size to 1 so that the unsigned index never goes below 0.
6834 for (size_t index = parser->current_block_exits->size; index > 0; index--) {
6835 pm_node_t *block_exit = parser->current_block_exits->nodes[index - 1];
6836 if (block_exit->location.start < start) break;
6838 // Implicitly remove from the list by lowering the size.
6839 parser->current_block_exits->size--;
/**
 * Allocate and initialize a new until node. The predicate is passed through
 * pm_conditional_predicate, and the node's location is initialized from the
 * keyword token and the closing token.
 */
6846static pm_until_node_t *
6847pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6848 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6850 return pm_until_node_new(
6854 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
6855 TOK2LOC(parser, keyword),
6856 NTOK2LOC(parser, do_keyword),
6857 TOK2LOC(parser, closing),
/**
 * Allocate and initialize a new until node for the modifier form. Besides
 * checking the predicate, this trims the parser's current block exits against
 * the given statements via pm_loop_modifier_block_exits.
 */
6866static pm_until_node_t *
6867pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6868 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6869 pm_loop_modifier_block_exits(parser, statements);
6871 return pm_until_node_new(
// The node's location is initialized from the statements and the predicate.
6875 PM_LOCATION_INIT_NODES(statements, predicate),
6876 TOK2LOC(parser, keyword),
// Zeroed locations. NOTE(review): presumably the `do` keyword and closing
// locations, which the modifier form does not have -- confirm.
6877 ((pm_location_t) { 0 }),
6878 ((pm_location_t) { 0 }),
/**
 * Allocate and initialize a new when node that initially covers only its
 * keyword token, with an empty node list and a zeroed location.
 */
6887static pm_when_node_t *
6888pm_when_node_create(pm_parser_t *parser, const pm_token_t *keyword) {
6889 return pm_when_node_new(
6893 PM_LOCATION_INIT_TOKEN(parser, keyword),
6894 TOK2LOC(parser, keyword),
// NOTE(review): presumably the (initially empty) conditions list and the
// unset `then` keyword location -- confirm against pm_when_node_new.
6895 ((pm_node_list_t) { 0 }),
6896 ((pm_location_t) { 0 }),
/**
 * Append a condition to a when node's conditions list (allocating from the
 * given arena) after extending the node's length to the end of the condition.
 */
6905pm_when_node_conditions_append(pm_arena_t *arena, pm_when_node_t *node, pm_node_t *condition) {
6906 PM_NODE_LENGTH_SET_NODE(node, condition);
6907 pm_node_list_append(arena, &node->conditions, condition);
/**
 * Record the location of the `then` keyword on a when node and extend the
 * node's length so that the node ends at the end of that token.
 */
6914pm_when_node_then_keyword_loc_set(const pm_parser_t *parser, pm_when_node_t *node, const pm_token_t *then_keyword) {
6915 PM_NODE_LENGTH_SET_TOKEN(parser, node, then_keyword);
6916 node->then_keyword_loc = TOK2LOC(parser, then_keyword);
/**
 * Attach the statements to a when node. The node's length is only grown, and
 * only when the statements end after the node's current end.
 */
6923pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statements) {
6924 if (PM_NODE_END(statements) > PM_NODE_END(node)) {
6925 PM_NODE_LENGTH_SET_NODE(node, statements);
6928 node->statements = statements;
/**
 * Allocate and initialize a new while node. The predicate is passed through
 * pm_conditional_predicate, and the node's location is initialized from the
 * keyword token and the closing token.
 */
6934static pm_while_node_t *
6935pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6936 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6938 return pm_while_node_new(
6942 PM_LOCATION_INIT_TOKENS(parser, keyword, closing),
6943 TOK2LOC(parser, keyword),
6944 NTOK2LOC(parser, do_keyword),
6945 TOK2LOC(parser, closing),
/**
 * Allocate and initialize a new while node for the modifier form. Besides
 * checking the predicate, this trims the parser's current block exits against
 * the given statements via pm_loop_modifier_block_exits.
 */
6954static pm_while_node_t *
6955pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
6956 pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
6957 pm_loop_modifier_block_exits(parser, statements);
6959 return pm_while_node_new(
// The node's location is initialized from the statements and the predicate.
6963 PM_LOCATION_INIT_NODES(statements, predicate),
6964 TOK2LOC(parser, keyword),
// Zeroed locations. NOTE(review): presumably the `do` keyword and closing
// locations, which the modifier form does not have -- confirm.
6965 ((pm_location_t) { 0 }),
6966 ((pm_location_t) { 0 }),
/**
 * Allocate and initialize a while node that has no source tokens behind it:
 * the node location is PM_LOCATION_INIT_UNSET and the three location fields
 * passed here are all zeroed.
 */
6975static pm_while_node_t *
6976pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_statements_node_t *statements) {
6977 return pm_while_node_new(
6981 PM_LOCATION_INIT_UNSET,
6982 ((pm_location_t) { 0 }),
6983 ((pm_location_t) { 0 }),
6984 ((pm_location_t) { 0 }),
/**
 * Allocate and initialize a new xstring node from its opening, content and
 * closing tokens. The node is flagged PM_STRING_FLAGS_FROZEN and its location
 * is initialized from the opening and closing tokens.
 */
6994static pm_x_string_node_t *
6995pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
6996 return pm_x_string_node_new(
6999 PM_STRING_FLAGS_FROZEN,
7000 PM_LOCATION_INIT_TOKENS(parser, opening, closing),
7001 TOK2LOC(parser, opening),
7002 TOK2LOC(parser, content),
7003 TOK2LOC(parser, closing),
/**
 * Allocate and initialize a new xstring node, delegating to
 * pm_xstring_node_create_unescaped with PM_STRING_EMPTY as the unescaped
 * value.
 */
7011static inline pm_x_string_node_t *
7012pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
7013 return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_STRING_EMPTY);
/**
 * Allocate and initialize a new yield node. The node starts at the keyword
 * and ends at the last piece that is present, checked in this order: the
 * right parenthesis, the arguments, the left parenthesis, and finally the
 * keyword itself.
 */
7019static pm_yield_node_t *
7020pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
7021 uint32_t start = PM_TOKEN_START(parser, keyword);
// A parenthesis location with a length of zero is treated as absent.
7024 if (rparen_loc->length > 0) {
7025 end = PM_LOCATION_END(rparen_loc);
7026 } else if (arguments != NULL) {
7027 end = PM_NODE_END(arguments);
7028 } else if (lparen_loc->length > 0) {
7029 end = PM_LOCATION_END(lparen_loc);
7031 end = PM_TOKEN_END(parser, keyword);
7034 return pm_yield_node_new(
7038 ((pm_location_t) { .start = start, .length = U32(end - start) }),
7039 TOK2LOC(parser, keyword),
/**
 * Look up a local by constant id, starting at the current scope and walking
 * outwards through scope->previous. The current depth is returned as soon as
 * a scope's locals contain the id; the walk stops at a closed scope.
 */
7051pm_parser_local_depth_constant_id(pm_parser_t *parser, pm_constant_id_t constant_id) {
7052 pm_scope_t *scope = parser->current_scope;
7055 while (scope != NULL) {
// pm_locals_find reports a miss as UINT32_MAX.
7056 if (pm_locals_find(&scope->locals, constant_id) != UINT32_MAX) return depth;
// A closed scope ends the search: scopes beyond it are not consulted.
7057 if (scope->closed) break;
7059 scope = scope->previous;
/**
 * Look up the depth of the local named by the given token, by converting the
 * token to a constant id and delegating to pm_parser_local_depth_constant_id.
 */
7072pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
7073 return pm_parser_local_depth_constant_id(parser, pm_parser_constant_id_token(parser, token));
/**
 * Write a local into the current scope's locals. The source range is stored
 * as an offset from parser->start plus a length (end - start), together with
 * the given reads count.
 */
7080pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7081 pm_locals_write(&parser->current_scope->locals, constant_id, U32(start - parser->start), U32(end - start), reads);
/**
 * Obtain the constant id for the raw source range [start, end) and, when that
 * id is non-zero, add it as a local in the current scope.
 */
7087static pm_constant_id_t
7088pm_parser_local_add_raw(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, uint32_t reads) {
7089 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
// A constant id of 0 is not added to the locals.
7090 if (constant_id != 0) pm_parser_local_add(parser, constant_id, start, end, reads);
/**
 * Add a local named by the given location, converting the location's
 * start/length (relative to parser->start) into a pointer range for
 * pm_parser_local_add_raw.
 */
7097static inline pm_constant_id_t
7098pm_parser_local_add_location(pm_parser_t *parser, pm_location_t *location, uint32_t reads) {
7099 return pm_parser_local_add_raw(parser, parser->start + location->start, parser->start + location->start + location->length, reads);
/**
 * Add a local named by the source range of the given token, delegating to
 * pm_parser_local_add_raw.
 */
7105static inline pm_constant_id_t
7106pm_parser_local_add_token(pm_parser_t *parser, pm_token_t *token, uint32_t reads) {
7107 return pm_parser_local_add_raw(parser, token->start, token->end, reads);
/**
 * Obtain a constant id for the given buffer via pm_parser_constant_id_owned
 * and, when the id is non-zero, add it as a local in the current scope.
 */
7113static pm_constant_id_t
7114pm_parser_local_add_owned(pm_parser_t *parser, uint8_t *start, size_t length) {
7115 pm_constant_id_t constant_id = pm_parser_constant_id_owned(parser, start, length);
// The local is recorded with an empty range at parser->start and a reads count
// of 1, rather than with a range taken from the buffer.
7116 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
/**
 * Obtain a constant id for the given string via pm_parser_constant_id_constant
 * and, when the id is non-zero, add it as a local in the current scope.
 */
7123static pm_constant_id_t
7124pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
7125 pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
// The local is recorded with an empty range at parser->start and a reads count
// of 1, rather than with a range taken from the string.
7126 if (constant_id != 0) pm_parser_local_add(parser, constant_id, parser->start, parser->start, 1);
/**
 * Validate a parameter name: refute numbered-parameter names, then report
 * PM_ERR_PARAMETER_NAME_DUPLICATED when the name already exists in the current
 * scope's locals and does not begin with an underscore.
 */
7138pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
7139 // We want to check whether the parameter name is a numbered parameter or
7141 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, name), PM_TOKEN_LENGTH(name));
7143 // Otherwise we'll fetch the constant id for the parameter name and check
7144 // whether it's already in the current scope.
7145 pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
7147 if (pm_locals_find(&parser->current_scope->locals, constant_id) != UINT32_MAX) {
7148 // Add an error if the parameter doesn't start with _ and has been seen before
// The start < end test guards the dereference against an empty token.
7149 if ((name->start < name->end) && (*name->start != '_')) {
7150 pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_DUPLICATED);
/**
 * Pop the current scope: make its previous scope current, free its locals,
 * and release the scope structure itself.
 */
7161pm_parser_scope_pop(pm_parser_t *parser) {
7162 pm_scope_t *scope = parser->current_scope;
7163 parser->current_scope = scope->previous;
7164 pm_locals_free(&scope->locals);
7165 xfree_sized(scope, sizeof(pm_scope_t));
7168/******************************************************************************/
7170/******************************************************************************/
/**
 * Push a boolean onto the state stack: the existing bits are shifted left by
 * one and the low bit of value becomes the new lowest bit.
 */
7176pm_state_stack_push(pm_state_stack_t *stack, bool value) {
7177 *stack = (*stack << 1) | (value & 1);
7184pm_state_stack_pop(pm_state_stack_t *stack) {
7192pm_state_stack_p(const pm_state_stack_t *stack) {
/**
 * Push a value onto the parser's accepts-block stack. The value is stored
 * negated; pm_accepts_block_stack_p negates it again when reading.
 */
7197pm_accepts_block_stack_push(pm_parser_t *parser, bool value) {
7198 // Use the negation of the value to prevent stack overflow.
7199 pm_state_stack_push(&parser->accepts_block_stack, !value);
/**
 * Pop the parser's accepts-block stack via pm_state_stack_pop.
 */
7203pm_accepts_block_stack_pop(pm_parser_t *parser) {
7204 pm_state_stack_pop(&parser->accepts_block_stack);
/**
 * Query the parser's accepts-block stack. The result of pm_state_stack_p is
 * negated because values are stored negated when pushed.
 */
7208pm_accepts_block_stack_p(pm_parser_t *parser) {
7209 return !pm_state_stack_p(&parser->accepts_block_stack);
/**
 * Push a value onto the parser's do-loop stack via pm_state_stack_push.
 */
7213pm_do_loop_stack_push(pm_parser_t *parser, bool value) {
7214 pm_state_stack_push(&parser->do_loop_stack, value);
/**
 * Pop the parser's do-loop stack via pm_state_stack_pop.
 */
7218pm_do_loop_stack_pop(pm_parser_t *parser) {
7219 pm_state_stack_pop(&parser->do_loop_stack);
/**
 * Query the parser's do-loop stack via pm_state_stack_p.
 */
7223pm_do_loop_stack_p(pm_parser_t *parser) {
7224 return pm_state_stack_p(&parser->do_loop_stack);
7227/******************************************************************************/
7228/* Lexer check helpers */
7229/******************************************************************************/
/**
 * Read the byte at the given cursor. The cursor is compared against
 * parser->end first, so only positions strictly before the end of the source
 * take the in-bounds branch.
 */
7235static inline uint8_t
7236peek_at(const pm_parser_t *parser, const uint8_t *cursor) {
7237 if (cursor < parser->end) {
/**
 * Read the byte at the given (possibly negative) offset from the end of the
 * current token, using the bounds-checked peek_at.
 */
7249static inline uint8_t
7250peek_offset(pm_parser_t *parser, ptrdiff_t offset) {
7251 return peek_at(parser, parser->current.end + offset);
/**
 * Read the byte at the end of the current token, using the bounds-checked
 * peek_at.
 */
7258static inline uint8_t
7259peek(const pm_parser_t *parser) {
7260 return peek_at(parser, parser->current.end);
/**
 * If the byte at the end of the current token equals value, consume it by
 * advancing parser->current.end by one.
 */
7268match(pm_parser_t *parser, uint8_t value) {
7269 if (peek(parser) == value) {
7270 parser->current.end++;
/**
 * Check for an end-of-line sequence at the given cursor: either a single
 * "\n" or the two-byte sequence "\r\n".
 */
7281match_eol_at(pm_parser_t *parser, const uint8_t *cursor) {
7282 if (peek_at(parser, cursor) == '\n') {
7285 if (peek_at(parser, cursor) == '\r' && peek_at(parser, cursor + 1) == '\n') {
/**
 * Check for an end-of-line sequence at the given offset from the end of the
 * current token, delegating to match_eol_at.
 */
7297match_eol_offset(pm_parser_t *parser, ptrdiff_t offset) {
7298 return match_eol_at(parser, parser->current.end + offset);
/**
 * Check for an end-of-line sequence at the end of the current token,
 * delegating to match_eol_at.
 */
7307match_eol(pm_parser_t *parser) {
7308 return match_eol_at(parser, parser->current.end);
/**
 * Find the first "\n" byte within the length bytes starting at cursor.
 * Returns memchr's result: a pointer to that byte, or NULL when there is
 * none. The length must be non-negative.
 */
7314static inline const uint8_t *
7315next_newline(const uint8_t *cursor, ptrdiff_t length) {
7316 assert(length >= 0);
7318 // Note that it's okay for us to use memchr here to look for \n because none
7319 // of the encodings that we support have \n as a component of a multi-byte
7321 return memchr(cursor, '\n', (size_t) length);
/**
 * Decide whether an operator at the current position is ambiguous. That is
 * the case when the lexer is not in any of the CLASS, DOT, FNAME or ENDFN
 * states, whitespace was seen before the operator, and the next byte is not
 * whitespace.
 */
7328ambiguous_operator_p(const pm_parser_t *parser, bool space_seen) {
7329 return !lex_state_p(parser, PM_LEX_STATE_CLASS | PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME | PM_LEX_STATE_ENDFN) && space_seen && !pm_char_is_whitespace(peek(parser));
/**
 * Resolve the encoding named by the byte range [start, end) and, when one is
 * found, install it on the parser.
 */
7337parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
7338 const pm_encoding_t *encoding = pm_encoding_find(start, end);
7340 if (encoding != NULL) {
// Only switch (and notify the optional callback) when the encoding actually
// differs from the one currently in use.
7341 if (parser->encoding != encoding) {
7342 parser->encoding = encoding;
7343 if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser);
// The flag records whether the resolved encoding is anything other than the
// UTF-8 entry.
7346 parser->encoding_changed = (encoding != PM_ENCODING_UTF_8_ENTRY);
/**
 * Scan the current comment token for the word "coding" (compared without
 * regard to case) followed by a "=" or ":" separator and an encoding name,
 * then hand the name to parser_lex_magic_comment_encoding_value. An
 * unrecognized name is reported as PM_ERR_INVALID_ENCODING_MAGIC_COMMENT.
 */
7358parser_lex_magic_comment_encoding(pm_parser_t *parser) {
// Start one byte past the beginning of the current token.
7359 const uint8_t *cursor = parser->current.start + 1;
7360 const uint8_t *end = parser->current.end;
7362 bool separator = false;
// Give up once fewer than seven bytes remain, since cursor[6] is read below.
7364 if (end - cursor <= 6) return;
// The skip distance depends on which letter of "coding" sits at cursor[6]:
// the earlier the letter is in the word, the further the cursor advances.
7365 switch (cursor[6]) {
7366 case 'C': case 'c': cursor += 6; continue;
7367 case 'O': case 'o': cursor += 5; continue;
7368 case 'D': case 'd': cursor += 4; continue;
7369 case 'I': case 'i': cursor += 3; continue;
7370 case 'N': case 'n': cursor += 2; continue;
7371 case 'G': case 'g': cursor += 1; continue;
7378 if (pm_char_is_whitespace(*cursor)) break;
// The six bytes immediately before the cursor must spell "coding".
7381 if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
7387 if (++cursor >= end) return;
7388 } while (pm_char_is_whitespace(*cursor));
7390 if (separator) break;
7391 if (*cursor != '=' && *cursor != ':') return;
// The encoding name is made of '-', '_' and characters the current encoding
// considers alphanumeric.
7397 const uint8_t *value_start = cursor;
7398 while ((*cursor == '-' || *cursor == '_' || parser->encoding->alnum_char(cursor, 1)) && ++cursor < end);
7400 if (!parser_lex_magic_comment_encoding_value(parser, value_start, cursor)) {
7401 // If we were unable to parse the encoding value, then we've got an
7402 // issue because we didn't understand the encoding that the user was
7403 // trying to use. In this case we'll keep using the default encoding but
7404 // add an error to the parser to indicate an unsuccessful parse.
7405 pm_parser_err(parser, U32(value_start - parser->start), U32(cursor - value_start), PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
// The possible outcomes of interpreting a magic comment value as a boolean:
// true, false, or neither.
7410 PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE,
7411 PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE,
7412 PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID
7413} pm_magic_comment_boolean_value_t;
/**
 * Interpret a magic comment value as a boolean. The value must be exactly
 * "true" or "false", compared without regard to case; anything else yields
 * PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID.
 */
7419static pm_magic_comment_boolean_value_t
7420parser_lex_magic_comment_boolean_value(const uint8_t *value_start, uint32_t value_length) {
// The length is checked before comparing so that a longer value that merely
// begins with "true" or "false" does not match.
7421 if (value_length == 4 && pm_strncasecmp(value_start, (const uint8_t *) "true", 4) == 0) {
7422 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE;
7423 } else if (value_length == 5 && pm_strncasecmp(value_start, (const uint8_t *) "false", 5) == 0) {
7424 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE;
7426 return PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID;
/**
 * Report whether the byte is one of the characters that delimit a magic
 * comment key: a single quote, a double quote, a colon or a semicolon.
 */
7431pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
7432 return b == '\'' || b == '"' || b == ':' || b == ';';
/**
 * Search [cursor, end) for the three-byte marker "-*-". Each candidate '-' is
 * located with pm_memchr, which is given the parser's encoding so that it can
 * take the encoding into account.
 */
7440static inline const uint8_t *
7441parser_lex_magic_comment_emacs_marker(
pm_parser_t *parser,
const uint8_t *cursor,
const uint8_t *end) {
// Keep searching only while at least three bytes remain for the marker.
7442 while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor,
'-', (
size_t) (end - cursor), parser->
encoding_changed, parser->
encoding)) != NULL) {
// pm_memchr may have moved the cursor, so re-check the remaining length
// before reading the two bytes that follow the '-'.
7443 if (cursor + 3 <= end && cursor[1] ==
'*' && cursor[2] ==
'-') {
/**
 * Scan the current comment token for `key: value` pairs and act on the keys
 * that are recognized here: "encoding"/"coding", "warn_indent",
 * "frozen_string_literal" and "shareable_constant_value". Keys are compared
 * without regard to case, and dashes in a key are treated as underscores.
 */
7462parser_lex_magic_comment(
pm_parser_t *parser,
bool semantic_token_seen) {
// Start one byte past the beginning of the current token.
7465 const uint8_t *start = parser->
current.start + 1;
7466 const uint8_t *end = parser->
current.end;
// Comments with seven or fewer bytes after the first one are rejected
// outright.
7467 if (end - start <= 7)
return false;
7469 const uint8_t *cursor;
7470 bool indicator =
false;
// Look for an opening "-*-" marker and then for a second, closing one.
7472 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7475 if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
7486 while (cursor < end) {
// Skip delimiters and whitespace in front of the key.
7487 while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
// The key runs up to the next delimiter or whitespace character.
7489 const uint8_t *key_start = cursor;
7490 while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
7492 const uint8_t *key_end = cursor;
7493 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7494 if (cursor == end)
break;
7496 if (*cursor ==
':') {
7499 if (!indicator)
return false;
7503 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7504 if (cursor == end)
break;
7506 const uint8_t *value_start;
7507 const uint8_t *value_end;
// A value may be double-quoted, in which case a backslash skips the byte that
// follows it.
7509 if (*cursor ==
'"') {
7510 value_start = ++cursor;
7511 for (; cursor < end && *cursor !=
'"'; cursor++) {
7512 if (*cursor ==
'\\' && (cursor + 1 < end)) cursor++;
7515 if (cursor < end && *cursor ==
'"') cursor++;
// An unquoted value runs up to a double quote, a semicolon or whitespace.
7517 value_start = cursor;
7518 while (cursor < end && *cursor !=
'"' && *cursor !=
';' && !pm_char_is_whitespace(*cursor)) cursor++;
7523 while (cursor < end && (*cursor ==
';' || pm_char_is_whitespace(*cursor))) cursor++;
7525 while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
7526 if (cursor != end)
return false;
7532 const size_t key_length = (size_t) (key_end - key_start);
7536 pm_string_shared_init(&key, key_start, key_end);
// Copy the key into an owned buffer so that each '-' can be rewritten as '_'.
7538 uint8_t *buffer =
xmalloc(key_length);
7539 if (buffer == NULL)
break;
7541 memcpy(buffer, key_start, key_length);
7542 buffer[dash - key_start] =
'_';
7544 while ((dash = pm_memchr(dash + 1,
'-', (
size_t) (key_end - dash - 1), parser->
encoding_changed, parser->
encoding)) != NULL) {
7545 buffer[dash - key_start] =
'_';
7548 pm_string_owned_init(&key, buffer, key_length);
7554 uint32_t value_length = (uint32_t) (value_end - value_start);
// Both "encoding" and "coding" select the source encoding.
7560 (key_length == 8 && pm_strncasecmp(key_source, (
const uint8_t *)
"encoding", 8) == 0) ||
7561 (key_length == 6 && pm_strncasecmp(key_source, (
const uint8_t *)
"coding", 6) == 0)
7563 result = parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
// The remaining keys are dispatched on their length before being compared.
7567 if (key_length == 11) {
7568 if (pm_strncasecmp(key_source, (
const uint8_t *)
"warn_indent", 11) == 0) {
7569 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7570 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7571 PM_PARSER_WARN_TOKEN_FORMAT(
7574 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7576 (
const char *) key_source,
7578 (
const char *) value_start
7581 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7584 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7589 }
else if (key_length == 21) {
7590 if (pm_strncasecmp(key_source, (
const uint8_t *)
"frozen_string_literal", 21) == 0) {
// When semantic_token_seen is set, the comment is only warned about as
// ignored.
7593 if (semantic_token_seen) {
7594 pm_parser_warn_token(parser, &parser->
current, PM_WARN_IGNORED_FROZEN_STRING_LITERAL);
7596 switch (parser_lex_magic_comment_boolean_value(value_start, value_length)) {
7597 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_INVALID:
7598 PM_PARSER_WARN_TOKEN_FORMAT(
7601 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7603 (
const char *) key_source,
7605 (
const char *) value_start
7608 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_FALSE:
7611 case PM_MAGIC_COMMENT_BOOLEAN_VALUE_TRUE:
7617 }
else if (key_length == 24) {
7618 if (pm_strncasecmp(key_source, (
const uint8_t *)
"shareable_constant_value", 24) == 0) {
// Walk backwards over spaces and tabs from the start of the comment: unless
// that reaches the start of the source or a newline, the comment shares its
// line with other content and only a warning is emitted.
7619 const uint8_t *cursor = parser->
current.start;
7620 while ((cursor > parser->
start) && ((cursor[-1] ==
' ') || (cursor[-1] ==
'\t'))) cursor--;
7622 if (!((cursor == parser->
start) || (cursor[-1] ==
'\n'))) {
7623 pm_parser_warn_token(parser, &parser->
current, PM_WARN_SHAREABLE_CONSTANT_VALUE_LINE);
7624 }
else if (value_length == 4 && pm_strncasecmp(value_start, (
const uint8_t *)
"none", 4) == 0) {
7625 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_NONE);
7626 }
else if (value_length == 7 && pm_strncasecmp(value_start, (
const uint8_t *)
"literal", 7) == 0) {
7627 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_LITERAL);
7628 }
else if (value_length == 23 && pm_strncasecmp(value_start, (
const uint8_t *)
"experimental_everything", 23) == 0) {
7629 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_EVERYTHING);
7630 }
else if (value_length == 17 && pm_strncasecmp(value_start, (
const uint8_t *)
"experimental_copy", 17) == 0) {
7631 pm_parser_scope_shareable_constant_set(parser, PM_SCOPE_SHAREABLE_CONSTANT_EXPERIMENTAL_COPY);
7633 PM_PARSER_WARN_TOKEN_FORMAT(
7636 PM_WARN_INVALID_MAGIC_COMMENT_VALUE,
7638 (
const char *) key_source,
7640 (
const char *) value_start
/**
 * A table of bitmasks indexed by context. Each entry ORs together
 * (1U << token type) for a set of token types, so membership is a single
 * bitwise AND. Because the masks are 32 bits wide, only token types below 32
 * can be represented.
 */
7666static const uint32_t context_terminators[] = {
7668 [
PM_CONTEXT_BEGIN] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7671 [
PM_CONTEXT_BEGIN_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7673 [
PM_CONTEXT_BLOCK_KEYWORDS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7677 [
PM_CONTEXT_BLOCK_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7678 [
PM_CONTEXT_CASE_WHEN] = (1U << PM_TOKEN_KEYWORD_WHEN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7679 [
PM_CONTEXT_CASE_IN] = (1U << PM_TOKEN_KEYWORD_IN) | (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_ELSE),
7680 [
PM_CONTEXT_CLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7683 [
PM_CONTEXT_CLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7684 [
PM_CONTEXT_DEF] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7687 [
PM_CONTEXT_DEF_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7692 [
PM_CONTEXT_ELSIF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7696 [
PM_CONTEXT_IF] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_ELSIF) | (1U << PM_TOKEN_KEYWORD_END),
7698 [
PM_CONTEXT_LAMBDA_DO_END] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7701 [
PM_CONTEXT_LAMBDA_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7704 [
PM_CONTEXT_MODULE] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7707 [
PM_CONTEXT_MODULE_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7711 [
PM_CONTEXT_PREDICATE] = (1U << PM_TOKEN_KEYWORD_THEN) | (1U << PM_TOKEN_NEWLINE) | (1U << PM_TOKEN_SEMICOLON),
7714 [
PM_CONTEXT_SCLASS] = (1U << PM_TOKEN_KEYWORD_END) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ENSURE),
7717 [
PM_CONTEXT_SCLASS_RESCUE] = (1U << PM_TOKEN_KEYWORD_ENSURE) | (1U << PM_TOKEN_KEYWORD_RESCUE) | (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7719 [
PM_CONTEXT_UNLESS] = (1U << PM_TOKEN_KEYWORD_ELSE) | (1U << PM_TOKEN_KEYWORD_END),
7726 return token->type < 32 && (context_terminators[context] & (1U << token->type));
7737 while (context_node != NULL) {
7738 if (context_terminator(context_node->
context, token))
return context_node->
context;
7739 context_node = context_node->
prev;
7748 if (context_node == NULL)
return false;
7773 while (context_node != NULL) {
7774 if (context_node->
context == context)
return true;
7775 context_node = context_node->
prev;
7785 while (context_node != NULL) {
7786 switch (context_node->
context) {
7807 context_node = context_node->
prev;
7822 assert(
false &&
"unreachable");
7880 assert(
false &&
"unreachable");
/**
 * Report an invalid underscore found while scanning a number. When invalid is
 * non-NULL, a one-byte error is added at that position: the "trailing"
 * diagnostic if it is the last byte of the scanned span (string + length - 1),
 * otherwise the "inner" diagnostic.
 */
7889pm_strspn_number_validate(
pm_parser_t *parser,
const uint8_t *
string,
size_t length,
const uint8_t *invalid) {
7890 if (invalid != NULL) {
7891 pm_diagnostic_id_t diag_id = (invalid == (
string + length - 1)) ? PM_ERR_INVALID_NUMBER_UNDERSCORE_TRAILING : PM_ERR_INVALID_NUMBER_UNDERSCORE_INNER;
7892 pm_parser_err(parser, U32(invalid - parser->
start), 1, diag_id);
/**
 * Measure the span of a binary number starting at string (bounded by the end
 * of the source) and report any invalid underscore that the scan found.
 */
7897pm_strspn_binary_number_validate(
pm_parser_t *parser,
const uint8_t *
string) {
7898 const uint8_t *invalid = NULL;
7899 size_t length = pm_strspn_binary_number(
string, parser->
end -
string, &invalid);
7900 pm_strspn_number_validate(parser,
string, length, invalid);
/**
 * Measure the span of an octal number starting at string (bounded by the end
 * of the source) and report any invalid underscore that the scan found.
 */
7905pm_strspn_octal_number_validate(
pm_parser_t *parser,
const uint8_t *
string) {
7906 const uint8_t *invalid = NULL;
7907 size_t length = pm_strspn_octal_number(
string, parser->
end -
string, &invalid);
7908 pm_strspn_number_validate(parser,
string, length, invalid);
/**
 * Measure the span of a decimal number starting at string (bounded by the end
 * of the source) and report any invalid underscore that the scan found.
 */
7913pm_strspn_decimal_number_validate(
pm_parser_t *parser,
const uint8_t *
string) {
7914 const uint8_t *invalid = NULL;
7915 size_t length = pm_strspn_decimal_number(
string, parser->
end -
string, &invalid);
7916 pm_strspn_number_validate(parser,
string, length, invalid);
/**
 * Measure the span of a hexadecimal number starting at string (bounded by the
 * end of the source) and report any invalid underscore that the scan found.
 */
7921pm_strspn_hexadecimal_number_validate(
pm_parser_t *parser,
const uint8_t *
string) {
7922 const uint8_t *invalid = NULL;
7923 size_t length = pm_strspn_hexadecimal_number(
string, parser->
end -
string, &invalid);
7924 pm_strspn_number_validate(parser,
string, length, invalid);
/**
 * Lex the optional fractional part and exponent that may follow the integer
 * digits of a number. The token type starts as PM_TOKEN_INTEGER and becomes
 * PM_TOKEN_FLOAT once a fraction or an exponent has been consumed.
 */
7928static pm_token_type_t
7929lex_optional_float_suffix(
pm_parser_t *parser,
bool* seen_e) {
7930 pm_token_type_t
type = PM_TOKEN_INTEGER;
// A '.' only starts a fraction when a decimal digit follows it.
7934 if (peek(parser) ==
'.') {
7935 if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7937 parser->
current.end += pm_strspn_decimal_number_validate(parser, parser->
current.end);
7938 type = PM_TOKEN_FLOAT;
// An exponent is introduced by 'e' or 'E', optionally followed by a sign.
7948 if ((peek(parser) ==
'e') || (peek(parser) ==
'E')) {
7949 if ((peek_offset(parser, 1) ==
'+') || (peek_offset(parser, 1) ==
'-')) {
7952 if (pm_char_is_decimal_digit(peek(parser))) {
7954 parser->
current.end += pm_strspn_decimal_number_validate(parser, parser->
current.end);
7956 pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
7958 }
else if (pm_char_is_decimal_digit(peek_offset(parser, 1))) {
7960 parser->
current.end += pm_strspn_decimal_number_validate(parser, parser->
current.end);
7966 type = PM_TOKEN_FLOAT;
/**
 * Lex the digits of a numeric literal. When the byte before the end of the
 * current token is '0', the next byte selects how the digits are read; the
 * visible branches validate decimal, binary, octal and hexadecimal digit runs
 * and report the matching PM_ERR_INVALID_NUMBER_* error when no digit of the
 * expected kind follows.
 */
7972static pm_token_type_t
7973lex_numeric_prefix(
pm_parser_t *parser,
bool* seen_e) {
7974 pm_token_type_t
type = PM_TOKEN_INTEGER;
7977 if (peek_offset(parser, -1) ==
'0') {
// NOTE(review): this reads *parser->current.end directly rather than through
// the bounds-checked peek -- confirm the source is always terminated.
7978 switch (*parser->
current.end) {
7983 if (pm_char_is_decimal_digit(peek(parser))) {
7984 parser->
current.end += pm_strspn_decimal_number_validate(parser, parser->
current.end);
7987 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
7996 if (pm_char_is_binary_digit(peek(parser))) {
7997 parser->
current.end += pm_strspn_binary_number_validate(parser, parser->
current.end);
8000 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
8010 if (pm_char_is_octal_digit(peek(parser))) {
8011 parser->
current.end += pm_strspn_octal_number_validate(parser, parser->
current.end);
8014 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
8030 parser->
current.end += pm_strspn_octal_number_validate(parser, parser->
current.end);
8038 if (pm_char_is_hexadecimal_digit(peek(parser))) {
8039 parser->
current.end += pm_strspn_hexadecimal_number_validate(parser, parser->
current.end);
8042 pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
8045 parser->
integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
8050 type = lex_optional_float_suffix(parser, seen_e);
8057 type = lex_optional_float_suffix(parser, seen_e);
8064 parser->
current.end += pm_strspn_decimal_number_validate(parser, parser->
current.end);
8067 type = lex_optional_float_suffix(parser, seen_e);
// A '.' followed by a decimal digit at this point is reported as an invalid
// fraction; the error covers the '.' and every digit that follows it.
8073 if (peek_offset(parser, 0) ==
'.' && pm_char_is_decimal_digit(peek_offset(parser, 1))) {
8074 const uint8_t *fraction_start = parser->
current.end;
8075 const uint8_t *fraction_end = parser->
current.end + 2;
8076 fraction_end += pm_strspn_decimal_digit(fraction_end, parser->
end - fraction_end);
8077 pm_parser_err(parser, U32(fraction_start - parser->
start), U32(fraction_end - fraction_start), PM_ERR_INVALID_NUMBER_FRACTION);
8083static pm_token_type_t
8085 pm_token_type_t
type = PM_TOKEN_INTEGER;
8089 bool seen_e =
false;
8090 type = lex_numeric_prefix(parser, &seen_e);
8092 const uint8_t *end = parser->
current.end;
8093 pm_token_type_t suffix_type =
type;
8095 if (
type == PM_TOKEN_INTEGER) {
8096 if (match(parser,
'r')) {
8097 suffix_type = PM_TOKEN_INTEGER_RATIONAL;
8099 if (match(parser,
'i')) {
8100 suffix_type = PM_TOKEN_INTEGER_RATIONAL_IMAGINARY;
8102 }
else if (match(parser,
'i')) {
8103 suffix_type = PM_TOKEN_INTEGER_IMAGINARY;
8106 if (!seen_e && match(parser,
'r')) {
8107 suffix_type = PM_TOKEN_FLOAT_RATIONAL;
8109 if (match(parser,
'i')) {
8110 suffix_type = PM_TOKEN_FLOAT_RATIONAL_IMAGINARY;
8112 }
else if (match(parser,
'i')) {
8113 suffix_type = PM_TOKEN_FLOAT_IMAGINARY;
8117 const uint8_t b = peek(parser);
8118 if (b !=
'\0' && (b >= 0x80 || ((b >=
'a' && b <=
'z') || (b >=
'A' && b <=
'Z')) || b ==
'_')) {
8128static pm_token_type_t
8131 pm_parser_err_token(parser, &parser->
current, PM_ERR_GLOBAL_VARIABLE_BARE);
8132 return PM_TOKEN_GLOBAL_VARIABLE;
8137 bool allow_multiple =
true;
8139 switch (*parser->
current.end) {
8157 return PM_TOKEN_GLOBAL_VARIABLE;
8164 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_BACK_REFERENCE;
8170 if ((width = char_is_identifier(parser, parser->
current.end, parser->
end - parser->
current.end)) > 0) {
8173 }
while ((width = char_is_identifier(parser, parser->
current.end, parser->
end - parser->
current.end)) > 0);
8177 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->
current, diag_id);
8180 return PM_TOKEN_GLOBAL_VARIABLE;
8193 return lex_state_p(parser, PM_LEX_STATE_FNAME) ? PM_TOKEN_GLOBAL_VARIABLE : PM_TOKEN_NUMBERED_REFERENCE;
8197 allow_multiple =
false;
8202 if ((width = char_is_identifier(parser, parser->
current.end, parser->
end - parser->
current.end)) > 0) {
8205 }
while (allow_multiple && (width = char_is_identifier(parser, parser->
current.end, parser->
end - parser->
current.end)) > 0);
8206 }
else if (pm_char_is_whitespace(peek(parser))) {
8209 pm_parser_err_token(parser, &parser->
current, PM_ERR_GLOBAL_VARIABLE_BARE);
8215 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current) + U32(width), diag_id, (
int) (PM_TOKEN_LENGTH(&parser->
current) + U32(width)), (
const char *) parser->
current.start);
8218 return PM_TOKEN_GLOBAL_VARIABLE;
/**
 * Compare the vlen bytes at current_start against the keyword text in value
 * and, on a match, update the lexer state. The modifier token type is
 * returned when one was supplied (anything other than PM_TOKEN_EOF) and the
 * previous lex state had none of the BEG, LABELED or CLASS bits set.
 */
8235static inline pm_token_type_t
8236lex_keyword(
pm_parser_t *parser,
const uint8_t *current_start,
const char *value,
size_t vlen,
pm_lex_state_t state, pm_token_type_t
type, pm_token_type_t modifier_type) {
8237 if (memcmp(current_start, value, vlen) == 0) {
// In the FNAME state the lexer moves to ENDFN rather than to the state that
// was requested for this keyword.
8240 if (parser->
lex_state & PM_LEX_STATE_FNAME) {
8241 lex_state_set(parser, PM_LEX_STATE_ENDFN);
8243 lex_state_set(parser, state);
8244 if (state == PM_LEX_STATE_BEG) {
8248 if ((modifier_type != PM_TOKEN_EOF) && !(last_state & (PM_LEX_STATE_BEG | PM_LEX_STATE_LABELED | PM_LEX_STATE_CLASS))) {
8249 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
8250 return modifier_type;
// NOTE(review): presumably reached when the text did not match; callers
// compare the result against PM_TOKEN_EOF -- confirm.
8257 return PM_TOKEN_EOF;
8260static pm_token_type_t
8261lex_identifier(
pm_parser_t *parser,
bool previous_command_start) {
8264 const uint8_t *end = parser->
end;
8265 const uint8_t *current_start = parser->
current.start;
8266 const uint8_t *current_end = parser->
current.end;
8269 if (encoding_changed) {
8270 while ((width = char_is_identifier(parser, current_end, end - current_end)) > 0) {
8271 current_end += width;
8274 while ((width = char_is_identifier_utf8(current_end, end - current_end)) > 0) {
8275 current_end += width;
8278 parser->
current.end = current_end;
8282 width = (size_t) (current_end - current_start);
8284 if (current_end < end) {
8285 if (((current_end + 1 >= end) || (current_end[1] !=
'=')) && (match(parser,
'!') || match(parser,
'?'))) {
8291 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8292 (peek(parser) ==
':') && (peek_offset(parser, 1) !=
':')
8296 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8297 (void) match(parser,
':');
8298 return PM_TOKEN_LABEL;
8301 if (parser->
lex_state != PM_LEX_STATE_DOT) {
8302 if (width == 8 && (lex_keyword(parser, current_start,
"defined?", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_DEFINED, PM_TOKEN_EOF) != PM_TOKEN_EOF)) {
8303 return PM_TOKEN_KEYWORD_DEFINED;
8307 return PM_TOKEN_METHOD_NAME;
8310 if (lex_state_p(parser, PM_LEX_STATE_FNAME) && peek_offset(parser, 1) !=
'~' && peek_offset(parser, 1) !=
'>' && (peek_offset(parser, 1) !=
'=' || peek_offset(parser, 2) ==
'>') && match(parser,
'=')) {
8313 return PM_TOKEN_IDENTIFIER;
8317 ((lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser)) &&
8318 peek(parser) ==
':' && peek_offset(parser, 1) !=
':'
8322 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
8323 (void) match(parser,
':');
8324 return PM_TOKEN_LABEL;
8328 if (parser->
lex_state != PM_LEX_STATE_DOT) {
8329 pm_token_type_t
type;
8332 if (lex_keyword(parser, current_start,
"do", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_DO, PM_TOKEN_EOF) != PM_TOKEN_EOF) {
8333 if (pm_do_loop_stack_p(parser)) {
8334 return PM_TOKEN_KEYWORD_DO_LOOP;
8336 return PM_TOKEN_KEYWORD_DO;
8339 if ((
type = lex_keyword(parser, current_start,
"if", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IF, PM_TOKEN_KEYWORD_IF_MODIFIER)) != PM_TOKEN_EOF)
return type;
8340 if ((
type = lex_keyword(parser, current_start,
"in", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_IN, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8341 if ((
type = lex_keyword(parser, current_start,
"or", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_OR, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8344 if ((
type = lex_keyword(parser, current_start,
"and", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_AND, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8345 if ((
type = lex_keyword(parser, current_start,
"def", width, PM_LEX_STATE_FNAME, PM_TOKEN_KEYWORD_DEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8346 if ((
type = lex_keyword(parser, current_start,
"end", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8347 if ((
type = lex_keyword(parser, current_start,
"END", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_END_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8348 if ((
type = lex_keyword(parser, current_start,
"for", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_FOR, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8349 if ((
type = lex_keyword(parser, current_start,
"nil", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_NIL, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8350 if ((
type = lex_keyword(parser, current_start,
"not", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_NOT, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8353 if ((
type = lex_keyword(parser, current_start,
"case", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_CASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8354 if ((
type = lex_keyword(parser, current_start,
"else", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8355 if ((
type = lex_keyword(parser, current_start,
"next", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_NEXT, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8356 if ((
type = lex_keyword(parser, current_start,
"redo", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_REDO, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8357 if ((
type = lex_keyword(parser, current_start,
"self", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_SELF, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8358 if ((
type = lex_keyword(parser, current_start,
"then", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8359 if ((
type = lex_keyword(parser, current_start,
"true", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_TRUE, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8360 if ((
type = lex_keyword(parser, current_start,
"when", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8363 if ((
type = lex_keyword(parser, current_start,
"alias", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_ALIAS, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8364 if ((
type = lex_keyword(parser, current_start,
"begin", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_BEGIN, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8365 if ((
type = lex_keyword(parser, current_start,
"BEGIN", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_BEGIN_UPCASE, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8366 if ((
type = lex_keyword(parser, current_start,
"break", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_BREAK, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8367 if ((
type = lex_keyword(parser, current_start,
"class", width, PM_LEX_STATE_CLASS, PM_TOKEN_KEYWORD_CLASS, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8368 if ((
type = lex_keyword(parser, current_start,
"elsif", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8369 if ((
type = lex_keyword(parser, current_start,
"false", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_FALSE, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8370 if ((
type = lex_keyword(parser, current_start,
"retry", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD_RETRY, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8371 if ((
type = lex_keyword(parser, current_start,
"super", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_SUPER, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8372 if ((
type = lex_keyword(parser, current_start,
"undef", width, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM, PM_TOKEN_KEYWORD_UNDEF, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8373 if ((
type = lex_keyword(parser, current_start,
"until", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNTIL, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) != PM_TOKEN_EOF)
return type;
8374 if ((
type = lex_keyword(parser, current_start,
"while", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_WHILE, PM_TOKEN_KEYWORD_WHILE_MODIFIER)) != PM_TOKEN_EOF)
return type;
8375 if ((
type = lex_keyword(parser, current_start,
"yield", width, PM_LEX_STATE_ARG, PM_TOKEN_KEYWORD_YIELD, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8378 if ((
type = lex_keyword(parser, current_start,
"ensure", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8379 if ((
type = lex_keyword(parser, current_start,
"module", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_MODULE, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8380 if ((
type = lex_keyword(parser, current_start,
"rescue", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) != PM_TOKEN_EOF)
return type;
8381 if ((
type = lex_keyword(parser, current_start,
"return", width, PM_LEX_STATE_MID, PM_TOKEN_KEYWORD_RETURN, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8382 if ((
type = lex_keyword(parser, current_start,
"unless", width, PM_LEX_STATE_BEG, PM_TOKEN_KEYWORD_UNLESS, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) != PM_TOKEN_EOF)
return type;
8385 if ((
type = lex_keyword(parser, current_start,
"__LINE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___LINE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8386 if ((
type = lex_keyword(parser, current_start,
"__FILE__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___FILE__, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8389 if ((
type = lex_keyword(parser, current_start,
"__ENCODING__", width, PM_LEX_STATE_END, PM_TOKEN_KEYWORD___ENCODING__, PM_TOKEN_EOF)) != PM_TOKEN_EOF)
return type;
8394 if (encoding_changed) {
8395 return parser->
encoding->
isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
8397 return pm_encoding_utf_8_isupper_char(current_start, end - current_start) ? PM_TOKEN_CONSTANT : PM_TOKEN_IDENTIFIER;
/**
 * Decide how to lex a `#` found inside string-like content. `pound` points at
 * the `#` byte itself.
 *
 * The visible return statements yield PM_TOKEN_STRING_CONTENT,
 * PM_TOKEN_EMBVAR or PM_TOKEN_EMBEXPR_BEGIN, and every visible path updates
 * parser->current.end relative to `pound`.
 *
 * NOTE(review): this listing is missing lines (the dispatch on the byte that
 * follows `#`, closing braces, and some assignments), so the comments below
 * describe only what the remaining statements show.
 */
8423static pm_token_type_t
8424lex_interpolation(
pm_parser_t *parser,
const uint8_t *pound) {
// `#` is the final byte of the input: consume it and report string content.
8427 if (pound + 1 >= parser->
end) {
8428 parser->
current.end = pound + 1;
8429 return PM_TOKEN_STRING_CONTENT;
// Fewer than two bytes follow the `#`: consume only the `#` as string content.
8438 if (pound + 2 >= parser->
end) {
8439 parser->
current.end = pound + 1;
8440 return PM_TOKEN_STRING_CONTENT;
// Inspect the byte two past the `#`. When it is `@` and at least one more
// byte exists, step over it so the identifier check looks at the byte after.
8445 const uint8_t *variable = pound + 2;
8446 if (*variable ==
'@' && pound + 3 < parser->
end) variable++;
8448 if (char_is_identifier_start(parser, variable, parser->
end - variable)) {
// The current token already holds bytes before the `#`, so string content is
// returned first (the statement between these two lines is not visible here).
8452 if (pound > parser->
current.start) {
8454 return PM_TOKEN_STRING_CONTENT;
// Otherwise switch to the embedded-variable lex mode and emit just the `#`.
8459 lex_mode_push(parser, (
pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8460 parser->
current.end = pound + 1;
8461 return PM_TOKEN_EMBVAR;
// Not an identifier start: step past the `#` only.
8467 parser->
current.end = pound + 1;
// A second branch with the same "fewer than two bytes follow" guard.
8473 if (pound + 2 >= parser->
end) {
8474 parser->
current.end = pound + 1;
8475 return PM_TOKEN_STRING_CONTENT;
// `check` starts two bytes past the `#`; a `-` at that position gets its own
// bounds check, which consumes two bytes as string content when input ends.
8481 const uint8_t *check = pound + 2;
8483 if (pound[2] ==
'-') {
8484 if (pound + 3 >= parser->
end) {
8485 parser->
current.end = pound + 2;
8486 return PM_TOKEN_STRING_CONTENT;
// Accept either an identifier start at `check`, or (when pound[2] is not `-`)
// a decimal digit or a global-name punctuation byte at pound[2].
8497 char_is_identifier_start(parser, check, parser->
end - check) ||
8498 (pound[2] !=
'-' && (pm_char_is_decimal_digit(pound[2]) || char_is_global_name_punctuation(pound[2])))
// Same two outcomes as above: string content first if bytes precede the `#`,
// otherwise push PM_LEX_EMBVAR and emit the `#` as PM_TOKEN_EMBVAR.
8503 if (pound > parser->
current.start) {
8505 return PM_TOKEN_STRING_CONTENT;
8510 lex_mode_push(parser, (
pm_lex_mode_t) { .mode = PM_LEX_EMBVAR });
8511 parser->
current.end = pound + 1;
8512 return PM_TOKEN_EMBVAR;
8517 parser->
current.end = pound + 1;
// Embedded-expression branch: again return pending string content first.
8523 if (pound > parser->
current.start) {
8525 return PM_TOKEN_STRING_CONTENT;
// Push the embedded-expression lex mode, consume two bytes starting at the
// `#`, and push `false` onto the do-loop stack before emitting the token.
8532 lex_mode_push(parser, (
pm_lex_mode_t) { .mode = PM_LEX_EMBEXPR });
8533 parser->
current.end = pound + 2;
8535 pm_do_loop_stack_push(parser,
false);
8536 return PM_TOKEN_EMBEXPR_BEGIN;
// Remaining branch: step past the `#` only.
8541 parser->
current.end = pound + 1;
// Bit flags threaded through the escape-sequence helpers in this file as a
// uint8_t `flags` argument. They are combined with `|` and tested with `&`.

// No flags set.
8546static const uint8_t PM_ESCAPE_FLAG_NONE = 0x0;
// Control escape: escape_byte masks the byte with 0x9f when this bit is set.
8547static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x1;
// Meta escape: escape_byte ORs 0x80 into the byte when this bit is set.
8548static const uint8_t PM_ESCAPE_FLAG_META = 0x2;
// Passed as the only flag when an escape is read for a character literal;
// the unicode handling checks it to reject more than one codepoint.
8549static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x4;
// When set, the escape writers also append to the regular expression buffer.
8550static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x8;
/**
 * Lookup table indexed by a 7-bit ASCII byte value (0x00-0x7F), sixteen
 * entries per row. An entry is 1 for 0x09-0x0D and for 0x20-0x7E except the
 * backslash (0x5C); every other entry, including 0x7F, is 0.
 */
static const bool ascii_printable_chars[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};

/**
 * Returns true when the byte is below 0x80 and marked in
 * ascii_printable_chars. The range test comes first so the 128-entry table is
 * never indexed out of bounds.
 *
 * @param b The byte to classify.
 * @return Whether the byte is flagged as printable in the table.
 */
static inline bool
char_is_ascii_printable(const uint8_t b) {
    return (b < 0x80) && ascii_printable_chars[b];
}
/**
 * Convert one ASCII hexadecimal digit into its numeric value (0-15).
 *
 * Bytes up to '9' are treated as decimal digits. Anything above takes its low
 * three bits plus 9, which maps both 'a'-'f' and 'A'-'F' onto 10-15. The input
 * is not validated; the visible callers test it with
 * pm_char_is_hexadecimal_digit or pm_strspn_hexadecimal_digit first.
 *
 * @param value An ASCII hexadecimal digit.
 * @return The digit's numeric value.
 */
static inline uint8_t
escape_hexadecimal_digit(const uint8_t value) {
    return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
}
8585static inline uint32_t
8588 for (
size_t index = 0; index < length; index++) {
8589 if (index != 0) value <<= 4;
8590 value |= escape_hexadecimal_digit(
string[index]);
8595 if (value >= 0xD800 && value <= 0xDFFF) {
8596 if (error_location != NULL) {
8597 pm_parser_err(parser, error_location->
start, error_location->
length, PM_ERR_ESCAPE_INVALID_UNICODE);
8599 pm_parser_err(parser, U32(
string - parser->
start), U32(length), PM_ERR_ESCAPE_INVALID_UNICODE);
8610static inline uint8_t
8611escape_byte(uint8_t value,
const uint8_t flags) {
8612 if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x9f;
8613 if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
/**
 * Append the codepoint `value`, read from a unicode escape spanning
 * [start, end), to `buffer`.
 *
 * NOTE(review): lines are missing from this listing (the return type, a nested
 * condition after the first `if`, and closing braces), so only the visible
 * statements are described.
 */
8621escape_write_unicode(
pm_parser_t *parser,
pm_buffer_t *buffer,
const uint8_t flags,
const uint8_t *start,
const uint8_t *end, uint32_t value) {
// Non-ASCII codepoints, or any codepoint when PM_ESCAPE_FLAG_SINGLE is set,
// reach the mixed-encoding diagnostic below, which names
// parser->explicit_encoding. Presumably an omitted condition guards that
// report -- confirm against the full source.
8625 if (value >= 0x80 || flags & PM_ESCAPE_FLAG_SINGLE) {
8627 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->
start), U32(end - start), PM_ERR_MIXED_ENCODING, parser->
explicit_encoding->
name);
// If the codepoint cannot be appended, report an invalid unicode escape over
// the escape's source range and append the bytes EF BF BD instead, which is
// the UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER.
8633 if (!pm_buffer_append_unicode_codepoint(buffer, value)) {
8634 pm_parser_err(parser, U32(start - parser->
start), U32(end - start), PM_ERR_ESCAPE_INVALID_UNICODE);
8635 pm_buffer_append_byte(buffer, 0xEF);
8636 pm_buffer_append_byte(buffer, 0xBF);
8637 pm_buffer_append_byte(buffer, 0xBD);
8649 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->
current, PM_ERR_MIXED_ENCODING, parser->
encoding->
name);
8655 pm_buffer_append_byte(buffer,
byte);
8675 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8676 pm_buffer_append_format(regular_expression_buffer,
"\\x%02X",
byte);
8679 escape_write_byte_encoded(parser, buffer,
byte);
8691 width = pm_encoding_utf_8_char_width(parser->
current.end, parser->
end - parser->
current.end);
8695 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(*parser->
current.end++, flags));
8696 }
else if (width > 1) {
8698 pm_buffer_t *b = (flags & PM_ESCAPE_FLAG_REGEXP) ? regular_expression_buffer : buffer;
8699 pm_buffer_append_bytes(b, parser->
current.end, width);
8705 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
/**
 * Emit a PM_WARN_INVALID_CHARACTER warning while reading an escape sequence.
 *
 * `flags` and `flag` are escape flag sets and `type` is a string supplied by
 * the caller (the visible call sites pass "\\s" or "\\t").
 *
 * NOTE(review): the remaining arguments of the warning call and the end of the
 * function are missing from this listing.
 */
8715escape_read_warn(
pm_parser_t *parser, uint8_t flags, uint8_t flag,
const char *
type) {
// FLAG maps a flag set to a prefix string: "\\C-" when the control bit is set,
// otherwise "\\M-" when the meta bit is set, otherwise the empty string.
8716#define FLAG(value) ((value & PM_ESCAPE_FLAG_CONTROL) ? "\\C-" : (value & PM_ESCAPE_FLAG_META) ? "\\M-" : "")
8718 PM_PARSER_WARN_TOKEN_FORMAT(
8721 PM_WARN_INVALID_CHARACTER,
8735 uint8_t peeked = peek(parser);
8739 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(
'\\', flags));
8744 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(
'\'', flags));
8749 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(
'\a', flags));
8754 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(
'\b', flags));
8759 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(
'\033', flags));
8764 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(
'\f', flags));
8769 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(
'\n', flags));
8774 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(
'\r', flags));
8779 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(
' ', flags));
8784 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(
'\t', flags));
8789 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(
'\v', flags));
8792 case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7': {
8793 uint8_t value = (uint8_t) (*parser->
current.end -
'0');
8796 if (pm_char_is_octal_digit(peek(parser))) {
8797 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->
current.end -
'0'));
8800 if (pm_char_is_octal_digit(peek(parser))) {
8801 value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->
current.end -
'0'));
8806 value = escape_byte(value, flags);
8807 escape_write_byte(parser, buffer, regular_expression_buffer, flags, value);
8811 const uint8_t *start = parser->
current.end - 1;
8814 uint8_t
byte = peek(parser);
8816 if (pm_char_is_hexadecimal_digit(
byte)) {
8817 uint8_t value = escape_hexadecimal_digit(
byte);
8820 byte = peek(parser);
8821 if (pm_char_is_hexadecimal_digit(
byte)) {
8822 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(
byte));
8826 value = escape_byte(value, flags);
8827 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8828 if (flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) {
8829 pm_buffer_append_format(regular_expression_buffer,
"\\x%02X", value);
8831 pm_buffer_append_bytes(regular_expression_buffer, start, (
size_t) (parser->
current.end - start));
8835 escape_write_byte_encoded(parser, buffer, value);
8837 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
8843 const uint8_t *start = parser->
current.end - 1;
8847 const uint8_t *start = parser->
current.end - 2;
8848 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->
start), U32(parser->
current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8849 }
else if (peek(parser) ==
'{') {
8850 const uint8_t *unicode_codepoints_start = parser->
current.end - 2;
8855 if ((whitespace = pm_strspn_inline_whitespace(parser->
current.end, parser->
end - parser->
current.end)) > 0) {
8856 parser->
current.end += whitespace;
8857 }
else if (peek(parser) ==
'\\' && peek_offset(parser, 1) ==
'n') {
8868 const uint8_t *extra_codepoints_start = NULL;
8869 int codepoints_count = 0;
8872 const uint8_t *unicode_start = parser->
current.end;
8873 size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->
current.end, parser->
end - parser->
current.end);
8875 if (hexadecimal_length > 6) {
8877 pm_parser_err(parser, U32(unicode_start - parser->
start), U32(hexadecimal_length), PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
8878 }
else if (hexadecimal_length == 0) {
8881 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8885 pm_buffer_append_bytes(regular_expression_buffer, start, (
size_t) (parser->
current.end - start));
8887 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->
current), 0, PM_ERR_ESCAPE_INVALID_UNICODE);
8888 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->
current), 0, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8894 parser->
current.end += hexadecimal_length;
8896 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
8897 extra_codepoints_start = unicode_start;
8900 uint32_t value = escape_unicode(parser, unicode_start, hexadecimal_length, NULL);
8901 escape_write_unicode(parser, buffer, flags, unicode_start, parser->
current.end, value);
8908 if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
8909 pm_parser_err(parser, U32(extra_codepoints_start - parser->
start), U32(parser->
current.end - 1 - extra_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
8913 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->
start), U32(parser->
current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_LIST, (
int) (parser->
current.end - start), start);
8914 }
else if (peek(parser) ==
'}') {
8917 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8921 pm_buffer_append_bytes(regular_expression_buffer, start, (
size_t) (parser->
current.end - start));
8923 pm_parser_err(parser, U32(unicode_codepoints_start - parser->
start), U32(parser->
current.end - unicode_codepoints_start), PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
8927 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8928 pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (
size_t) (parser->
current.end - unicode_codepoints_start));
8931 size_t length = pm_strspn_hexadecimal_digit(parser->
current.end, MIN(parser->
end - parser->
current.end, 4));
8934 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8935 pm_buffer_append_bytes(regular_expression_buffer, start, (
size_t) (parser->
current.end - start));
8937 const uint8_t *start = parser->
current.end - 2;
8938 PM_PARSER_ERR_FORMAT(parser, U32(start - parser->
start), U32(parser->
current.end - start), PM_ERR_ESCAPE_INVALID_UNICODE_SHORT, 2, start);
8940 }
else if (length == 4) {
8941 uint32_t value = escape_unicode(parser, parser->
current.end, 4, NULL);
8943 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8944 pm_buffer_append_bytes(regular_expression_buffer, start, (
size_t) (parser->
current.end + 4 - start));
8947 escape_write_unicode(parser, buffer, flags, start, parser->
current.end + 4, value);
8950 parser->
current.end += length;
8952 if (flags & PM_ESCAPE_FLAG_REGEXP) {
8956 pm_buffer_append_bytes(regular_expression_buffer, start, (
size_t) (parser->
current.end - start));
8958 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
8967 if (flags & PM_ESCAPE_FLAG_CONTROL) {
8968 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
8972 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
8976 uint8_t peeked = peek(parser);
8980 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
8986 if (match(parser,
'u') || match(parser,
'U')) {
8987 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current), PM_ERR_INVALID_ESCAPE_CHARACTER);
8991 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
8995 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL,
"\\s");
8996 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9000 escape_read_warn(parser, flags, 0,
"\\t");
9001 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9004 if (!char_is_ascii_printable(peeked)) {
9005 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9010 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9017 if (flags & PM_ESCAPE_FLAG_CONTROL) {
9018 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
9021 if (peek(parser) !=
'-') {
9023 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
9029 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
9033 uint8_t peeked = peek(parser);
9037 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
9043 if (match(parser,
'u') || match(parser,
'U')) {
9044 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9048 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
9052 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_CONTROL,
"\\s");
9053 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9057 escape_read_warn(parser, flags, 0,
"\\t");
9058 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9061 if (!char_is_ascii_printable(peeked)) {
9063 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current) + U32(width), PM_ERR_ESCAPE_INVALID_CONTROL);
9068 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
9075 if (flags & PM_ESCAPE_FLAG_META) {
9076 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
9079 if (peek(parser) !=
'-') {
9081 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9087 pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
9091 uint8_t peeked = peek(parser);
9096 if (match(parser,
'u') || match(parser,
'U')) {
9097 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current), PM_ERR_INVALID_ESCAPE_CHARACTER);
9101 escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
9105 escape_read_warn(parser, flags, PM_ESCAPE_FLAG_META,
"\\s");
9106 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9110 escape_read_warn(parser, flags & ((uint8_t) ~PM_ESCAPE_FLAG_CONTROL), PM_ESCAPE_FLAG_META,
"\\t");
9111 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9114 if (!char_is_ascii_printable(peeked)) {
9116 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9121 escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
9126 if (peek_offset(parser, 1) ==
'\n') {
9128 escape_write_byte_encoded(parser, buffer, escape_byte(
'\n', flags));
9134 if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
9136 pm_parser_err(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current) + U32(width), PM_ERR_ESCAPE_INVALID_META);
9140 escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
9142 pm_parser_err_current(parser, PM_ERR_INVALID_ESCAPE_CHARACTER);
9174static pm_token_type_t
9176 if (lex_state_end_p(parser)) {
9177 lex_state_set(parser, PM_LEX_STATE_BEG);
9178 return PM_TOKEN_QUESTION_MARK;
9182 pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
9184 return PM_TOKEN_CHARACTER_LITERAL;
9187 if (pm_char_is_whitespace(*parser->
current.end)) {
9188 lex_state_set(parser, PM_LEX_STATE_BEG);
9189 return PM_TOKEN_QUESTION_MARK;
9192 lex_state_set(parser, PM_LEX_STATE_BEG);
9194 if (match(parser,
'\\')) {
9195 lex_state_set(parser, PM_LEX_STATE_END);
9198 pm_buffer_init_capacity(&buffer, 3);
9200 escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
9207 return PM_TOKEN_CHARACTER_LITERAL;
9216 (parser->
current.end + encoding_width >= parser->
end) ||
9217 !char_is_identifier(parser, parser->
current.end + encoding_width, parser->
end - (parser->
current.end + encoding_width))
9220 lex_state_set(parser, PM_LEX_STATE_END);
9221 parser->
current.end += encoding_width;
9223 return PM_TOKEN_CHARACTER_LITERAL;
9227 return PM_TOKEN_QUESTION_MARK;
9234static pm_token_type_t
9236 pm_token_type_t
type = match(parser,
'@') ? PM_TOKEN_CLASS_VARIABLE : PM_TOKEN_INSTANCE_VARIABLE;
9237 const uint8_t *end = parser->
end;
9240 if ((width = char_is_identifier_start(parser, parser->
current.end, end - parser->
current.end)) > 0) {
9243 while ((width = char_is_identifier(parser, parser->
current.end, end - parser->
current.end)) > 0) {
9246 }
else if (parser->
current.end < end && pm_char_is_decimal_digit(*parser->
current.end)) {
9247 pm_diagnostic_id_t diag_id = (
type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
9249 diag_id = (
type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS_3_3 : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE_3_3;
9253 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->
current, diag_id, (
int) ((parser->
current.end + width) - parser->
current.start), (
const char *) parser->
current.start);
9255 pm_diagnostic_id_t diag_id = (
type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_CLASS_VARIABLE_BARE : PM_ERR_INSTANCE_VARIABLE_BARE;
9256 pm_parser_err_token(parser, &parser->
current, diag_id);
9262 lex_mode_pop(parser);
9284 if (comment == NULL)
return NULL;
9288 .location = TOK2LOC(parser, &parser->
current)
9299static pm_token_type_t
9302 const uint8_t *newline = next_newline(parser->
current.end, parser->
end - parser->
current.end);
9304 if (newline == NULL) {
9307 pm_line_offset_list_append(&parser->
line_offsets, U32(newline - parser->
start + 1));
9308 parser->
current.end = newline + 1;
9311 parser->
current.type = PM_TOKEN_EMBDOC_BEGIN;
9312 parser_lex_callback(parser);
9315 const uint8_t *comment_start = parser->
current.start;
9316 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_EMBDOC);
9317 if (comment == NULL)
return PM_TOKEN_EOF;
9321 while (parser->
current.end + 4 <= parser->
end) {
9327 (memcmp(parser->
current.end,
"=end", 4) == 0) &&
9330 pm_char_is_whitespace(parser->
current.end[4]) ||
9331 (parser->
current.end[4] ==
'\0') ||
9332 (parser->
current.end[4] ==
'\004') ||
9333 (parser->
current.end[4] ==
'\032')
9336 const uint8_t *newline = next_newline(parser->
current.end, parser->
end - parser->
current.end);
9338 if (newline == NULL) {
9341 pm_line_offset_list_append(&parser->
line_offsets, U32(newline - parser->
start + 1));
9342 parser->
current.end = newline + 1;
9345 parser->
current.type = PM_TOKEN_EMBDOC_END;
9346 parser_lex_callback(parser);
9351 return PM_TOKEN_EMBDOC_END;
9356 const uint8_t *newline = next_newline(parser->
current.end, parser->
end - parser->
current.end);
9358 if (newline == NULL) {
9361 pm_line_offset_list_append(&parser->
line_offsets, U32(newline - parser->
start + 1));
9362 parser->
current.end = newline + 1;
9365 parser->
current.type = PM_TOKEN_EMBDOC_LINE;
9366 parser_lex_callback(parser);
9369 pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
9374 return PM_TOKEN_EOF;
9384 parser->
current.type = PM_TOKEN_IGNORED_NEWLINE;
9385 parser_lex_callback(parser);
9409 const uint8_t *cursor = parser->
current.end;
9411 while (cursor < parser->end && *cursor !=
'\n' && *cursor !=
'#') {
9412 if (!pm_char_is_inline_whitespace(*cursor++))
return false;
9475 pm_buffer_append_byte(&token_buffer->
buffer,
byte);
9492 width = pm_encoding_utf_8_char_width(parser->
current.end, parser->
end - parser->
current.end);
9497 return (width == 0 ? 1 : width);
9505 size_t width = parser_char_width(parser);
9506 pm_buffer_append_bytes(&token_buffer->
buffer, parser->
current.end, width);
9512 size_t width = parser_char_width(parser);
/**
 * Returns true when every byte of the slice has its high bit clear, i.e. the
 * slice holds only 7-bit ASCII. An empty slice counts as ASCII-only.
 *
 * @param value Pointer to the first byte of the slice.
 * @param length Number of bytes to inspect.
 * @return false as soon as a byte with bit 0x80 set is found, true otherwise.
 */
static bool
pm_slice_ascii_only_p(const uint8_t *value, size_t length) {
    for (size_t index = 0; index < length; index++) {
        if (value[index] & 0x80) return false;
    }

    return true;
}
9538 pm_string_constant_init(&parser->
current_string, (
const char *) arena_data,
len);
9544 pm_token_buffer_copy(parser, &token_buffer->
base);
9560 if (token_buffer->
cursor == NULL) {
9563 pm_buffer_append_bytes(&token_buffer->
buffer, token_buffer->
cursor, (
size_t) (parser->
current.end - token_buffer->
cursor));
9564 pm_token_buffer_copy(parser, token_buffer);
9576 pm_regexp_token_buffer_copy(parser, token_buffer);
9580#define PM_TOKEN_BUFFER_DEFAULT_SIZE 16
9592 const uint8_t *start;
9593 if (token_buffer->
cursor == NULL) {
9594 pm_buffer_init_capacity(&token_buffer->
buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9595 start = parser->
current.start;
9597 start = token_buffer->
cursor;
9600 const uint8_t *end = parser->
current.end - 1;
9601 assert(end >= start);
9602 pm_buffer_append_bytes(&token_buffer->
buffer, start, (
size_t) (end - start));
9604 token_buffer->
cursor = end;
9609 const uint8_t *start;
9611 pm_buffer_init_capacity(&token_buffer->
base.
buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9612 pm_buffer_init_capacity(&token_buffer->
regexp_buffer, PM_TOKEN_BUFFER_DEFAULT_SIZE);
9613 start = parser->
current.start;
9618 const uint8_t *end = parser->
current.end - 1;
9619 pm_buffer_append_bytes(&token_buffer->
base.
buffer, start, (
size_t) (end - start));
9620 pm_buffer_append_bytes(&token_buffer->
regexp_buffer, start, (
size_t) (end - start));
9625#undef PM_TOKEN_BUFFER_DEFAULT_SIZE
9633 size_t whitespace = 0;
9636 case PM_HEREDOC_INDENT_NONE:
9641 case PM_HEREDOC_INDENT_DASH:
9643 *cursor += pm_strspn_inline_whitespace(*cursor, parser->
end - *cursor);
9645 case PM_HEREDOC_INDENT_TILDE:
9648 while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
9649 if (**cursor ==
'\t') {
9650 whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
9669 size_t eol_length = match_eol(parser);
9676 parser_flush_heredoc_end(parser);
9679 pm_line_offset_list_append(&parser->
line_offsets, PM_TOKEN_END(parser, &parser->
current) + U32(eol_length));
9682 uint8_t delimiter = *parser->
current.end;
9686 if (eol_length == 2) {
9687 delimiter = *(parser->
current.end + 1);
9690 parser->
current.end += eol_length;
9694 return *parser->
current.end++;
9701#define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
9720 bool lexed_comment =
false;
9734 case PM_LEX_DEFAULT:
9735 case PM_LEX_EMBEXPR:
9752 bool space_seen =
false;
9756 bool chomping =
true;
9757 while (parser->
current.end < parser->
end && chomping) {
9758 switch (*parser->
current.end) {
9767 if (match_eol_offset(parser, 1)) {
9770 pm_parser_warn(parser, PM_TOKEN_END(parser, &parser->
current), 1, PM_WARN_UNEXPECTED_CARRIAGE_RETURN);
9776 size_t eol_length = match_eol_offset(parser, 1);
9782 parser->
current.end += eol_length + 1;
9783 pm_line_offset_list_append(&parser->
line_offsets, PM_TOKEN_END(parser, &parser->
current));
9786 }
else if (pm_char_is_inline_whitespace(*parser->
current.end)) {
9810 lex_mode_pop(parser);
9811 goto switch_lex_modes;
9827 switch (*parser->
current.end++) {
9835 const uint8_t *ending = next_newline(parser->
current.end, parser->
end - parser->
current.end);
9836 parser->
current.end = ending == NULL ? parser->
end : ending;
9841 pm_comment_t *comment = parser_comment(parser, PM_COMMENT_INLINE);
9844 if (ending) parser->
current.end++;
9845 parser->
current.type = PM_TOKEN_COMMENT;
9846 parser_lex_callback(parser);
9858 parser_lex_magic_comment_encoding(parser);
9862 lexed_comment =
true;
9868 size_t eol_length = match_eol_at(parser, parser->
current.end - 1);
9880 if (!lexed_comment) {
9881 parser->
current.end += eol_length - 1;
9885 pm_line_offset_list_append(&parser->
line_offsets, PM_TOKEN_END(parser, &parser->
current));
9890 parser_flush_heredoc_end(parser);
9895 switch (lex_state_ignored_p(parser)) {
9896 case PM_IGNORED_NEWLINE_NONE:
9898 case PM_IGNORED_NEWLINE_PATTERN:
9900 if (!lexed_comment) parser_lex_ignored_newline(parser);
9901 lex_state_set(parser, PM_LEX_STATE_BEG);
9903 parser->
current.type = PM_TOKEN_NEWLINE;
9907 case PM_IGNORED_NEWLINE_ALL:
9908 if (!lexed_comment) parser_lex_ignored_newline(parser);
9909 lexed_comment =
false;
9910 goto lex_next_token;
9918 next_content += pm_strspn_inline_whitespace(next_content, parser->
end - next_content);
9920 if (next_content < parser->end) {
9926 if (next_content[0] ==
'#') {
9928 const uint8_t *following = next_newline(next_content, parser->
end - next_content);
9930 while (following && (following + 1 < parser->
end)) {
9932 following += pm_strspn_inline_whitespace(following, parser->
end - following);
9936 if (peek_at(parser, following) !=
'#')
break;
9940 following = next_newline(following, parser->
end - following);
9945 if (lex_state_ignored_p(parser)) {
9946 if (!lexed_comment) parser_lex_ignored_newline(parser);
9947 lexed_comment =
false;
9948 goto lex_next_token;
9954 (peek_at(parser, following) ==
'.') ||
9955 (peek_at(parser, following) ==
'&' && peek_at(parser, following + 1) ==
'.')
9957 if (!lexed_comment) parser_lex_ignored_newline(parser);
9958 lexed_comment =
false;
9959 goto lex_next_token;
9969 (peek_at(parser, following) ==
'&' && peek_at(parser, following + 1) ==
'&') ||
9970 (peek_at(parser, following) ==
'|' && peek_at(parser, following + 1) ==
'|') ||
9972 peek_at(parser, following) ==
'a' &&
9973 peek_at(parser, following + 1) ==
'n' &&
9974 peek_at(parser, following + 2) ==
'd' &&
9975 peek_at(parser, next_content + 3) !=
'!' &&
9976 peek_at(parser, next_content + 3) !=
'?' &&
9977 !char_is_identifier(parser, following + 3, parser->
end - (following + 3))
9980 peek_at(parser, following) ==
'o' &&
9981 peek_at(parser, following + 1) ==
'r' &&
9982 peek_at(parser, next_content + 2) !=
'!' &&
9983 peek_at(parser, next_content + 2) !=
'?' &&
9984 !char_is_identifier(parser, following + 2, parser->
end - (following + 2))
9988 if (!lexed_comment) parser_lex_ignored_newline(parser);
9989 lexed_comment =
false;
9990 goto lex_next_token;
9996 if (next_content[0] ==
'.') {
10000 if (peek_at(parser, next_content + 1) ==
'.') {
10001 if (!lexed_comment) parser_lex_ignored_newline(parser);
10002 lex_state_set(parser, PM_LEX_STATE_BEG);
10004 parser->
current.type = PM_TOKEN_NEWLINE;
10008 if (!lexed_comment) parser_lex_ignored_newline(parser);
10009 lex_state_set(parser, PM_LEX_STATE_DOT);
10010 parser->
current.start = next_content;
10011 parser->
current.end = next_content + 1;
10018 if (peek_at(parser, next_content) ==
'&' && peek_at(parser, next_content + 1) ==
'.') {
10019 if (!lexed_comment) parser_lex_ignored_newline(parser);
10020 lex_state_set(parser, PM_LEX_STATE_DOT);
10021 parser->
current.start = next_content;
10022 parser->
current.end = next_content + 2;
10024 LEX(PM_TOKEN_AMPERSAND_DOT);
10030 if (peek_at(parser, next_content) ==
'&' && peek_at(parser, next_content + 1) ==
'&') {
10031 if (!lexed_comment) parser_lex_ignored_newline(parser);
10032 lex_state_set(parser, PM_LEX_STATE_BEG);
10033 parser->
current.start = next_content;
10034 parser->
current.end = next_content + 2;
10036 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10041 if (peek_at(parser, next_content) ==
'|' && peek_at(parser, next_content + 1) ==
'|') {
10042 if (!lexed_comment) parser_lex_ignored_newline(parser);
10043 lex_state_set(parser, PM_LEX_STATE_BEG);
10044 parser->
current.start = next_content;
10045 parser->
current.end = next_content + 2;
10047 LEX(PM_TOKEN_PIPE_PIPE);
10053 peek_at(parser, next_content) ==
'a' &&
10054 peek_at(parser, next_content + 1) ==
'n' &&
10055 peek_at(parser, next_content + 2) ==
'd' &&
10056 peek_at(parser, next_content + 3) !=
'!' &&
10057 peek_at(parser, next_content + 3) !=
'?' &&
10058 !char_is_identifier(parser, next_content + 3, parser->
end - (next_content + 3))
10060 if (!lexed_comment) parser_lex_ignored_newline(parser);
10061 lex_state_set(parser, PM_LEX_STATE_BEG);
10062 parser->
current.start = next_content;
10063 parser->
current.end = next_content + 3;
10066 LEX(PM_TOKEN_KEYWORD_AND);
10072 peek_at(parser, next_content) ==
'o' &&
10073 peek_at(parser, next_content + 1) ==
'r' &&
10074 peek_at(parser, next_content + 2) !=
'!' &&
10075 peek_at(parser, next_content + 2) !=
'?' &&
10076 !char_is_identifier(parser, next_content + 2, parser->
end - (next_content + 2))
10078 if (!lexed_comment) parser_lex_ignored_newline(parser);
10079 lex_state_set(parser, PM_LEX_STATE_BEG);
10080 parser->
current.start = next_content;
10081 parser->
current.end = next_content + 2;
10084 LEX(PM_TOKEN_KEYWORD_OR);
10091 lex_state_set(parser, PM_LEX_STATE_BEG);
10093 parser->
current.type = PM_TOKEN_NEWLINE;
10094 if (!lexed_comment) parser_lex_callback(parser);
10104 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10105 LEX(PM_TOKEN_COMMA);
10109 pm_token_type_t
type = PM_TOKEN_PARENTHESIS_LEFT;
10111 if (space_seen && (lex_state_arg_p(parser) || parser->
lex_state == (PM_LEX_STATE_END | PM_LEX_STATE_LABEL))) {
10112 type = PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES;
10116 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10117 pm_do_loop_stack_push(parser,
false);
10124 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10125 pm_do_loop_stack_pop(parser);
10126 LEX(PM_TOKEN_PARENTHESIS_RIGHT);
10130 lex_state_set(parser, PM_LEX_STATE_BEG);
10132 LEX(PM_TOKEN_SEMICOLON);
10137 pm_token_type_t
type = PM_TOKEN_BRACKET_LEFT;
10139 if (lex_state_operator_p(parser)) {
10140 if (match(parser,
']')) {
10142 lex_state_set(parser, PM_LEX_STATE_ARG);
10143 LEX(match(parser,
'=') ? PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL : PM_TOKEN_BRACKET_LEFT_RIGHT);
10146 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABEL);
10150 if (lex_state_beg_p(parser) || (lex_state_arg_p(parser) && (space_seen || lex_state_p(parser, PM_LEX_STATE_LABELED)))) {
10151 type = PM_TOKEN_BRACKET_LEFT_ARRAY;
10154 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10155 pm_do_loop_stack_push(parser,
false);
10161 lex_state_set(parser, PM_LEX_STATE_END);
10162 pm_do_loop_stack_pop(parser);
10163 LEX(PM_TOKEN_BRACKET_RIGHT);
10167 pm_token_type_t
type = PM_TOKEN_BRACE_LEFT;
10172 lex_state_set(parser, PM_LEX_STATE_BEG);
10173 type = PM_TOKEN_LAMBDA_BEGIN;
10174 }
else if (lex_state_p(parser, PM_LEX_STATE_LABELED)) {
10176 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10177 }
else if (lex_state_p(parser, PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_END | PM_LEX_STATE_ENDFN)) {
10180 lex_state_set(parser, PM_LEX_STATE_BEG);
10181 }
else if (lex_state_p(parser, PM_LEX_STATE_ENDARG)) {
10184 lex_state_set(parser, PM_LEX_STATE_BEG);
10187 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10192 pm_do_loop_stack_push(parser,
false);
10200 pm_do_loop_stack_pop(parser);
10203 lex_mode_pop(parser);
10204 LEX(PM_TOKEN_EMBEXPR_END);
10208 lex_state_set(parser, PM_LEX_STATE_END);
10209 LEX(PM_TOKEN_BRACE_RIGHT);
10213 if (match(parser,
'*')) {
10214 if (match(parser,
'=')) {
10215 lex_state_set(parser, PM_LEX_STATE_BEG);
10216 LEX(PM_TOKEN_STAR_STAR_EQUAL);
10219 pm_token_type_t
type = PM_TOKEN_STAR_STAR;
10221 if (lex_state_spcarg_p(parser, space_seen)) {
10222 pm_parser_warn_token(parser, &parser->
current, PM_WARN_AMBIGUOUS_PREFIX_STAR_STAR);
10223 type = PM_TOKEN_USTAR_STAR;
10224 }
else if (lex_state_beg_p(parser)) {
10225 type = PM_TOKEN_USTAR_STAR;
10226 }
else if (ambiguous_operator_p(parser, space_seen)) {
10227 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->
current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
"**",
"argument prefix");
10230 if (lex_state_operator_p(parser)) {
10231 lex_state_set(parser, PM_LEX_STATE_ARG);
10233 lex_state_set(parser, PM_LEX_STATE_BEG);
10239 if (match(parser,
'=')) {
10240 lex_state_set(parser, PM_LEX_STATE_BEG);
10241 LEX(PM_TOKEN_STAR_EQUAL);
10244 pm_token_type_t
type = PM_TOKEN_STAR;
10246 if (lex_state_spcarg_p(parser, space_seen)) {
10247 pm_parser_warn_token(parser, &parser->
current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
10248 type = PM_TOKEN_USTAR;
10249 }
else if (lex_state_beg_p(parser)) {
10250 type = PM_TOKEN_USTAR;
10251 }
else if (ambiguous_operator_p(parser, space_seen)) {
10252 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->
current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
"*",
"argument prefix");
10255 if (lex_state_operator_p(parser)) {
10256 lex_state_set(parser, PM_LEX_STATE_ARG);
10258 lex_state_set(parser, PM_LEX_STATE_BEG);
10266 if (lex_state_operator_p(parser)) {
10267 lex_state_set(parser, PM_LEX_STATE_ARG);
10268 if (match(parser,
'@')) {
10269 LEX(PM_TOKEN_BANG);
10272 lex_state_set(parser, PM_LEX_STATE_BEG);
10275 if (match(parser,
'=')) {
10276 LEX(PM_TOKEN_BANG_EQUAL);
10279 if (match(parser,
'~')) {
10280 LEX(PM_TOKEN_BANG_TILDE);
10283 LEX(PM_TOKEN_BANG);
10288 current_token_starts_line(parser) &&
10290 memcmp(parser->
current.end,
"begin", 5) == 0 &&
10291 (pm_char_is_whitespace(peek_offset(parser, 5)) || (peek_offset(parser, 5) ==
'\0'))
10293 pm_token_type_t
type = lex_embdoc(parser);
10294 if (
type == PM_TOKEN_EOF) {
10298 goto lex_next_token;
10301 if (lex_state_operator_p(parser)) {
10302 lex_state_set(parser, PM_LEX_STATE_ARG);
10304 lex_state_set(parser, PM_LEX_STATE_BEG);
10307 if (match(parser,
'>')) {
10308 LEX(PM_TOKEN_EQUAL_GREATER);
10311 if (match(parser,
'~')) {
10312 LEX(PM_TOKEN_EQUAL_TILDE);
10315 if (match(parser,
'=')) {
10316 LEX(match(parser,
'=') ? PM_TOKEN_EQUAL_EQUAL_EQUAL : PM_TOKEN_EQUAL_EQUAL);
10319 LEX(PM_TOKEN_EQUAL);
10323 if (match(parser,
'<')) {
10325 !lex_state_p(parser, PM_LEX_STATE_DOT | PM_LEX_STATE_CLASS) &&
10326 !lex_state_end_p(parser) &&
10327 (!lex_state_p(parser, PM_LEX_STATE_ARG_ANY) || lex_state_p(parser, PM_LEX_STATE_LABELED) || space_seen)
10329 const uint8_t *end = parser->
current.end;
10334 if (match(parser,
'-')) {
10335 indent = PM_HEREDOC_INDENT_DASH;
10337 else if (match(parser,
'~')) {
10338 indent = PM_HEREDOC_INDENT_TILDE;
10341 if (match(parser,
'`')) {
10342 quote = PM_HEREDOC_QUOTE_BACKTICK;
10344 else if (match(parser,
'"')) {
10345 quote = PM_HEREDOC_QUOTE_DOUBLE;
10347 else if (match(parser,
'\'')) {
10348 quote = PM_HEREDOC_QUOTE_SINGLE;
10351 const uint8_t *ident_start = parser->
current.end;
10356 }
else if (quote == PM_HEREDOC_QUOTE_NONE && (width = char_is_identifier(parser, parser->
current.end, parser->
end - parser->
current.end)) == 0) {
10359 if (quote == PM_HEREDOC_QUOTE_NONE) {
10360 parser->
current.end += width;
10362 while ((width = char_is_identifier(parser, parser->
current.end, parser->
end - parser->
current.end))) {
10363 parser->
current.end += width;
10369 if (*parser->
current.end ==
'\r' || *parser->
current.end ==
'\n')
break;
10374 size_t ident_length = (size_t) (parser->
current.end - ident_start);
10375 bool ident_error =
false;
10377 if (quote != PM_HEREDOC_QUOTE_NONE && !match(parser, (uint8_t) quote)) {
10378 pm_parser_err(parser, U32(ident_start - parser->
start), U32(ident_length), PM_ERR_HEREDOC_IDENTIFIER);
10379 ident_error =
true;
10384 .mode = PM_LEX_HEREDOC,
10387 .ident_start = ident_start,
10388 .ident_length = ident_length,
10392 .next_start = parser->
current.end,
10394 .line_continuation =
false
10399 const uint8_t *body_start = next_newline(parser->
current.end, parser->
end - parser->
current.end);
10401 if (body_start == NULL) {
10406 if (!ident_error) pm_parser_err_heredoc_term(parser, ident_start, ident_length);
10407 body_start = parser->
end;
10411 pm_line_offset_list_append(&parser->
line_offsets, U32(body_start - parser->
start + 1));
10420 LEX(PM_TOKEN_HEREDOC_START);
10424 if (match(parser,
'=')) {
10425 lex_state_set(parser, PM_LEX_STATE_BEG);
10426 LEX(PM_TOKEN_LESS_LESS_EQUAL);
10429 if (ambiguous_operator_p(parser, space_seen)) {
10430 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->
current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
"<<",
"here document");
10433 if (lex_state_operator_p(parser)) {
10434 lex_state_set(parser, PM_LEX_STATE_ARG);
10436 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->
command_start =
true;
10437 lex_state_set(parser, PM_LEX_STATE_BEG);
10440 LEX(PM_TOKEN_LESS_LESS);
10443 if (lex_state_operator_p(parser)) {
10444 lex_state_set(parser, PM_LEX_STATE_ARG);
10446 if (lex_state_p(parser, PM_LEX_STATE_CLASS)) parser->
command_start =
true;
10447 lex_state_set(parser, PM_LEX_STATE_BEG);
10450 if (match(parser,
'=')) {
10451 if (match(parser,
'>')) {
10452 LEX(PM_TOKEN_LESS_EQUAL_GREATER);
10455 LEX(PM_TOKEN_LESS_EQUAL);
10458 LEX(PM_TOKEN_LESS);
10462 if (match(parser,
'>')) {
10463 if (lex_state_operator_p(parser)) {
10464 lex_state_set(parser, PM_LEX_STATE_ARG);
10466 lex_state_set(parser, PM_LEX_STATE_BEG);
10468 LEX(match(parser,
'=') ? PM_TOKEN_GREATER_GREATER_EQUAL : PM_TOKEN_GREATER_GREATER);
10471 if (lex_state_operator_p(parser)) {
10472 lex_state_set(parser, PM_LEX_STATE_ARG);
10474 lex_state_set(parser, PM_LEX_STATE_BEG);
10477 LEX(match(parser,
'=') ? PM_TOKEN_GREATER_EQUAL : PM_TOKEN_GREATER);
10481 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10482 lex_mode_push_string(parser,
true, label_allowed,
'\0',
'"');
10483 LEX(PM_TOKEN_STRING_BEGIN);
10488 if (lex_state_p(parser, PM_LEX_STATE_FNAME)) {
10489 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10490 LEX(PM_TOKEN_BACKTICK);
10493 if (lex_state_p(parser, PM_LEX_STATE_DOT)) {
10494 if (previous_command_start) {
10495 lex_state_set(parser, PM_LEX_STATE_CMDARG);
10497 lex_state_set(parser, PM_LEX_STATE_ARG);
10500 LEX(PM_TOKEN_BACKTICK);
10503 lex_mode_push_string(parser,
true,
false,
'\0',
'`');
10504 LEX(PM_TOKEN_BACKTICK);
10509 bool label_allowed = (lex_state_p(parser, PM_LEX_STATE_LABEL | PM_LEX_STATE_ENDFN) && !previous_command_start) || lex_state_arg_p(parser);
10510 lex_mode_push_string(parser,
false, label_allowed,
'\0',
'\'');
10511 LEX(PM_TOKEN_STRING_BEGIN);
10516 LEX(lex_question_mark(parser));
10520 if (match(parser,
'&')) {
10521 lex_state_set(parser, PM_LEX_STATE_BEG);
10523 if (match(parser,
'=')) {
10524 LEX(PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL);
10527 LEX(PM_TOKEN_AMPERSAND_AMPERSAND);
10530 if (match(parser,
'=')) {
10531 lex_state_set(parser, PM_LEX_STATE_BEG);
10532 LEX(PM_TOKEN_AMPERSAND_EQUAL);
10535 if (match(parser,
'.')) {
10536 lex_state_set(parser, PM_LEX_STATE_DOT);
10537 LEX(PM_TOKEN_AMPERSAND_DOT);
10540 pm_token_type_t
type = PM_TOKEN_AMPERSAND;
10541 if (lex_state_spcarg_p(parser, space_seen)) {
10542 if ((peek(parser) !=
':') || (peek_offset(parser, 1) ==
'\0')) {
10543 pm_parser_warn_token(parser, &parser->
current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10545 const uint8_t delim = peek_offset(parser, 1);
10547 if ((delim !=
'\'') && (delim !=
'"') && !char_is_identifier(parser, parser->
current.end + 1, parser->
end - (parser->
current.end + 1))) {
10548 pm_parser_warn_token(parser, &parser->
current, PM_WARN_AMBIGUOUS_PREFIX_AMPERSAND);
10552 type = PM_TOKEN_UAMPERSAND;
10553 }
else if (lex_state_beg_p(parser)) {
10554 type = PM_TOKEN_UAMPERSAND;
10555 }
else if (ambiguous_operator_p(parser, space_seen)) {
10556 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->
current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
"&",
"argument prefix");
10559 if (lex_state_operator_p(parser)) {
10560 lex_state_set(parser, PM_LEX_STATE_ARG);
10562 lex_state_set(parser, PM_LEX_STATE_BEG);
10570 if (match(parser,
'|')) {
10571 if (match(parser,
'=')) {
10572 lex_state_set(parser, PM_LEX_STATE_BEG);
10573 LEX(PM_TOKEN_PIPE_PIPE_EQUAL);
10576 if (lex_state_p(parser, PM_LEX_STATE_BEG)) {
10578 LEX(PM_TOKEN_PIPE);
10581 lex_state_set(parser, PM_LEX_STATE_BEG);
10582 LEX(PM_TOKEN_PIPE_PIPE);
10585 if (match(parser,
'=')) {
10586 lex_state_set(parser, PM_LEX_STATE_BEG);
10587 LEX(PM_TOKEN_PIPE_EQUAL);
10590 if (lex_state_operator_p(parser)) {
10591 lex_state_set(parser, PM_LEX_STATE_ARG);
10593 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10596 LEX(PM_TOKEN_PIPE);
10600 if (lex_state_operator_p(parser)) {
10601 lex_state_set(parser, PM_LEX_STATE_ARG);
10603 if (match(parser,
'@')) {
10604 LEX(PM_TOKEN_UPLUS);
10607 LEX(PM_TOKEN_PLUS);
10610 if (match(parser,
'=')) {
10611 lex_state_set(parser, PM_LEX_STATE_BEG);
10612 LEX(PM_TOKEN_PLUS_EQUAL);
10616 lex_state_beg_p(parser) ||
10617 (lex_state_spcarg_p(parser, space_seen) ? (pm_parser_warn_token(parser, &parser->
current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS),
true) : false)
10619 lex_state_set(parser, PM_LEX_STATE_BEG);
10621 if (pm_char_is_decimal_digit(peek(parser))) {
10623 pm_token_type_t
type = lex_numeric(parser);
10624 lex_state_set(parser, PM_LEX_STATE_END);
10628 LEX(PM_TOKEN_UPLUS);
10631 if (ambiguous_operator_p(parser, space_seen)) {
10632 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->
current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
"+",
"unary operator");
10635 lex_state_set(parser, PM_LEX_STATE_BEG);
10636 LEX(PM_TOKEN_PLUS);
10641 if (lex_state_operator_p(parser)) {
10642 lex_state_set(parser, PM_LEX_STATE_ARG);
10644 if (match(parser,
'@')) {
10645 LEX(PM_TOKEN_UMINUS);
10648 LEX(PM_TOKEN_MINUS);
10651 if (match(parser,
'=')) {
10652 lex_state_set(parser, PM_LEX_STATE_BEG);
10653 LEX(PM_TOKEN_MINUS_EQUAL);
10656 if (match(parser,
'>')) {
10657 lex_state_set(parser, PM_LEX_STATE_ENDFN);
10658 LEX(PM_TOKEN_MINUS_GREATER);
10661 bool spcarg = lex_state_spcarg_p(parser, space_seen);
10662 bool is_beg = lex_state_beg_p(parser);
10663 if (!is_beg && spcarg) {
10664 pm_parser_warn_token(parser, &parser->
current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
10667 if (is_beg || spcarg) {
10668 lex_state_set(parser, PM_LEX_STATE_BEG);
10669 LEX(pm_char_is_decimal_digit(peek(parser)) ? PM_TOKEN_UMINUS_NUM : PM_TOKEN_UMINUS);
10672 if (ambiguous_operator_p(parser, space_seen)) {
10673 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->
current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
"-",
"unary operator");
10676 lex_state_set(parser, PM_LEX_STATE_BEG);
10677 LEX(PM_TOKEN_MINUS);
10682 bool beg_p = lex_state_beg_p(parser);
10684 if (match(parser,
'.')) {
10685 if (match(parser,
'.')) {
10688 if (lex_state_p(parser, PM_LEX_STATE_END)) {
10689 lex_state_set(parser, PM_LEX_STATE_BEG);
10691 lex_state_set(parser, PM_LEX_STATE_ENDARG);
10693 LEX(PM_TOKEN_UDOT_DOT_DOT);
10697 pm_parser_warn_token(parser, &parser->
current, PM_WARN_DOT_DOT_DOT_EOL);
10700 lex_state_set(parser, PM_LEX_STATE_BEG);
10701 LEX(beg_p ? PM_TOKEN_UDOT_DOT_DOT : PM_TOKEN_DOT_DOT_DOT);
10704 lex_state_set(parser, PM_LEX_STATE_BEG);
10705 LEX(beg_p ? PM_TOKEN_UDOT_DOT : PM_TOKEN_DOT_DOT);
10708 lex_state_set(parser, PM_LEX_STATE_DOT);
10723 pm_token_type_t
type = lex_numeric(parser);
10724 lex_state_set(parser, PM_LEX_STATE_END);
10730 if (match(parser,
':')) {
10731 if (lex_state_beg_p(parser) || lex_state_p(parser, PM_LEX_STATE_CLASS) || (lex_state_p(parser, PM_LEX_STATE_ARG_ANY) && space_seen)) {
10732 lex_state_set(parser, PM_LEX_STATE_BEG);
10733 LEX(PM_TOKEN_UCOLON_COLON);
10736 lex_state_set(parser, PM_LEX_STATE_DOT);
10737 LEX(PM_TOKEN_COLON_COLON);
10740 if (lex_state_end_p(parser) || pm_char_is_whitespace(peek(parser)) || peek(parser) ==
'#') {
10741 lex_state_set(parser, PM_LEX_STATE_BEG);
10742 LEX(PM_TOKEN_COLON);
10745 if (peek(parser) ==
'"' || peek(parser) ==
'\'') {
10746 lex_mode_push_string(parser, peek(parser) ==
'"',
false,
'\0', *parser->
current.end);
10750 lex_state_set(parser, PM_LEX_STATE_FNAME);
10751 LEX(PM_TOKEN_SYMBOL_BEGIN);
10755 if (lex_state_beg_p(parser)) {
10756 lex_mode_push_regexp(parser,
'\0',
'/');
10757 LEX(PM_TOKEN_REGEXP_BEGIN);
10760 if (match(parser,
'=')) {
10761 lex_state_set(parser, PM_LEX_STATE_BEG);
10762 LEX(PM_TOKEN_SLASH_EQUAL);
10765 if (lex_state_spcarg_p(parser, space_seen)) {
10766 pm_parser_warn_token(parser, &parser->
current, PM_WARN_AMBIGUOUS_SLASH);
10767 lex_mode_push_regexp(parser,
'\0',
'/');
10768 LEX(PM_TOKEN_REGEXP_BEGIN);
10771 if (ambiguous_operator_p(parser, space_seen)) {
10772 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->
current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
"/",
"regexp literal");
10775 if (lex_state_operator_p(parser)) {
10776 lex_state_set(parser, PM_LEX_STATE_ARG);
10778 lex_state_set(parser, PM_LEX_STATE_BEG);
10781 LEX(PM_TOKEN_SLASH);
10785 if (lex_state_operator_p(parser)) {
10786 lex_state_set(parser, PM_LEX_STATE_ARG);
10788 lex_state_set(parser, PM_LEX_STATE_BEG);
10790 LEX(match(parser,
'=') ? PM_TOKEN_CARET_EQUAL : PM_TOKEN_CARET);
10794 if (lex_state_operator_p(parser)) {
10795 (void) match(parser,
'@');
10796 lex_state_set(parser, PM_LEX_STATE_ARG);
10798 lex_state_set(parser, PM_LEX_STATE_BEG);
10801 LEX(PM_TOKEN_TILDE);
10809 if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->
current.end >= parser->
end)) {
10810 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT_EOF);
10811 LEX(PM_TOKEN_PERCENT);
10814 if (!lex_state_beg_p(parser) && match(parser,
'=')) {
10815 lex_state_set(parser, PM_LEX_STATE_BEG);
10816 LEX(PM_TOKEN_PERCENT_EQUAL);
10818 lex_state_beg_p(parser) ||
10819 (lex_state_p(parser, PM_LEX_STATE_FITEM) && (peek(parser) ==
's')) ||
10820 lex_state_spcarg_p(parser, space_seen)
10823 if (*parser->
current.end >= 0x80) {
10824 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10827 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10828 lex_mode_push_string(parser,
true,
false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10829 LEX(PM_TOKEN_STRING_BEGIN);
10834 uint8_t delimiter = peek_offset(parser, 1);
10836 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10837 goto lex_next_token;
10840 switch (peek(parser)) {
10845 lex_mode_push_list(parser,
false, pm_lex_percent_delimiter(parser));
10847 lex_mode_push_list_eof(parser);
10850 LEX(PM_TOKEN_PERCENT_LOWER_I);
10856 lex_mode_push_list(parser,
true, pm_lex_percent_delimiter(parser));
10858 lex_mode_push_list_eof(parser);
10861 LEX(PM_TOKEN_PERCENT_UPPER_I);
10867 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10868 lex_mode_push_regexp(parser, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10870 lex_mode_push_regexp(parser,
'\0',
'\0');
10873 LEX(PM_TOKEN_REGEXP_BEGIN);
10879 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10880 lex_mode_push_string(parser,
false,
false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10882 lex_mode_push_string_eof(parser);
10885 LEX(PM_TOKEN_STRING_BEGIN);
10891 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10892 lex_mode_push_string(parser,
true,
false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10894 lex_mode_push_string_eof(parser);
10897 LEX(PM_TOKEN_STRING_BEGIN);
10903 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10904 lex_mode_push_string(parser,
false,
false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10905 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
10907 lex_mode_push_string_eof(parser);
10910 LEX(PM_TOKEN_SYMBOL_BEGIN);
10916 lex_mode_push_list(parser,
false, pm_lex_percent_delimiter(parser));
10918 lex_mode_push_list_eof(parser);
10921 LEX(PM_TOKEN_PERCENT_LOWER_W);
10927 lex_mode_push_list(parser,
true, pm_lex_percent_delimiter(parser));
10929 lex_mode_push_list_eof(parser);
10932 LEX(PM_TOKEN_PERCENT_UPPER_W);
10938 const uint8_t delimiter = pm_lex_percent_delimiter(parser);
10939 lex_mode_push_string(parser,
true,
false, lex_mode_incrementor(delimiter), lex_mode_terminator(delimiter));
10941 lex_mode_push_string_eof(parser);
10944 LEX(PM_TOKEN_PERCENT_LOWER_X);
10951 pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
10952 goto lex_next_token;
10956 if (ambiguous_operator_p(parser, space_seen)) {
10957 PM_PARSER_WARN_TOKEN_FORMAT(parser, &parser->
current, PM_WARN_AMBIGUOUS_BINARY_OPERATOR,
"%",
"string literal");
10960 lex_state_set(parser, lex_state_operator_p(parser) ? PM_LEX_STATE_ARG : PM_LEX_STATE_BEG);
10961 LEX(PM_TOKEN_PERCENT);
10966 pm_token_type_t
type = lex_global_variable(parser);
10971 lex_mode_pop(parser);
10974 lex_state_set(parser, PM_LEX_STATE_END);
10980 lex_state_set(parser, parser->
lex_state & PM_LEX_STATE_FNAME ? PM_LEX_STATE_ENDFN : PM_LEX_STATE_END);
10981 LEX(lex_at_variable(parser));
10984 if (*parser->
current.start !=
'_') {
10985 size_t width = char_is_identifier_start(parser, parser->
current.start, parser->
end - parser->
current.start);
10992 if (*parser->
current.start >= 0x80) {
10993 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->
current, PM_ERR_INVALID_MULTIBYTE_CHARACTER, *parser->
current.start);
10994 }
else if (*parser->
current.start ==
'\\') {
10995 switch (peek_at(parser, parser->
current.start + 1)) {
10998 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->
current, PM_ERR_UNEXPECTED_TOKEN_IGNORE,
"escaped space");
11002 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->
current, PM_ERR_UNEXPECTED_TOKEN_IGNORE,
"escaped form feed");
11006 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->
current, PM_ERR_UNEXPECTED_TOKEN_IGNORE,
"escaped horizontal tab");
11010 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->
current, PM_ERR_UNEXPECTED_TOKEN_IGNORE,
"escaped vertical tab");
11013 if (peek_at(parser, parser->
current.start + 2) !=
'\n') {
11015 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->
current, PM_ERR_UNEXPECTED_TOKEN_IGNORE,
"escaped carriage return");
11020 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->
current, PM_ERR_UNEXPECTED_TOKEN_IGNORE,
"backslash");
11023 }
else if (char_is_ascii_printable(*parser->
current.start)) {
11024 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->
current, PM_ERR_INVALID_PRINTABLE_CHARACTER, *parser->
current.start);
11026 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->
current, PM_ERR_INVALID_CHARACTER, *parser->
current.start);
11029 goto lex_next_token;
11035 pm_token_type_t
type = lex_identifier(parser, previous_command_start);
11043 current_token_starts_line(parser) &&
11044 (memcmp(parser->
current.start,
"__END__", 7) == 0) &&
11045 (parser->
current.end == parser->
end || match_eol(parser))
11050 const uint8_t *cursor = parser->
current.end;
11051 while ((cursor = next_newline(cursor, parser->
end - cursor)) != NULL) {
11052 pm_line_offset_list_append(&parser->
line_offsets, U32(++cursor - parser->
start));
11056 parser->
current.type = PM_TOKEN___END__;
11057 parser_lex_callback(parser);
11067 if (
type == PM_TOKEN_IDENTIFIER ||
type == PM_TOKEN_CONSTANT ||
type == PM_TOKEN_METHOD_NAME) {
11068 if (lex_state_p(parser, PM_LEX_STATE_BEG_ANY | PM_LEX_STATE_ARG_ANY | PM_LEX_STATE_DOT)) {
11069 if (previous_command_start) {
11070 lex_state_set(parser, PM_LEX_STATE_CMDARG);
11072 lex_state_set(parser, PM_LEX_STATE_ARG);
11074 }
else if (parser->
lex_state == PM_LEX_STATE_FNAME) {
11075 lex_state_set(parser, PM_LEX_STATE_ENDFN);
11077 lex_state_set(parser, PM_LEX_STATE_END);
11082 !(last_state & (PM_LEX_STATE_DOT | PM_LEX_STATE_FNAME)) &&
11083 (
type == PM_TOKEN_IDENTIFIER) &&
11084 ((pm_parser_local_depth(parser, &parser->
current) != -1) ||
11085 pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current)))
11087 lex_state_set(parser, PM_LEX_STATE_END | PM_LEX_STATE_LABEL);
11094 case PM_LEX_LIST: {
11108 whitespace = pm_strspn_inline_whitespace(parser->
current.end, parser->
end - parser->
current.end);
11109 if (peek_offset(parser, (ptrdiff_t)whitespace) ==
'\n') {
11116 if (whitespace > 0) {
11117 parser->
current.end += whitespace;
11118 if (peek_offset(parser, -1) ==
'\n') {
11120 parser_flush_heredoc_end(parser);
11122 LEX(PM_TOKEN_WORDS_SEP);
11134 const uint8_t *breakpoints = lex_mode->
as.list.
breakpoints;
11135 const uint8_t *breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11141 while (breakpoint != NULL) {
11144 if (pm_char_is_whitespace(*breakpoint)) {
11145 parser->
current.end = breakpoint;
11146 pm_token_buffer_flush(parser, &token_buffer);
11147 LEX(PM_TOKEN_STRING_CONTENT);
11156 parser->
current.end = breakpoint + 1;
11157 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11164 if (breakpoint > parser->
current.start) {
11165 parser->
current.end = breakpoint;
11166 pm_token_buffer_flush(parser, &token_buffer);
11167 LEX(PM_TOKEN_STRING_CONTENT);
11172 parser->
current.end = breakpoint + 1;
11173 lex_mode_pop(parser);
11174 lex_state_set(parser, PM_LEX_STATE_END);
11175 LEX(PM_TOKEN_STRING_END);
11179 if (*breakpoint ==
'\0') {
11180 breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->
end - (breakpoint + 1),
true);
11187 if (*breakpoint ==
'\\') {
11188 parser->
current.end = breakpoint + 1;
11197 pm_token_buffer_escape(parser, &token_buffer);
11198 uint8_t peeked = peek(parser);
11206 pm_token_buffer_push_byte(&token_buffer, peeked);
11211 if (peek(parser) !=
'\n') {
11212 pm_token_buffer_push_byte(&token_buffer,
'\r');
11217 pm_token_buffer_push_byte(&token_buffer,
'\n');
11223 parser_flush_heredoc_end(parser);
11224 pm_token_buffer_copy(parser, &token_buffer);
11225 LEX(PM_TOKEN_STRING_CONTENT);
11228 pm_line_offset_list_append(&parser->
line_offsets, PM_TOKEN_END(parser, &parser->
current) + 1);
11235 pm_token_buffer_push_byte(&token_buffer, peeked);
11238 escape_read(parser, &token_buffer.
buffer, NULL, PM_ESCAPE_FLAG_NONE);
11240 pm_token_buffer_push_byte(&token_buffer,
'\\');
11241 pm_token_buffer_push_escaped(&token_buffer, parser);
11248 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11253 if (*breakpoint ==
'#') {
11254 pm_token_type_t
type = lex_interpolation(parser, breakpoint);
11261 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11265 if (
type == PM_TOKEN_STRING_CONTENT) {
11266 pm_token_buffer_flush(parser, &token_buffer);
11275 parser->
current.end = breakpoint + 1;
11276 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11282 pm_token_buffer_flush(parser, &token_buffer);
11283 LEX(PM_TOKEN_STRING_CONTENT);
11289 pm_token_buffer_flush(parser, &token_buffer);
11290 LEX(PM_TOKEN_STRING_CONTENT);
11292 case PM_LEX_REGEXP: {
11314 const uint8_t *breakpoints = lex_mode->
as.regexp.
breakpoints;
11315 const uint8_t *breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
false);
11318 while (breakpoint != NULL) {
11320 bool is_terminator = (*breakpoint == term);
11325 if (*breakpoint ==
'\r' && peek_at(parser, breakpoint + 1) ==
'\n') {
11326 if (term ==
'\n') {
11327 is_terminator =
true;
11333 if (term ==
'\r') {
11334 is_terminator =
false;
11340 if (is_terminator) {
11342 parser->
current.end = breakpoint + 1;
11343 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
false);
11351 if (breakpoint > parser->
current.start) {
11352 parser->
current.end = breakpoint;
11353 pm_regexp_token_buffer_flush(parser, &token_buffer);
11354 LEX(PM_TOKEN_STRING_CONTENT);
11358 size_t eol_length = match_eol_at(parser, breakpoint);
11360 parser->
current.end = breakpoint + eol_length;
11366 pm_line_offset_list_append(&parser->
line_offsets, PM_TOKEN_END(parser, &parser->
current));
11369 parser->
current.end = breakpoint + 1;
11376 lex_mode_pop(parser);
11377 lex_state_set(parser, PM_LEX_STATE_END);
11378 LEX(PM_TOKEN_REGEXP_END);
11383 if (*breakpoint && *breakpoint == lex_mode->
as.regexp.
incrementor) {
11384 parser->
current.end = breakpoint + 1;
11385 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
false);
11390 switch (*breakpoint) {
11393 parser->
current.end = breakpoint + 1;
11394 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
false);
11397 if (peek_at(parser, breakpoint + 1) !=
'\n') {
11398 parser->
current.end = breakpoint + 1;
11399 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
false);
11404 parser->
current.end = breakpoint;
11405 pm_regexp_token_buffer_escape(parser, &token_buffer);
11413 pm_line_offset_list_append(&parser->
line_offsets, U32(breakpoint - parser->
start + 1));
11414 parser->
current.end = breakpoint + 1;
11415 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
false);
11419 parser->
current.end = breakpoint + 1;
11420 parser_flush_heredoc_end(parser);
11421 pm_regexp_token_buffer_flush(parser, &token_buffer);
11422 LEX(PM_TOKEN_STRING_CONTENT);
11427 parser->
current.end = breakpoint + 1;
11436 pm_regexp_token_buffer_escape(parser, &token_buffer);
11437 uint8_t peeked = peek(parser);
11442 if (peek(parser) !=
'\n') {
11444 pm_token_buffer_push_byte(&token_buffer.
base,
'\\');
11446 pm_regexp_token_buffer_push_byte(&token_buffer,
'\r');
11447 pm_token_buffer_push_byte(&token_buffer.
base,
'\r');
11456 parser_flush_heredoc_end(parser);
11457 pm_regexp_token_buffer_copy(parser, &token_buffer);
11458 LEX(PM_TOKEN_STRING_CONTENT);
11461 pm_line_offset_list_append(&parser->
line_offsets, PM_TOKEN_END(parser, &parser->
current) + 1);
11479 case '$':
case ')':
case '*':
case '+':
11480 case '.':
case '>':
case '?':
case ']':
11481 case '^':
case '|':
case '}':
11482 pm_token_buffer_push_byte(&token_buffer.
base,
'\\');
11488 pm_regexp_token_buffer_push_byte(&token_buffer, peeked);
11489 pm_token_buffer_push_byte(&token_buffer.
base, peeked);
11494 if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer.
base,
'\\');
11495 pm_regexp_token_buffer_push_escaped(&token_buffer, parser);
11500 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
false);
11506 pm_token_type_t
type = lex_interpolation(parser, breakpoint);
11513 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
false);
11517 if (
type == PM_TOKEN_STRING_CONTENT) {
11518 pm_regexp_token_buffer_flush(parser, &token_buffer);
11524 assert(
false &&
"unreachable");
11530 pm_regexp_token_buffer_flush(parser, &token_buffer);
11531 LEX(PM_TOKEN_STRING_CONTENT);
11537 pm_regexp_token_buffer_flush(parser, &token_buffer);
11538 LEX(PM_TOKEN_STRING_CONTENT);
11540 case PM_LEX_STRING: {
11559 const uint8_t *breakpoints = lex_mode->
as.string.
breakpoints;
11560 const uint8_t *breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11566 while (breakpoint != NULL) {
11571 parser->
current.end = breakpoint + 1;
11572 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11577 bool is_terminator = (*breakpoint == term);
11582 if (*breakpoint ==
'\r' && peek_at(parser, breakpoint + 1) ==
'\n') {
11583 if (term ==
'\n') {
11584 is_terminator =
true;
11590 if (term ==
'\r') {
11591 is_terminator =
false;
11598 if (is_terminator) {
11602 parser->
current.end = breakpoint + 1;
11603 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11610 if (breakpoint > parser->
current.start) {
11611 parser->
current.end = breakpoint;
11612 pm_token_buffer_flush(parser, &token_buffer);
11613 LEX(PM_TOKEN_STRING_CONTENT);
11618 size_t eol_length = match_eol_at(parser, breakpoint);
11620 parser->
current.end = breakpoint + eol_length;
11626 pm_line_offset_list_append(&parser->
line_offsets, PM_TOKEN_END(parser, &parser->
current));
11629 parser->
current.end = breakpoint + 1;
11632 if (lex_mode->
as.string.
label_allowed && (peek(parser) ==
':') && (peek_offset(parser, 1) !=
':')) {
11634 lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
11635 lex_mode_pop(parser);
11636 LEX(PM_TOKEN_LABEL_END);
11643 parser_flush_heredoc_end(parser);
11646 lex_state_set(parser, PM_LEX_STATE_END);
11647 lex_mode_pop(parser);
11648 LEX(PM_TOKEN_STRING_END);
11651 switch (*breakpoint) {
11654 parser->
current.end = breakpoint + 1;
11655 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11658 if (peek_at(parser, breakpoint + 1) !=
'\n') {
11659 parser->
current.end = breakpoint + 1;
11660 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11667 parser->
current.end = breakpoint;
11668 pm_token_buffer_escape(parser, &token_buffer);
11669 token_buffer.
cursor = breakpoint;
11678 pm_line_offset_list_append(&parser->
line_offsets, U32(breakpoint - parser->
start + 1));
11679 parser->
current.end = breakpoint + 1;
11680 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11684 parser->
current.end = breakpoint + 1;
11685 parser_flush_heredoc_end(parser);
11686 pm_token_buffer_flush(parser, &token_buffer);
11687 LEX(PM_TOKEN_STRING_CONTENT);
11690 parser->
current.end = breakpoint + 1;
11699 pm_token_buffer_escape(parser, &token_buffer);
11700 uint8_t peeked = peek(parser);
11704 pm_token_buffer_push_byte(&token_buffer,
'\\');
11709 if (peek(parser) !=
'\n') {
11711 pm_token_buffer_push_byte(&token_buffer,
'\\');
11713 pm_token_buffer_push_byte(&token_buffer,
'\r');
11719 pm_token_buffer_push_byte(&token_buffer,
'\\');
11720 pm_token_buffer_push_byte(&token_buffer,
'\n');
11727 parser_flush_heredoc_end(parser);
11728 pm_token_buffer_copy(parser, &token_buffer);
11729 LEX(PM_TOKEN_STRING_CONTENT);
11732 pm_line_offset_list_append(&parser->
line_offsets, PM_TOKEN_END(parser, &parser->
current) + 1);
11739 pm_token_buffer_push_byte(&token_buffer, peeked);
11742 pm_token_buffer_push_byte(&token_buffer, peeked);
11745 escape_read(parser, &token_buffer.
buffer, NULL, PM_ESCAPE_FLAG_NONE);
11747 pm_token_buffer_push_byte(&token_buffer,
'\\');
11748 pm_token_buffer_push_escaped(&token_buffer, parser);
11755 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11759 pm_token_type_t
type = lex_interpolation(parser, breakpoint);
11766 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11770 if (
type == PM_TOKEN_STRING_CONTENT) {
11771 pm_token_buffer_flush(parser, &token_buffer);
11777 assert(
false &&
"unreachable");
11782 pm_token_buffer_flush(parser, &token_buffer);
11783 LEX(PM_TOKEN_STRING_CONTENT);
11789 pm_token_buffer_flush(parser, &token_buffer);
11790 LEX(PM_TOKEN_STRING_CONTENT);
11792 case PM_LEX_HEREDOC: {
11819 lex_state_set(parser, PM_LEX_STATE_END);
11820 lex_mode_pop(parser);
11821 LEX(PM_TOKEN_HEREDOC_END);
11824 const uint8_t *ident_start = heredoc_lex_mode->
ident_start;
11829 if (current_token_starts_line(parser)) {
11830 const uint8_t *start = parser->
current.start;
11832 if (!line_continuation && (start + ident_length <= parser->end)) {
11833 const uint8_t *newline = next_newline(start, parser->
end - start);
11834 const uint8_t *ident_end = newline;
11835 const uint8_t *terminator_end = newline;
11837 if (newline == NULL) {
11838 terminator_end = parser->
end;
11839 ident_end = parser->
end;
11842 if (newline[-1] ==
'\r') {
11847 const uint8_t *terminator_start = ident_end - ident_length;
11848 const uint8_t *cursor = start;
11850 if (heredoc_lex_mode->
indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->
indent == PM_HEREDOC_INDENT_TILDE) {
11851 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11857 (cursor == terminator_start) &&
11858 (memcmp(terminator_start, ident_start, ident_length) == 0)
11860 if (newline != NULL) {
11861 pm_line_offset_list_append(&parser->
line_offsets, U32(newline - parser->
start + 1));
11864 parser->
current.end = terminator_end;
11872 lex_state_set(parser, PM_LEX_STATE_END);
11873 lex_mode_pop(parser);
11874 LEX(PM_TOKEN_HEREDOC_END);
11878 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, heredoc_lex_mode->
indent);
11880 heredoc_lex_mode->
indent == PM_HEREDOC_INDENT_TILDE &&
11883 peek_at(parser, start) !=
'\n'
11892 uint8_t breakpoints[] =
"\r\n\\#";
11895 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
11896 breakpoints[3] =
'\0';
11899 const uint8_t *breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11901 bool was_line_continuation =
false;
11903 while (breakpoint != NULL) {
11904 switch (*breakpoint) {
11907 parser->
current.end = breakpoint + 1;
11908 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11911 parser->
current.end = breakpoint + 1;
11913 if (peek_at(parser, breakpoint + 1) !=
'\n') {
11914 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
11921 pm_token_buffer_escape(parser, &token_buffer);
11922 token_buffer.
cursor = breakpoint;
11927 parser_flush_heredoc_end(parser);
11928 parser->
current.end = breakpoint + 1;
11929 pm_token_buffer_flush(parser, &token_buffer);
11930 LEX(PM_TOKEN_STRING_CONTENT);
11933 pm_line_offset_list_append(&parser->
line_offsets, U32(breakpoint - parser->
start + 1));
11937 const uint8_t *start = breakpoint + 1;
11939 if (!was_line_continuation && (start + ident_length <= parser->end)) {
11942 const uint8_t *newline = next_newline(start, parser->
end - start);
11944 if (newline == NULL) {
11945 newline = parser->
end;
11946 }
else if (newline[-1] ==
'\r') {
11951 const uint8_t *terminator_start = newline - ident_length;
11955 const uint8_t *cursor = start;
11957 if (heredoc_lex_mode->
indent == PM_HEREDOC_INDENT_DASH || heredoc_lex_mode->
indent == PM_HEREDOC_INDENT_TILDE) {
11958 while (cursor < terminator_start && pm_char_is_inline_whitespace(*cursor)) {
11964 cursor == terminator_start &&
11965 (memcmp(terminator_start, ident_start, ident_length) == 0)
11967 parser->
current.end = breakpoint + 1;
11968 pm_token_buffer_flush(parser, &token_buffer);
11969 LEX(PM_TOKEN_STRING_CONTENT);
11973 size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->
as.heredoc.
base.
indent);
11980 if (lex_mode->
as.heredoc.
base.
indent == PM_HEREDOC_INDENT_TILDE) {
11985 parser->
current.end = breakpoint + 1;
11986 pm_token_buffer_flush(parser, &token_buffer);
11987 LEX(PM_TOKEN_STRING_CONTENT);
11992 parser->
current.end = breakpoint + 1;
11993 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
12003 parser->
current.end = breakpoint + 1;
12012 pm_token_buffer_escape(parser, &token_buffer);
12013 uint8_t peeked = peek(parser);
12015 if (quote == PM_HEREDOC_QUOTE_SINGLE) {
12019 if (peek(parser) !=
'\n') {
12020 pm_token_buffer_push_byte(&token_buffer,
'\\');
12021 pm_token_buffer_push_byte(&token_buffer,
'\r');
12026 pm_token_buffer_push_byte(&token_buffer,
'\\');
12027 pm_token_buffer_push_byte(&token_buffer,
'\n');
12029 breakpoint = parser->
current.end;
12032 pm_token_buffer_push_byte(&token_buffer,
'\\');
12033 pm_token_buffer_push_escaped(&token_buffer, parser);
12040 if (peek(parser) !=
'\n') {
12041 pm_token_buffer_push_byte(&token_buffer,
'\r');
12049 if (heredoc_lex_mode->
indent == PM_HEREDOC_INDENT_TILDE) {
12050 const uint8_t *end = parser->
current.end;
12053 pm_line_offset_list_append(&parser->
line_offsets, U32(end - parser->
start + 1));
12058 parser->
current.end = breakpoint;
12059 pm_token_buffer_flush(parser, &token_buffer);
12063 parser->
current.end = end + 1;
12065 LEX(PM_TOKEN_STRING_CONTENT);
12068 was_line_continuation =
true;
12070 breakpoint = parser->
current.end;
12073 escape_read(parser, &token_buffer.
buffer, NULL, PM_ESCAPE_FLAG_NONE);
12079 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
12083 pm_token_type_t
type = lex_interpolation(parser, breakpoint);
12091 breakpoint = pm_strpbrk(parser, parser->
current.end, breakpoints, parser->
end - parser->
current.end,
true);
12095 if (
type == PM_TOKEN_STRING_CONTENT) {
12096 pm_token_buffer_flush(parser, &token_buffer);
12102 assert(
false &&
"unreachable");
12105 was_line_continuation =
false;
12110 pm_token_buffer_flush(parser, &token_buffer);
12111 LEX(PM_TOKEN_STRING_CONTENT);
12117 pm_token_buffer_flush(parser, &token_buffer);
12118 LEX(PM_TOKEN_STRING_CONTENT);
12122 assert(
false &&
"unreachable");
/**
 * Binding powers used when parsing operator expressions, listed from the
 * loosest (STATEMENT) to the tightest (CALL). PM_BINDING_POWER_UNSET is 0 and
 * PM_BINDING_POWER_MAX is the largest value. Every value shown is even;
 * presumably the odd slots are reserved for the `precedence + 1` right-hand
 * powers that the associativity helper macros produce.
 */
12140 PM_BINDING_POWER_UNSET = 0,
12141 PM_BINDING_POWER_STATEMENT = 2,
12142 PM_BINDING_POWER_MODIFIER_RESCUE = 4,
12143 PM_BINDING_POWER_MODIFIER = 6,
12144 PM_BINDING_POWER_COMPOSITION = 8,
12145 PM_BINDING_POWER_NOT = 10,
12146 PM_BINDING_POWER_MATCH = 12,
12147 PM_BINDING_POWER_DEFINED = 14,
12148 PM_BINDING_POWER_MULTI_ASSIGNMENT = 16,
12149 PM_BINDING_POWER_ASSIGNMENT = 18,
12150 PM_BINDING_POWER_TERNARY = 20,
12151 PM_BINDING_POWER_RANGE = 22,
12152 PM_BINDING_POWER_LOGICAL_OR = 24,
12153 PM_BINDING_POWER_LOGICAL_AND = 26,
12154 PM_BINDING_POWER_EQUALITY = 28,
12155 PM_BINDING_POWER_COMPARISON = 30,
12156 PM_BINDING_POWER_BITWISE_OR = 32,
12157 PM_BINDING_POWER_BITWISE_AND = 34,
12158 PM_BINDING_POWER_SHIFT = 36,
12159 PM_BINDING_POWER_TERM = 38,
12160 PM_BINDING_POWER_FACTOR = 40,
12161 PM_BINDING_POWER_UMINUS = 42,
12162 PM_BINDING_POWER_EXPONENT = 44,
12163 PM_BINDING_POWER_UNARY = 46,
12164 PM_BINDING_POWER_INDEX = 48,
12165 PM_BINDING_POWER_CALL = 50,
12166 PM_BINDING_POWER_MAX = 52
12167} pm_binding_power_t;
// Shorthand initializers for entries of the binding-power table. Each expands
// to a four-value brace initializer: the first two values are binding powers,
// the last two are booleans.
// NOTE(review): the fields are presumably left power, right power, "binary
// operator" and "non-associative"; the struct definition is not visible in
// this excerpt, so confirm against it.

// Assignment operators: first power UNARY, second power ASSIGNMENT.
12190#define BINDING_POWER_ASSIGNMENT { PM_BINDING_POWER_UNARY, PM_BINDING_POWER_ASSIGNMENT, true, false }
// Second power is one above the first.
12191#define LEFT_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, false }
// Both powers are equal.
12192#define RIGHT_ASSOCIATIVE(precedence) { precedence, precedence, true, false }
// Same powers as LEFT_ASSOCIATIVE, but with the fourth value set to true.
12193#define NON_ASSOCIATIVE(precedence) { precedence, precedence + 1, true, true }
// Same powers as RIGHT_ASSOCIATIVE, but with the third value set to false.
12194#define RIGHT_ASSOCIATIVE_UNARY(precedence) { precedence, precedence, false, false }
// Designated initializers mapping operator token types to their binding-power
// entry. The array declaration itself is not visible in this excerpt;
// presumably this is the `pm_binding_powers` table read elsewhere in the file.

// rescue modifier: spelled out by hand because its second power
// (COMPOSITION) is not simply `first` or `first + 1`.
12198 [PM_TOKEN_KEYWORD_RESCUE_MODIFIER] = { PM_BINDING_POWER_MODIFIER_RESCUE, PM_BINDING_POWER_COMPOSITION,
true,
false },
// if / unless / until / while modifiers
12201 [PM_TOKEN_KEYWORD_IF_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12202 [PM_TOKEN_KEYWORD_UNLESS_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12203 [PM_TOKEN_KEYWORD_UNTIL_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
12204 [PM_TOKEN_KEYWORD_WHILE_MODIFIER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_MODIFIER),
// and / or keywords
12207 [PM_TOKEN_KEYWORD_AND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
12208 [PM_TOKEN_KEYWORD_OR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPOSITION),
// `=>` and `in` (MATCH level)
12211 [PM_TOKEN_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
12212 [PM_TOKEN_KEYWORD_IN] = NON_ASSOCIATIVE(PM_BINDING_POWER_MATCH),
// assignment and compound-assignment operators
12215 [PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12216 [PM_TOKEN_AMPERSAND_EQUAL] = BINDING_POWER_ASSIGNMENT,
12217 [PM_TOKEN_CARET_EQUAL] = BINDING_POWER_ASSIGNMENT,
12218 [PM_TOKEN_EQUAL] = BINDING_POWER_ASSIGNMENT,
12219 [PM_TOKEN_GREATER_GREATER_EQUAL] = BINDING_POWER_ASSIGNMENT,
12220 [PM_TOKEN_LESS_LESS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12221 [PM_TOKEN_MINUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12222 [PM_TOKEN_PERCENT_EQUAL] = BINDING_POWER_ASSIGNMENT,
12223 [PM_TOKEN_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12224 [PM_TOKEN_PIPE_PIPE_EQUAL] = BINDING_POWER_ASSIGNMENT,
12225 [PM_TOKEN_PLUS_EQUAL] = BINDING_POWER_ASSIGNMENT,
12226 [PM_TOKEN_SLASH_EQUAL] = BINDING_POWER_ASSIGNMENT,
12227 [PM_TOKEN_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
12228 [PM_TOKEN_STAR_STAR_EQUAL] = BINDING_POWER_ASSIGNMENT,
// ternary `?`
12231 [PM_TOKEN_QUESTION_MARK] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_TERNARY),
// ranges; note the unary (prefix) range tokens use the LOGICAL_OR level
12234 [PM_TOKEN_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12235 [PM_TOKEN_DOT_DOT_DOT] = NON_ASSOCIATIVE(PM_BINDING_POWER_RANGE),
12236 [PM_TOKEN_UDOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
12237 [PM_TOKEN_UDOT_DOT_DOT] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_LOGICAL_OR),
// ||
12240 [PM_TOKEN_PIPE_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_OR),
// &&
12243 [PM_TOKEN_AMPERSAND_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_LOGICAL_AND),
// equality-level operators
12246 [PM_TOKEN_BANG_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12247 [PM_TOKEN_BANG_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12248 [PM_TOKEN_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12249 [PM_TOKEN_EQUAL_EQUAL_EQUAL] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12250 [PM_TOKEN_EQUAL_TILDE] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
12251 [PM_TOKEN_LESS_EQUAL_GREATER] = NON_ASSOCIATIVE(PM_BINDING_POWER_EQUALITY),
// comparison operators
12254 [PM_TOKEN_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12255 [PM_TOKEN_GREATER_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12256 [PM_TOKEN_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
12257 [PM_TOKEN_LESS_EQUAL] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_COMPARISON),
// ^ and | share the BITWISE_OR level
12260 [PM_TOKEN_CARET] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
12261 [PM_TOKEN_PIPE] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_OR),
// &
12264 [PM_TOKEN_AMPERSAND] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_BITWISE_AND),
// shifts
12267 [PM_TOKEN_GREATER_GREATER] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
12268 [PM_TOKEN_LESS_LESS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_SHIFT),
// additive operators
12271 [PM_TOKEN_MINUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
12272 [PM_TOKEN_PLUS] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_TERM),
// multiplicative operators, plus the unary star token
12275 [PM_TOKEN_PERCENT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12276 [PM_TOKEN_SLASH] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12277 [PM_TOKEN_STAR] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_FACTOR),
12278 [PM_TOKEN_USTAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_FACTOR),
// unary minus; UMINUS_NUM is spelled out by hand with MAX as its second power
12281 [PM_TOKEN_UMINUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UMINUS),
12282 [PM_TOKEN_UMINUS_NUM] = { PM_BINDING_POWER_UMINUS, PM_BINDING_POWER_MAX,
false,
false },
// ** and the unary double-star token
12285 [PM_TOKEN_STAR_STAR] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_EXPONENT),
12286 [PM_TOKEN_USTAR_STAR] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
// ! ~ and unary +
12289 [PM_TOKEN_BANG] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12290 [PM_TOKEN_TILDE] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
12291 [PM_TOKEN_UPLUS] = RIGHT_ASSOCIATIVE_UNARY(PM_BINDING_POWER_UNARY),
// [
12294 [PM_TOKEN_BRACKET_LEFT] = LEFT_ASSOCIATIVE(PM_BINDING_POWER_INDEX),
// :: . &.
12297 [PM_TOKEN_COLON_COLON] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12298 [PM_TOKEN_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL),
12299 [PM_TOKEN_AMPERSAND_DOT] = RIGHT_ASSOCIATIVE(PM_BINDING_POWER_CALL)
// Remove the table-building helper macros once the table entries have been
// written, so their short, generic names cannot leak into the rest of the
// translation unit. NON_ASSOCIATIVE is defined alongside the other four
// helpers and is removed here with them.
12302#undef BINDING_POWER_ASSIGNMENT
12303#undef LEFT_ASSOCIATIVE
12304#undef RIGHT_ASSOCIATIVE
#undef NON_ASSOCIATIVE
12305#undef RIGHT_ASSOCIATIVE_UNARY
/**
 * Returns true if match1 succeeds for either of the two given token types.
 * The checks short-circuit from left to right.
 */
12319match2(
const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12320 return match1(parser, type1) || match1(parser, type2);
/**
 * Returns true if match1 succeeds for any of the three given token types.
 * The checks short-circuit from left to right.
 */
12327match3(
const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
12328 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3);
/**
 * Returns true if match1 succeeds for any of the four given token types.
 * The checks short-circuit from left to right.
 */
12335match4(
const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4) {
12336 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
/**
 * Returns true if match1 succeeds for any of the seven given token types.
 * The checks short-circuit from left to right.
 */
12343match7(
const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7) {
12344 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7);
/**
 * Returns true if match1 succeeds for any of the eight given token types.
 * The checks short-circuit from left to right.
 */
12351match8(
const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8) {
12352 return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
// Single-type accept: when match1 succeeds for `type`, call parser_lex on the
// parser (presumably moving on to the next token; the function header and the
// return statements are not visible in this excerpt).
12363 if (match1(parser,
type)) {
12364 parser_lex(parser);
/**
 * Two-type variant of the accept helper: when match2 succeeds for either
 * `type1` or `type2`, call parser_lex on the parser. The return statements
 * are not visible in this excerpt.
 */
12375accept2(
pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
12376 if (match2(parser, type1, type2)) {
12377 parser_lex(parser);
// Nothing to report when accept1 succeeds for the expected type.
12396 if (accept1(parser,
type))
return;
// Otherwise record `diag_id` through pm_parser_err at the offset of `location`
// from the start of the source, passing 0 as the third argument (apparently
// the length). `location` is computed on a line not visible here.
12399 pm_parser_err(parser, U32(location - parser->
start), 0, diag_id);
// Nothing to report when accept2 succeeds for either expected type.
12411 if (accept2(parser, type1, type2))
return;
// Otherwise record `diag_id` through pm_parser_err at the offset of `location`
// from the start of the source, passing 0 as the third argument (apparently
// the length). `location` is computed on a line not visible here.
12414 pm_parser_err(parser, U32(location - parser->
start), 0, diag_id);
/**
 * Expects a PM_TOKEN_HEREDOC_END token. When match1 finds one, parser_lex is
 * called; pm_parser_err_heredoc_term is called with the heredoc identifier
 * (`ident_start`, `ident_length`) to report a missing terminator. The
 * branching between the two calls is on lines not visible here (presumably an
 * `else`).
 */
12425expect1_heredoc_term(
pm_parser_t *parser,
const uint8_t *ident_start,
size_t ident_length) {
12426 if (match1(parser, PM_TOKEN_HEREDOC_END)) {
12427 parser_lex(parser);
12429 pm_parser_err_heredoc_term(parser, ident_start, ident_length);
// Nothing to report when accept1 succeeds for the expected type.
12443 if (accept1(parser,
type))
return;
// Otherwise attach the error to the `opening` token rather than to the current
// position: the start offset and the length passed to pm_parser_err are both
// derived from opening->start and opening->end.
12445 const uint8_t *start = opening->
start;
12446 pm_parser_err(parser, U32(start - parser->
start), U32(opening->
end - start), diag_id);
/**
 * Forward declaration of parse_expression, so that the helpers that follow can
 * call it; its definition is not part of this excerpt.
 */
12453parse_expression(
pm_parser_t *parser, pm_binding_power_t binding_power,
bool accepts_command_call,
bool accepts_label,
pm_diagnostic_id_t diag_id, uint16_t depth);
/**
 * Parses an expression by forwarding every argument unchanged to
 * parse_expression, then runs pm_assert_value_expression on the resulting
 * node. The return statement is not visible in this excerpt.
 */
12460parse_value_expression(
pm_parser_t *parser, pm_binding_power_t binding_power,
bool accepts_command_call,
bool accepts_label,
pm_diagnostic_id_t diag_id, uint16_t depth) {
12461 pm_node_t *node = parse_expression(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
12462 pm_assert_value_expression(parser, node);
/**
 * Classifies a token type by whether it can begin an expression. The token is
 * dispatched through a switch whose explicit case groups are listed below; the
 * `return` statements of those groups are not visible in this excerpt. The one
 * visible return answers true exactly when the token has no left binding power
 * in pm_binding_powers.
 */
12485token_begins_expression_p(pm_token_type_t
type) {
// Group 1: `=>` and `in`.
12487 case PM_TOKEN_EQUAL_GREATER:
12488 case PM_TOKEN_KEYWORD_IN:
// Group 2: closing delimiters, separators and block/branch keywords.
12492 case PM_TOKEN_BRACE_RIGHT:
12493 case PM_TOKEN_BRACKET_RIGHT:
12494 case PM_TOKEN_COLON:
12495 case PM_TOKEN_COMMA:
12496 case PM_TOKEN_EMBEXPR_END:
12498 case PM_TOKEN_LAMBDA_BEGIN:
12499 case PM_TOKEN_KEYWORD_DO:
12500 case PM_TOKEN_KEYWORD_DO_LOOP:
12501 case PM_TOKEN_KEYWORD_END:
12502 case PM_TOKEN_KEYWORD_ELSE:
12503 case PM_TOKEN_KEYWORD_ELSIF:
12504 case PM_TOKEN_KEYWORD_ENSURE:
12505 case PM_TOKEN_KEYWORD_THEN:
12506 case PM_TOKEN_KEYWORD_RESCUE:
12507 case PM_TOKEN_KEYWORD_WHEN:
12508 case PM_TOKEN_NEWLINE:
12509 case PM_TOKEN_PARENTHESIS_RIGHT:
12510 case PM_TOKEN_SEMICOLON:
// Sanity check: none of the tokens in this group has a left binding power.
12516 assert(pm_binding_powers[
type].left == PM_BINDING_POWER_UNSET);
// Group 3: unary/prefix operator tokens.
12518 case PM_TOKEN_UAMPERSAND:
12522 case PM_TOKEN_UCOLON_COLON:
12523 case PM_TOKEN_UMINUS:
12524 case PM_TOKEN_UMINUS_NUM:
12525 case PM_TOKEN_UPLUS:
12526 case PM_TOKEN_BANG:
12527 case PM_TOKEN_TILDE:
12528 case PM_TOKEN_UDOT_DOT:
12529 case PM_TOKEN_UDOT_DOT_DOT:
// Remaining tokens (presumably the default branch): decided by whether the
// token has a left binding power.
12536 return pm_binding_powers[
type].
left == PM_BINDING_POWER_UNSET;
/**
 * Parses an expression that may be prefixed with a unary `*`.
 *
 * When accept1 succeeds for PM_TOKEN_USTAR, the operand is parsed with
 * parse_value_expression (command calls and labels disabled, one level
 * deeper, reporting PM_ERR_EXPECT_EXPRESSION_AFTER_STAR) and wrapped in a
 * splat node. Otherwise the call is forwarded to parse_value_expression with
 * the caller's `accepts_command_call`, `diag_id` and `depth`, and labels
 * disabled.
 */
12545parse_starred_expression(
pm_parser_t *parser, pm_binding_power_t binding_power,
bool accepts_command_call,
pm_diagnostic_id_t diag_id, uint16_t depth) {
12546 if (accept1(parser, PM_TOKEN_USTAR)) {
12548 pm_node_t *expression = parse_value_expression(parser, binding_power,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
// `operator` is declared on a line not visible in this excerpt.
12549 return UP(pm_splat_node_create(parser, &
operator, expression));
12552 return parse_value_expression(parser, binding_power, accepts_command_call,
false, diag_id, depth);
/**
 * Per-node callback (it is handed to pm_visit_node elsewhere in this file)
 * that removes bookkeeping references to `node`. `data` is an opaque pointer;
 * the lines that convert it (presumably to the parser) are not visible here.
 */
12556pm_node_unreference_each(
const pm_node_t *node,
void *data) {
12557 switch (PM_NODE_TYPE(node)) {
// Block-exit style nodes: search parser->current_block_exits for this node.
// The removal code that follows a hit is not visible in this excerpt.
12562 case PM_BREAK_NODE:
12564 case PM_REDO_NODE: {
12568 while (index < parser->current_block_exits->size) {
12571 if (block_exit == node) {
// Local variable reads: drop the node from the implicit parameters list.
12594 case PM_LOCAL_VARIABLE_READ_NODE:
12595 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12599 for (
size_t index = 0; index < implicit_parameters->
size; index++) {
12600 if (implicit_parameters->
nodes[index] == node) {
// Close the gap by shifting the tail down one slot; unnecessary when the
// match is already the last element.
12604 if (index != implicit_parameters->
size - 1) {
12605 memmove(&implicit_parameters->
nodes[index], &implicit_parameters->
nodes[index + 1], (implicit_parameters->
size - index - 1) *
sizeof(
pm_node_t *));
12608 implicit_parameters->
size--;
// Walk `node` with pm_visit_node, invoking pm_node_unreference_each with the
// parser as its data argument.
12627 pm_visit_node(node, pm_node_unreference_each, parser);
// Build the setter form of a name: copy the constant's bytes and append '='.
12641 size_t length = constant->
length;
// One extra byte is allocated for the trailing '='.
12642 uint8_t *name =
xcalloc(length + 1,
sizeof(uint8_t));
// On allocation failure, bail out and leave *name_field untouched.
12643 if (name == NULL)
return;
12645 memcpy(name, constant->
start, length);
12646 name[length] =
'=';
// Store the id of the new name. NOTE(review): the "_owned" insert presumably
// hands ownership of `name` to the constant pool (no free is visible here);
// confirm against pm_constant_pool_insert_owned.
12651 *name_field = pm_constant_pool_insert_owned(&parser->
constant_pool, name, length + 1);
// Report, on the target node itself, the "not writable" error that matches
// the kind of literal or keyword being assigned to.
12662 switch (PM_NODE_TYPE(target)) {
12663 case PM_SOURCE_ENCODING_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING);
break;
12664 case PM_FALSE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE);
break;
12665 case PM_SOURCE_FILE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE);
break;
12666 case PM_SOURCE_LINE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE);
break;
12667 case PM_NIL_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL);
break;
12668 case PM_SELF_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF);
break;
12669 case PM_TRUE_NODE: pm_parser_err_node(parser, target, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE);
break;
// Derive a constant id from the source bytes spanned by the target node. How
// the id is used afterwards is not visible in this excerpt.
12673 pm_constant_id_t name = pm_parser_constant_id_raw(parser, parser->
start + PM_NODE_START(target), parser->
start + PM_NODE_END(target));
// Convert an already-parsed expression node into a write target, dispatching
// on the node type. Several return statements and some case labels are on
// lines not visible in this excerpt.
12689 switch (PM_NODE_TYPE(target)) {
12690 case PM_MISSING_NODE:
// Literals and keywords that can never be assigned: handled by
// parse_unwriteable_target.
12692 case PM_SOURCE_ENCODING_NODE:
12693 case PM_FALSE_NODE:
12694 case PM_SOURCE_FILE_NODE:
12695 case PM_SOURCE_LINE_NODE:
12698 case PM_TRUE_NODE: {
12701 return parse_unwriteable_target(parser, target);
// Class variable read: retagged in place as a class variable target.
12703 case PM_CLASS_VARIABLE_READ_NODE:
12705 target->
type = PM_CLASS_VARIABLE_TARGET_NODE;
// Constant path: an error is reported when context_def_p holds; the node is
// retagged in place as a constant path target.
12707 case PM_CONSTANT_PATH_NODE:
12708 if (context_def_p(parser)) {
12709 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12713 target->
type = PM_CONSTANT_PATH_TARGET_NODE;
// Constant read: same check, retagged in place as a constant target.
12716 case PM_CONSTANT_READ_NODE:
12717 if (context_def_p(parser)) {
12718 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_IN_METHOD);
12722 target->
type = PM_CONSTANT_TARGET_NODE;
// Back references and numbered references are reported as read-only.
12725 case PM_BACK_REFERENCE_READ_NODE:
12726 case PM_NUMBERED_REFERENCE_READ_NODE:
12727 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
// Global variable read: retagged in place as a global variable target.
12729 case PM_GLOBAL_VARIABLE_READ_NODE:
12731 target->
type = PM_GLOBAL_VARIABLE_TARGET_NODE;
// Local variable read: names that look like numbered parameters are reported
// as reserved and the node is unreferenced; the read recorded for the local in
// its scope is undone with pm_locals_unread before the node is retagged.
12733 case PM_LOCAL_VARIABLE_READ_NODE: {
12734 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
12735 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->
start + PM_NODE_START(target));
12736 pm_node_unreference(parser, target);
12740 uint32_t name = cast->
name;
12741 uint32_t depth = cast->
depth;
12742 pm_locals_unread(&pm_parser_scope_find(parser, depth)->locals, name);
12745 target->
type = PM_LOCAL_VARIABLE_TARGET_NODE;
// `it` local read: a fresh local variable target node (depth 0) is created at
// the same location and the original read node is unreferenced.
12749 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12751 pm_node_t *node = UP(pm_local_variable_target_node_create(parser, &target->
location, name, 0));
12753 pm_node_unreference(parser, target);
// Instance variable read: retagged in place as an instance variable target.
12757 case PM_INSTANCE_VARIABLE_READ_NODE:
12759 target->
type = PM_INSTANCE_VARIABLE_TARGET_NODE;
// Multi target: reported as unexpected when `splat_parent` is set.
12761 case PM_MULTI_TARGET_NODE:
12762 if (splat_parent) {
12765 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
12769 case PM_SPLAT_NODE: {
// Call node: depending on conditions only partly visible here, the call
// becomes a local variable target, a call target (attribute write) or an
// index target.
12778 case PM_CALL_NODE: {
12790 (call->
block == NULL)
12803 pm_constant_id_t name = pm_parser_local_add_location(parser, &message_loc, 0);
12805 return UP(pm_local_variable_target_node_create(parser, &message_loc, name, 0));
// Safe navigation is reported as an error when `multiple` is set.
12809 if (multiple && PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_SAFE_NAVIGATION)) {
12810 pm_parser_err_node(parser, (
const pm_node_t *) call, PM_ERR_UNEXPECTED_SAFE_NAVIGATION);
// Rewrite the call's name via parse_write_name before building the call target.
12813 parse_write_name(parser, &call->
name);
12814 return UP(pm_call_target_node_create(parser, call));
12821 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
12822 return UP(pm_index_target_node_create(parser, call));
// Anything that reaches this point is reported as an unexpected write target.
12830 pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
// Convert the node with parse_target (splat_parent = false), then check what
// follows it. The full condition is only partly visible: PM_ERR_WRITE_TARGET_UNEXPECTED
// is reported on the result when, among other things, the next token is not
// `=` and the parser is not inside parentheses looking at `)`.
12841 pm_node_t *result = parse_target(parser, target, multiple,
false);
12846 !match1(parser, PM_TOKEN_EQUAL) &&
12848 !(context_p(parser,
PM_CONTEXT_PARENS) && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
12850 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
// When a shareable-constant setting other than NONE is in effect, wrap the
// write node in a shareable constant node carrying that setting. Where
// `shareable_constant` comes from is not visible in this excerpt.
12864 if (shareable_constant != PM_SCOPE_SHAREABLE_CONSTANT_NONE) {
12865 return UP(pm_shareable_constant_node_create(parser, write, shareable_constant));
// Convert an already-parsed expression node plus a value into a write node,
// dispatching on the target's node type. Many lines (node creation, returns)
// are not visible in this excerpt.
12876 switch (PM_NODE_TYPE(target)) {
12877 case PM_MISSING_NODE:
12879 case PM_CLASS_VARIABLE_READ_NODE: {
// Constant path write: an error is reported when context_def_p holds; the
// result goes through parse_shareable_constant_write.
12883 case PM_CONSTANT_PATH_NODE: {
12886 if (context_def_p(parser)) {
12887 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12890 return parse_shareable_constant_write(parser, node);
// Constant write: same handling as the constant path case.
12892 case PM_CONSTANT_READ_NODE: {
12895 if (context_def_p(parser)) {
12896 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_IN_METHOD);
12899 return parse_shareable_constant_write(parser, node);
// Back references and numbered references are reported as read-only.
12901 case PM_BACK_REFERENCE_READ_NODE:
12902 case PM_NUMBERED_REFERENCE_READ_NODE:
12903 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
12905 case PM_GLOBAL_VARIABLE_READ_NODE: {
// Local variable write: names that look like numbered parameters produce an
// error whose id depends on whether the scope's parameters flags include
// PM_SCOPE_PARAMETERS_NUMBERED_FOUND; the read recorded for the local is
// undone before the write node is created.
12909 case PM_LOCAL_VARIABLE_READ_NODE: {
12914 uint32_t depth = local_read->
depth;
12915 pm_scope_t *scope = pm_parser_scope_find(parser, depth);
12917 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(target), PM_NODE_LENGTH(target))) {
12918 pm_diagnostic_id_t diag_id = (scope->
parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND) ? PM_ERR_EXPRESSION_NOT_WRITABLE_NUMBERED : PM_ERR_PARAMETER_NUMBERED_RESERVED;
12919 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(target), PM_NODE_LENGTH(target), diag_id, parser->
start + PM_NODE_START(target));
12920 pm_node_unreference(parser, target);
12923 pm_locals_unread(&scope->
locals, name);
12925 return UP(pm_local_variable_write_node_create(parser, name, depth, value, &location,
operator));
// `it` local write: a local variable write node (depth 0) is created at the
// target's location and the original read node is unreferenced.
12927 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
12929 pm_node_t *node = UP(pm_local_variable_write_node_create(parser, name, 0, value, &target->
location,
operator));
12931 pm_node_unreference(parser, target);
12935 case PM_INSTANCE_VARIABLE_READ_NODE: {
12939 case PM_MULTI_TARGET_NODE:
// Splat target: appended to a multi target, which then becomes a multi write.
12941 case PM_SPLAT_NODE: {
12949 pm_multi_target_node_targets_append(parser, multi_target, UP(splat));
12951 return UP(pm_multi_write_node_create(parser, multi_target,
operator, value));
// Call target: depending on conditions only partly visible here, this becomes
// a local variable write, an attribute write or an index write.
12953 case PM_CALL_NODE: {
12965 (call->
block == NULL)
// Local variable write path: the message location is checked against numbered
// parameter names, added as a local, and used for the new write node.
12979 pm_refute_numbered_parameter(parser, message_loc.
start, message_loc.
length);
12980 pm_parser_local_add_location(parser, &message_loc, 0);
12982 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, parser->
start + PM_LOCATION_START(&message_loc), parser->
start + PM_LOCATION_END(&message_loc));
12983 target = UP(pm_local_variable_write_node_create(parser, constant_id, 0, value, &message_loc,
operator));
// Attribute write path: the value becomes an argument, the call is extended to
// cover the arguments, the `=` location is recorded and the name is rewritten
// via parse_write_name.
13002 pm_arguments_node_arguments_append(parser->
arena, arguments, value);
13003 PM_NODE_LENGTH_SET_NODE(call, arguments);
13004 call->
equal_loc = TOK2LOC(parser,
operator);
13006 parse_write_name(parser, &call->
name);
13007 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
// Index write path: the value is appended to the call's arguments (an
// arguments node is created by the line below; its guarding condition is not
// visible) and the call is renamed to `[]=`.
13016 if (PM_NODE_FLAG_P(call, PM_CALL_NODE_FLAGS_INDEX)) {
13018 call->
arguments = pm_arguments_node_create(parser);
13021 pm_arguments_node_arguments_append(parser->
arena, call->
arguments, value);
13022 PM_NODE_LENGTH_SET_NODE(target, value);
13025 call->
name = pm_parser_constant_id_constant(parser,
"[]=", 3);
13026 call->
equal_loc = TOK2LOC(parser,
operator);
13030 pm_node_flag_set(UP(call), PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
// The value is unreferenced and an unexpected-write-target error is reported
// on the operator token; the conditions leading here are not visible.
13045 pm_node_unreference(parser, value);
13052 pm_parser_err_token(parser,
operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
// Report the "not writable" error that matches the kind of literal or keyword
// being assigned to. Unlike the target-only variant, the error is attached to
// the `equals` token rather than to the target node.
13065 switch (PM_NODE_TYPE(target)) {
13066 case PM_SOURCE_ENCODING_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_ENCODING);
break;
13067 case PM_FALSE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FALSE);
break;
13068 case PM_SOURCE_FILE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_FILE);
break;
13069 case PM_SOURCE_LINE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_LINE);
break;
13070 case PM_NIL_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_NIL);
break;
13071 case PM_SELF_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_SELF);
break;
13072 case PM_TRUE_NODE: pm_parser_err_token(parser, equals, PM_ERR_EXPRESSION_NOT_WRITABLE_TRUE);
break;
/**
 * Parses a comma-separated list of write targets, starting from an
 * already-parsed first target, and collects them into a multi target node
 * (`result`, created on a line not visible in this excerpt).
 */
13093parse_targets(
pm_parser_t *parser,
pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
// Track whether a splat has already been seen, starting with the first target.
13094 bool has_rest = PM_NODE_TYPE_P(first_target, PM_SPLAT_NODE);
13097 pm_multi_target_node_targets_append(parser, result, parse_target(parser, first_target,
true,
false));
// Each further target is introduced by a comma.
13099 while (accept1(parser, PM_TOKEN_COMMA)) {
// `*name` (or bare `*`): a second splat is reported as
// PM_ERR_MULTI_ASSIGN_MULTI_SPLATS (the guarding condition is not visible).
13100 if (accept1(parser, PM_TOKEN_USTAR)) {
13105 pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
// The splat's name is optional: it is parsed only when an expression follows,
// and converted with splat_parent = true.
13111 if (token_begins_expression_p(parser->
current.type)) {
13112 name = parse_expression(parser, binding_power,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
13113 name = parse_target(parser, name,
true,
true);
13116 pm_node_t *splat = UP(pm_splat_node_create(parser, &star_operator, name));
13117 pm_multi_target_node_targets_append(parser, result, splat);
13119 }
else if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
// Parenthesized target: parsed as an expression and converted to a target.
// The context_pop below presumably pairs with a context_push on a line not
// visible here.
13121 pm_node_t *target = parse_expression(parser, binding_power,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13122 target = parse_target(parser, target,
true,
false);
13124 pm_multi_target_node_targets_append(parser, result, target);
13125 context_pop(parser);
13126 }
else if (token_begins_expression_p(parser->
current.type)) {
// Ordinary target: parsed as an expression and converted to a target.
13127 pm_node_t *target = parse_expression(parser, binding_power,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
13128 target = parse_target(parser, target,
true,
false);
13130 pm_multi_target_node_targets_append(parser, result, target);
13131 }
else if (!match1(parser, PM_TOKEN_EOF)) {
// Trailing comma with nothing parseable after it (and not at EOF): a `rest`
// node, created on a line not visible here, is appended.
13135 pm_multi_target_node_targets_append(parser, result, rest);
/**
 * Parses a target list with parse_targets, accepts an optional newline, and
 * reports PM_ERR_WRITE_TARGET_UNEXPECTED on the result unless the next token
 * is `=` or `)`. The return statement is not visible in this excerpt.
 */
13148parse_targets_validate(
pm_parser_t *parser,
pm_node_t *first_target, pm_binding_power_t binding_power, uint16_t depth) {
13149 pm_node_t *result = parse_targets(parser, first_target, binding_power, depth);
// The result of accept1 is deliberately ignored: the newline is optional.
13150 accept1(parser, PM_TOKEN_NEWLINE);
13153 if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
13154 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
// Skip any leading statement separators (semicolons and newlines).
13167 while (accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE));
// No statements at all when the current token already terminates the context.
13170 if (context_terminator(context, &parser->
current))
return NULL;
13176 context_push(parser, context);
// Parse one statement and append it to the statements node. The enclosing
// loop header is on a line not visible in this excerpt.
13179 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT,
true,
false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
13180 pm_statements_node_body_append(parser, statements, node,
true);
// After an explicit separator, swallow any further separators and stop when
// the context's terminator is reached.
13193 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
13196 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13197 if (context_terminator(context, &parser->
current))
break;
13207 if (context_terminator(context, &parser->
current))
break;
// A missing node means the expression could not be parsed: lex past the
// current token so the loop keeps moving, and handle EOF separately.
13219 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
13220 parser_lex(parser);
13226 if (match1(parser, PM_TOKEN_EOF)) {
13231 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
13232 if (context_terminator(context, &parser->
current))
break;
13233 }
else if (!accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
13243 context_pop(parser);
// `last_value` starts out true and is cleared under a condition not visible
// here; it is passed to the void-statement check.
13244 bool last_value =
true;
13248 last_value =
false;
13253 pm_void_statements_check(parser, statements, last_value);
// When a previously seen equal key was found (`duplicated` is non-NULL), a
// PM_WARN_DUPLICATED_HASH_KEY diagnostic is appended. The lookup that sets
// `duplicated` and the remaining diagnostic arguments are not visible here.
13266 if (duplicated != NULL) {
13270 pm_diagnostic_list_append_format(
13274 PM_WARN_DUPLICATED_HASH_KEY,
// Add the node to the set of static literals; pm_static_literals_add returns
// non-NULL when an equal literal was already present, in which case a
// PM_WARN_DUPLICATED_WHEN_CLAUSE diagnostic covering the node's span is
// appended. The remaining diagnostic arguments are not visible here.
13292 if ((previous = pm_static_literals_add(&parser->
line_offsets, parser->
start, parser->
start_line, literals, node,
false)) != NULL) {
13293 pm_diagnostic_list_append_format(
13295 PM_NODE_START(node),
13296 PM_NODE_LENGTH(node),
13297 PM_WARN_DUPLICATED_WHEN_CLAUSE,
// Parses the key/value elements of a hash literal or bare keyword hash and
// returns whether any `**` splat element was seen. The function header, the
// loop header and several branches are on lines not visible in this excerpt.
13309 assert(PM_NODE_TYPE_P(node, PM_HASH_NODE) || PM_NODE_TYPE_P(node, PM_KEYWORD_HASH_NODE));
13310 bool contains_keyword_splat =
false;
13315 switch (parser->
current.type) {
// `**value` element. The value is optional: when no expression follows, the
// forwarding-keywords check is run on the operator instead (branch structure
// partly not visible).
13316 case PM_TOKEN_USTAR_STAR: {
13317 parser_lex(parser);
13321 if (match1(parser, PM_TOKEN_BRACE_LEFT)) {
13327 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13328 }
else if (token_begins_expression_p(parser->
current.type)) {
13329 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH, (uint16_t) (depth + 1));
13331 pm_parser_scope_forwarding_keywords_check(parser, &
operator);
13334 element = UP(pm_assoc_splat_node_create(parser, value, &
operator));
13335 contains_keyword_splat =
true;
// `label:` element. The label becomes a symbol key and is recorded for
// duplicate-key detection.
13338 case PM_TOKEN_LABEL: {
13340 parser_lex(parser);
13342 pm_node_t *key = UP(pm_symbol_node_label_create(parser, &label));
13343 pm_hash_key_static_literals_add(parser, literals, key);
// An explicit value follows the label when an expression can begin here.
13347 if (token_begins_expression_p(parser->
current.type)) {
13348 value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_HASH_EXPRESSION_AFTER_LABEL, (uint16_t) (depth + 1));
// Otherwise the value is implied by the label's own text (the label minus its
// final byte): read as a constant, a local variable or a variable call, under
// conditions not visible here. Names ending in `!` or `?` are rejected as
// local variable reads.
13351 pm_token_t constant = { .type = PM_TOKEN_CONSTANT, .start = label.
start, .end = label.
end - 1 };
13352 value = UP(pm_constant_read_node_create(parser, &constant));
13357 if (identifier.
end[-1] ==
'!' || identifier.
end[-1] ==
'?') {
13358 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &identifier, PM_ERR_INVALID_LOCAL_VARIABLE_READ);
13360 depth = pm_parser_local_depth(parser, &identifier);
13364 value = UP(pm_call_node_variable_call_create(parser, &identifier));
13366 value = UP(pm_local_variable_read_node_create(parser, &identifier, (uint32_t) depth));
// The implied value is wrapped in an implicit node.
13371 value = UP(pm_implicit_node_create(parser, value));
13374 element = UP(pm_assoc_node_create(parser, key, NULL, value));
// General `key => value` element (labels are allowed while parsing the key).
13378 pm_node_t *key = parse_value_expression(parser, PM_BINDING_POWER_DEFINED,
false,
true, PM_ERR_HASH_KEY, (uint16_t) (depth + 1));
// String keys are flagged as frozen static literals.
13382 if (PM_NODE_TYPE_P(key, PM_STRING_NODE)) {
13383 pm_node_flag_set(key, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
13386 pm_hash_key_static_literals_add(parser, literals, key);
// A `=>` is required unless the key is itself a label-style symbol.
13389 if (!pm_symbol_node_label_p(parser, key)) {
13390 expect1(parser, PM_TOKEN_EQUAL_GREATER, PM_ERR_HASH_ROCKET);
13394 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13395 element = UP(pm_assoc_node_create(parser, key, NTOK2PTR(
operator), value));
13400 if (PM_NODE_TYPE_P(node, PM_HASH_NODE)) {
// Elements are comma separated; stop at the first missing comma.
13407 if (!accept1(parser, PM_TOKEN_COMMA))
break;
// After a comma, keep going when another element can start here.
13411 if (match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL))
continue;
13415 if (token_begins_expression_p(parser->
current.type))
continue;
13421 return contains_keyword_splat;
// Decides something about `argument` based on its shape; the function header
// and most return statements are not visible in this excerpt.
// A label-style symbol is handled first.
13426 if (pm_symbol_node_label_p(parser, argument)) {
13430 switch (PM_NODE_TYPE(argument)) {
// Call nodes are inspected for keyword/splat arguments and for an attached
// block.
13431 case PM_CALL_NODE: {
13434 if (PM_NODE_FLAG_P(cast->
arguments, PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS | PM_ARGUMENTS_NODE_FLAGS_CONTAINS_SPLAT)) {
13437 if (cast->
block != NULL) {
// Fallback: the answer is whether a `=>` token could be accepted here. Note
// that accept1 calls parser_lex when it matches.
13445 return accept1(parser, PM_TOKEN_EQUAL_GREATER);
// Create the arguments node (the guarding condition is on a line not visible
// here; presumably this happens only when none exists yet), then append the
// argument to it using the parser's arena.
13454 arguments->
arguments = pm_arguments_node_create(parser);
13457 pm_arguments_node_arguments_append(parser->
arena, arguments->
arguments, argument);
/**
 * Parse a list of call arguments into `arguments`.
 *
 * `terminator` is the token that ends the list; parse_arguments_list passes
 * PM_TOKEN_PARENTHESIS_RIGHT for parenthesized calls and PM_TOKEN_EOF for
 * command calls. `accepts_forwarding` controls whether a leading `...` may be
 * treated as argument forwarding. `depth` is passed on, incremented, to every
 * nested parse call.
 *
 * The four `parsed_*` flags record what has already been seen so that
 * ordering diagnostics (second bare hash, argument after a block argument,
 * argument after `...`, splat after a bare hash) can be reported.
 *
 * NOTE(review): this listing of the function is not contiguous; some
 * statements and closing braces are not shown.
 */
13464parse_arguments(
pm_parser_t *parser,
pm_arguments_t *arguments,
bool accepts_forwarding, pm_token_type_t terminator, uint16_t depth) {
// Left binding power of the current token; it is compared against
// PM_BINDING_POWER_RANGE in the condition below.
13465 pm_binding_power_t binding_power = pm_binding_powers[parser->
current.type].
left;
13470 match2(parser, terminator, PM_TOKEN_EOF) ||
13471 (binding_power != PM_BINDING_POWER_UNSET && binding_power < PM_BINDING_POWER_RANGE) ||
13477 bool parsed_first_argument =
false;
13478 bool parsed_bare_hash =
false;
13479 bool parsed_block_argument =
false;
13480 bool parsed_forwarding_arguments =
false;
// Loop until end of input; the body also breaks out on the terminator, on a
// missing node, and while the parser is recovering.
13482 while (!match1(parser, PM_TOKEN_EOF)) {
13483 if (parsed_forwarding_arguments) {
13484 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_FORWARDING_ELLIPSES);
13489 switch (parser->
current.type) {
// A unary ** or a label starts a run of assocs that becomes one hash argument.
13490 case PM_TOKEN_USTAR_STAR:
13491 case PM_TOKEN_LABEL: {
13492 if (parsed_bare_hash) {
13493 pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
13497 argument = UP(hash);
13500 bool contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(hash), (uint16_t) (depth + 1));
13502 parse_arguments_append(parser, arguments, argument);
13504 pm_node_flags_t flags = PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13505 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13506 pm_node_flag_set(UP(arguments->
arguments), flags);
13508 pm_static_literals_free(&hash_keys);
13509 parsed_bare_hash =
true;
// A unary & introduces a block argument.
13513 case PM_TOKEN_UAMPERSAND: {
13514 parser_lex(parser);
13518 if (token_begins_expression_p(parser->
current.type)) {
13519 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13521 pm_parser_scope_forwarding_block_check(parser, &
operator);
13524 argument = UP(pm_block_argument_node_create(parser, &
operator, expression));
13525 if (parsed_block_argument) {
13526 parse_arguments_append(parser, arguments, argument);
13528 arguments->
block = argument;
// A comma right after a block argument is reported as an error.
13531 if (match1(parser, PM_TOKEN_COMMA)) {
13532 pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
13535 parsed_block_argument =
true;
// A unary * introduces a splat.
13538 case PM_TOKEN_USTAR: {
13539 parser_lex(parser);
// When the * is directly followed by ), a comma, ; or ], the splat node is
// created with a NULL expression.
13542 if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
13543 pm_parser_scope_forwarding_positionals_check(parser, &
operator);
13544 argument = UP(pm_splat_node_create(parser, &
operator, NULL));
13545 if (parsed_bare_hash) {
13546 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13549 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
// The diagnostic spans from the start of the * operator to the end of its operand.
13551 if (parsed_bare_hash) {
13552 pm_parser_err(parser, PM_TOKEN_START(parser, &
operator), PM_NODE_END(expression) - PM_TOKEN_START(parser, &
operator), PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
13555 argument = UP(pm_splat_node_create(parser, &
operator, expression));
13558 parse_arguments_append(parser, arguments, argument);
// A leading ... is either a range with no left operand or argument forwarding.
13561 case PM_TOKEN_UDOT_DOT_DOT: {
13562 if (accepts_forwarding) {
13563 parser_lex(parser);
13565 if (token_begins_expression_p(parser->
current.type)) {
13570 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_RANGE,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
// A range as the right operand is reported at its own operator location.
13575 if (PM_NODE_TYPE_P(right, PM_RANGE_NODE)) {
13577 pm_parser_err(parser, range->operator_loc.start, range->operator_loc.length, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
13580 argument = UP(pm_range_node_create(parser, NULL, &
operator, right));
13582 pm_parser_scope_forwarding_all_check(parser, &parser->
previous);
13583 if (parsed_first_argument && terminator == PM_TOKEN_EOF) {
13584 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORWARDING_UNBOUND);
13587 argument = UP(pm_forwarding_arguments_node_create(parser, &parser->
previous));
13588 parse_arguments_append(parser, arguments, argument);
13589 pm_node_flag_set(UP(arguments->
arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_FORWARDING);
13591 parsed_forwarding_arguments =
true;
// No argument has been produced yet: parse an ordinary expression.
13598 if (argument == NULL) {
13599 argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument,
true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
13602 bool contains_keywords =
false;
13603 bool contains_keyword_splat =
false;
13605 if (argument_allowed_for_bare_hash(parser, argument)) {
13606 if (parsed_bare_hash) {
13607 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
// The previous token is =>, so the expression just parsed is a hash key.
13611 if (parser->
previous.
type == PM_TOKEN_EQUAL_GREATER) {
13616 contains_keywords =
true;
13620 pm_hash_key_static_literals_add(parser, &hash_keys, argument);
13623 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
13624 argument = UP(pm_assoc_node_create(parser, argument, NTOK2PTR(
operator), value));
13626 pm_keyword_hash_node_elements_append(parser->
arena, bare_hash, argument);
13627 argument = UP(bare_hash);
// After a comma, any further assocs are parsed into the same bare hash.
13630 if (accept1(parser, PM_TOKEN_COMMA) && (
13631 token_begins_expression_p(parser->
current.type) ||
13632 match2(parser, PM_TOKEN_USTAR_STAR, PM_TOKEN_LABEL)
13634 contains_keyword_splat = parse_assocs(parser, &hash_keys, UP(bare_hash), (uint16_t) (depth + 1));
13637 pm_static_literals_free(&hash_keys);
13638 parsed_bare_hash =
true;
13641 parse_arguments_append(parser, arguments, argument);
13643 pm_node_flags_t flags = 0;
13644 if (contains_keywords) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS;
13645 if (contains_keyword_splat) flags |= PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORD_SPLAT;
13646 pm_node_flag_set(UP(arguments->
arguments), flags);
13652 parsed_first_argument =
true;
// Stop on a missing node or while the parser is recovering.
13655 if (PM_NODE_TYPE_P(argument, PM_MISSING_NODE) || parser->
recovering)
break;
// A newline before the separator is only consumed when a real terminator
// (anything other than PM_TOKEN_EOF) was supplied.
13660 bool accepted_newline =
false;
13661 if (terminator != PM_TOKEN_EOF) {
13662 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
13665 if (parser->
previous.
type == PM_TOKEN_COMMA && parsed_bare_hash) {
13669 }
else if (accept1(parser, PM_TOKEN_COMMA)) {
// A comma that follows an accepted newline is invalid.
13672 if (accepted_newline) {
13673 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
13679 if (PM_NODE_TYPE_P(argument, PM_CALL_NODE)) {
13682 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
// Stop once the terminator is the current token.
13694 if (match1(parser, terminator))
break;
/**
 * Parse a parenthesized, destructured required parameter into a multi-target
 * node.
 *
 * The opening `(` is expected first and recorded as the node's opening. Each
 * comma-separated element is either a nested parenthesized group (handled by
 * recursion), a `*` splat with an optional identifier, or a plain identifier.
 * Identifiers are checked for repetition and added as locals. An optional
 * newline and then the closing `)` are consumed at the end.
 *
 * NOTE(review): this listing is not contiguous; some statements and braces
 * are not shown.
 */
13709parse_required_destructured_parameter(
pm_parser_t *parser) {
13710 expect1(parser, PM_TOKEN_PARENTHESIS_LEFT, PM_ERR_EXPECT_LPAREN_REQ_PARAMETER);
13713 pm_multi_target_node_opening_set(parser, node, &parser->
previous);
// A `)` directly after at least one target (i.e. after a trailing comma)
// yields an implicit rest node and a loose-comma diagnostic.
13722 if (node->
lefts.
size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
13723 param = UP(pm_implicit_rest_node_create(parser, &parser->
previous));
13724 pm_multi_target_node_targets_append(parser, node, param);
13725 pm_parser_err_current(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
// Nested group: recurse.
13729 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
13730 param = UP(parse_required_destructured_parameter(parser));
13731 }
else if (accept1(parser, PM_TOKEN_USTAR)) {
// Splat element; the name after * is optional.
13735 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13737 value = UP(pm_required_parameter_node_create(parser, &name));
13738 if (pm_parser_parameter_name_check(parser, &name)) {
13739 pm_node_flag_set_repeated_parameter(value);
13741 pm_parser_local_add_token(parser, &name, 1);
13744 param = UP(pm_splat_node_create(parser, &star, value));
// Otherwise a plain identifier is required.
13746 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EXPECT_IDENT_REQ_PARAMETER);
13749 param = UP(pm_required_parameter_node_create(parser, &name));
13750 if (pm_parser_parameter_name_check(parser, &name)) {
13751 pm_node_flag_set_repeated_parameter(param);
13753 pm_parser_local_add_token(parser, &name, 1);
13756 pm_multi_target_node_targets_append(parser, node, param);
13757 }
while (accept1(parser, PM_TOKEN_COMMA));
13759 accept1(parser, PM_TOKEN_NEWLINE);
13760 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN_REQ_PARAMETER);
13761 pm_multi_target_node_closing_set(parser, node, &parser->
previous);
// Ordering states used while parsing a parameter list. The numeric order is
// significant: the state-update code in this file compares states with `<`,
// `>`, `<=` and `>=`. PM_PARAMETERS_NO_CHANGE is 0, which is also the value of
// every entry that the parameters_ordering table does not list explicitly.
// NOTE(review): the opening line of this enum is not visible in this view.
13771 PM_PARAMETERS_NO_CHANGE = 0,
13772 PM_PARAMETERS_ORDER_NOTHING_AFTER = 1,
13773 PM_PARAMETERS_ORDER_KEYWORDS_REST,
13774 PM_PARAMETERS_ORDER_KEYWORDS,
13775 PM_PARAMETERS_ORDER_REST,
13776 PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13777 PM_PARAMETERS_ORDER_OPTIONAL,
13778 PM_PARAMETERS_ORDER_NAMED,
13779 PM_PARAMETERS_ORDER_NONE,
13780} pm_parameters_order_t;
// Lookup table, indexed by token type, giving the ordering state that a token
// introduces in a parameter list. Token types without a designated
// initializer are zero-initialized, i.e. PM_PARAMETERS_NO_CHANGE.
// Note that both * tokens map to PM_PARAMETERS_ORDER_AFTER_OPTIONAL rather
// than PM_PARAMETERS_ORDER_REST.
// NOTE(review): the closing line of this initializer is not visible in this view.
13785static pm_parameters_order_t parameters_ordering[PM_TOKEN_MAXIMUM] = {
13786 [0] = PM_PARAMETERS_NO_CHANGE,
13787 [PM_TOKEN_UAMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13788 [PM_TOKEN_AMPERSAND] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13789 [PM_TOKEN_UDOT_DOT_DOT] = PM_PARAMETERS_ORDER_NOTHING_AFTER,
13790 [PM_TOKEN_IDENTIFIER] = PM_PARAMETERS_ORDER_NAMED,
13791 [PM_TOKEN_PARENTHESIS_LEFT] = PM_PARAMETERS_ORDER_NAMED,
13792 [PM_TOKEN_EQUAL] = PM_PARAMETERS_ORDER_OPTIONAL,
13793 [PM_TOKEN_LABEL] = PM_PARAMETERS_ORDER_KEYWORDS,
13794 [PM_TOKEN_USTAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13795 [PM_TOKEN_STAR] = PM_PARAMETERS_ORDER_AFTER_OPTIONAL,
13796 [PM_TOKEN_USTAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST,
13797 [PM_TOKEN_STAR_STAR] = PM_PARAMETERS_ORDER_KEYWORDS_REST
// Body fragment of the routine that advances the ordering state `*current`
// for `token`; presumably update_parameter_state, which parse_parameters
// calls. NOTE(review): the signature, the return statements of most branches
// and the closing braces are not visible in this view.
//
// The state introduced by the token is looked up in parameters_ordering.
13809 pm_parameters_order_t state = parameters_ordering[token->type];
// Tokens that do not affect ordering succeed immediately.
13810 if (state == PM_PARAMETERS_NO_CHANGE)
return true;
// A named parameter following an optional one moves the state to AFTER_OPTIONAL.
13814 if (*current == PM_PARAMETERS_ORDER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
13815 *current = PM_PARAMETERS_ORDER_AFTER_OPTIONAL;
13817 }
else if (*current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL && state == PM_PARAMETERS_ORDER_NAMED) {
// A unary * while already in AFTER_OPTIONAL is reported with PM_ERR_PARAMETER_STAR.
13821 if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13822 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
13824 }
else if (token->type == PM_TOKEN_UDOT_DOT_DOT && (*current >= PM_PARAMETERS_ORDER_KEYWORDS_REST && *current <= PM_PARAMETERS_ORDER_AFTER_OPTIONAL)) {
// `...` in that range of states: a specific diagnostic for AFTER_OPTIONAL,
// the generic ordering diagnostic otherwise.
13825 pm_parser_err_token(parser, token, *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL ? PM_ERR_PARAMETER_FORWARDING_AFTER_REST : PM_ERR_PARAMETER_ORDER);
13827 }
else if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
// Nothing may follow NOTHING_AFTER, and the state may not move to a larger value.
13829 pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
// The stored state is only ever lowered.
13833 if (state < *current) *current = state;
/**
 * Handle a trailing comma at the end of a parameter list.
 *
 * When trailing commas are not allowed, PM_ERR_PARAMETER_WILD_LOOSE_COMMA is
 * reported on the previous token. When `order` is at least
 * PM_PARAMETERS_ORDER_NAMED, an implicit rest node is created from the
 * previous token and installed as `params->rest` if that slot is empty;
 * otherwise PM_ERR_PARAMETER_SPLAT_MULTI is reported and the node is appended
 * to the posts. The remaining visible paths report the loose-comma error.
 *
 * NOTE(review): only part of the parameter list and body is visible here;
 * the call site passes (parser, params, order, in_block, allows_trailing_comma).
 */
13838parse_parameters_handle_trailing_comma(
13841 pm_parameters_order_t order,
13843 bool allows_trailing_comma
13845 if (!allows_trailing_comma) {
13846 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13851 if (order >= PM_PARAMETERS_ORDER_NAMED) {
13853 pm_node_t *param = UP(pm_implicit_rest_node_create(parser, &parser->
previous));
13855 if (params->
rest == NULL) {
13856 pm_parameters_node_rest_set(params, param);
13858 pm_parser_err_node(parser, UP(param), PM_ERR_PARAMETER_SPLAT_MULTI);
13859 pm_parameters_node_posts_append(parser->
arena, params, UP(param));
13863 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
13869 pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
// Parameter-list parser; presumably parse_parameters, which
// parse_block_parameters calls. NOTE(review): the function name line, part of
// the parameter list, and many interior lines are not visible in this view.
//
// Visible parameters:
//   binding_power                - binding power used when parsing default values.
//   uses_parentheses             - when set, a newline may be consumed before a comma.
//   allows_trailing_comma        - forwarded to parse_parameters_handle_trailing_comma.
//   allows_forwarding_parameters - when clear, `...` is reported with diag_id_forwarding.
//   accepts_blocks_in_defaults   - when set, `true` is pushed on the accepts-block stack
//                                  around each default-value expression.
13880 pm_binding_power_t binding_power,
13881 bool uses_parentheses,
13882 bool allows_trailing_comma,
13883 bool allows_forwarding_parameters,
13884 bool accepts_blocks_in_defaults,
// `false` is pushed on the do-loop stack here and popped after the loop.
13889 pm_do_loop_stack_push(parser,
false);
// Start at PM_PARAMETERS_ORDER_NONE, the largest value of pm_parameters_order_t.
13892 pm_parameters_order_t order = PM_PARAMETERS_ORDER_NONE;
13895 bool parsing =
true;
13897 switch (parser->
current.type) {
// Destructured parameter: appended to requireds or to posts depending on `order`.
13898 case PM_TOKEN_PARENTHESIS_LEFT: {
13899 update_parameter_state(parser, &parser->
current, &order);
13900 pm_node_t *param = UP(parse_required_destructured_parameter(parser));
13902 if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
13903 pm_parameters_node_requireds_append(parser->
arena, params, param);
13905 pm_parameters_node_posts_append(parser->
arena, params, param);
// Block parameter introduced by &.
13909 case PM_TOKEN_UAMPERSAND:
13910 case PM_TOKEN_AMPERSAND: {
13911 update_parameter_state(parser, &parser->
current, &order);
13912 parser_lex(parser);
13918 param = (
pm_node_t *) pm_no_block_parameter_node_create(parser, &
operator, &parser->
previous);
13922 bool repeated =
false;
// The name after & is optional.
13923 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
13925 repeated = pm_parser_parameter_name_check(parser, &name);
13926 pm_parser_local_add_token(parser, &name, 1);
13931 param = (
pm_node_t *) pm_block_parameter_node_create(parser, NTOK2PTR(name), &
operator);
13933 pm_node_flag_set_repeated_parameter(param);
// Only the first block parameter occupies the block slot; a later one is
// reported and appended to posts.
13937 if (params->
block == NULL) {
13938 pm_parameters_node_block_set(params, param);
13940 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_BLOCK_MULTI);
13941 pm_parameters_node_posts_append(parser->
arena, params, param);
// Forwarding parameter (...).
13946 case PM_TOKEN_UDOT_DOT_DOT: {
13947 if (!allows_forwarding_parameters) {
13948 pm_parser_err_current(parser, diag_id_forwarding);
13951 bool succeeded = update_parameter_state(parser, &parser->
current, &order);
13952 parser_lex(parser);
13961 pm_parameters_node_posts_append(parser->
arena, params, keyword_rest);
// Report only if the state update itself did not already fail.
13962 if (succeeded) pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
13966 pm_parameters_node_keyword_rest_set(params, UP(param));
// Name-like tokens. Every kind other than an identifier gets its own
// diagnostic in the inner cases below.
13969 case PM_TOKEN_CLASS_VARIABLE:
13970 case PM_TOKEN_IDENTIFIER:
13971 case PM_TOKEN_CONSTANT:
13972 case PM_TOKEN_INSTANCE_VARIABLE:
13973 case PM_TOKEN_GLOBAL_VARIABLE:
13974 case PM_TOKEN_METHOD_NAME: {
13975 parser_lex(parser);
13977 case PM_TOKEN_CONSTANT:
13978 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
13980 case PM_TOKEN_INSTANCE_VARIABLE:
13981 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
13983 case PM_TOKEN_GLOBAL_VARIABLE:
13984 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
13986 case PM_TOKEN_CLASS_VARIABLE:
13987 pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
13989 case PM_TOKEN_METHOD_NAME:
13990 pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
// When an = follows, the = token drives the ordering state; the other visible
// call uses the name token (presumably the else branch).
13995 if (parser->
current.type == PM_TOKEN_EQUAL) {
13996 update_parameter_state(parser, &parser->
current, &order);
13998 update_parameter_state(parser, &parser->
previous, &order);
14002 bool repeated = pm_parser_parameter_name_check(parser, &name);
14003 pm_parser_local_add_token(parser, &name, 1);
// Optional parameter: parse the default value after =.
14005 if (match1(parser, PM_TOKEN_EQUAL)) {
14008 parser_lex(parser);
14013 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser,
true);
14014 pm_node_t *value = parse_value_expression(parser, binding_power,
false,
false, PM_ERR_PARAMETER_NO_DEFAULT, (uint16_t) (depth + 1));
14015 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14020 pm_node_flag_set_repeated_parameter(UP(param));
14022 pm_parameters_node_optionals_append(parser->
arena, params, param);
14028 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &name, PM_ERR_PARAMETER_CIRCULAR);
14031 context_pop(parser);
14040 }
else if (order > PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
// Without a default, the parameter goes to requireds here and to posts on
// the other visible path.
14043 pm_node_flag_set_repeated_parameter(UP(param));
14045 pm_parameters_node_requireds_append(parser->
arena, params, UP(param));
14049 pm_node_flag_set_repeated_parameter(UP(param));
14051 pm_parameters_node_posts_append(parser->
arena, params, UP(param));
// Keyword parameter introduced by a label.
14056 case PM_TOKEN_LABEL: {
14057 if (!uses_parentheses && !in_block) parser->
in_keyword_arg =
true;
14058 update_parameter_state(parser, &parser->
current, &order);
14061 parser_lex(parser);
14068 pm_parser_err(parser, PM_TOKEN_START(parser, &local), PM_TOKEN_LENGTH(&local), PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14069 }
else if (local.
end[-1] ==
'!' || local.
end[-1] ==
'?') {
// A name ending in ! or ? is reported as an invalid local variable write.
14070 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE);
14073 bool repeated = pm_parser_parameter_name_check(parser, &local);
14074 pm_parser_local_add_token(parser, &local, 1);
14076 switch (parser->
current.type) {
// Label followed by a comma, ) or |: required keyword parameter.
14077 case PM_TOKEN_COMMA:
14078 case PM_TOKEN_PARENTHESIS_RIGHT:
14079 case PM_TOKEN_PIPE: {
14080 context_pop(parser);
14082 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14084 pm_node_flag_set_repeated_parameter(param);
14087 pm_parameters_node_keywords_append(parser->
arena, params, param);
// Label followed by ; or a newline: also a required keyword parameter.
14090 case PM_TOKEN_SEMICOLON:
14091 case PM_TOKEN_NEWLINE: {
14092 context_pop(parser);
14094 if (uses_parentheses) {
14099 pm_node_t *param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14101 pm_node_flag_set_repeated_parameter(param);
14104 pm_parameters_node_keywords_append(parser->
arena, params, param);
// Label followed by an expression: optional keyword parameter with that default.
14110 if (token_begins_expression_p(parser->
current.type)) {
14114 if (accepts_blocks_in_defaults) pm_accepts_block_stack_push(parser,
true);
14115 pm_node_t *value = parse_value_expression(parser, binding_power,
false,
false, PM_ERR_PARAMETER_NO_DEFAULT_KW, (uint16_t) (depth + 1));
14116 if (accepts_blocks_in_defaults) pm_accepts_block_stack_pop(parser);
14119 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &local, PM_ERR_PARAMETER_CIRCULAR);
14122 param = UP(pm_optional_keyword_parameter_node_create(parser, &name, value));
14125 param = UP(pm_required_keyword_parameter_node_create(parser, &name));
14129 pm_node_flag_set_repeated_parameter(param);
14132 context_pop(parser);
14133 pm_parameters_node_keywords_append(parser->
arena, params, param);
// Rest parameter introduced by *; the name is optional.
14148 case PM_TOKEN_USTAR:
14149 case PM_TOKEN_STAR: {
14150 update_parameter_state(parser, &parser->
current, &order);
14151 parser_lex(parser);
14155 bool repeated =
false;
14157 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14159 repeated = pm_parser_parameter_name_check(parser, &name);
14160 pm_parser_local_add_token(parser, &name, 1);
14165 pm_node_t *param = UP(pm_rest_parameter_node_create(parser, &
operator, NTOK2PTR(name)));
14167 pm_node_flag_set_repeated_parameter(param);
// Only the first rest parameter occupies the rest slot; a later one is
// reported and appended to posts.
14170 if (params->
rest == NULL) {
14171 pm_parameters_node_rest_set(params, param);
14173 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_SPLAT_MULTI);
14174 pm_parameters_node_posts_append(parser->
arena, params, param);
// Keyword rest parameter introduced by **, or the `**nil` form.
14179 case PM_TOKEN_STAR_STAR:
14180 case PM_TOKEN_USTAR_STAR: {
// The state before this token is kept for the `**nil` check below.
14181 pm_parameters_order_t previous_order = order;
14182 update_parameter_state(parser, &parser->
current, &order);
14183 parser_lex(parser);
14188 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
14189 if (previous_order <= PM_PARAMETERS_ORDER_KEYWORDS) {
14190 pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_NO_KW);
14193 param = UP(pm_no_keywords_parameter_node_create(parser, &
operator, &parser->
previous));
14197 bool repeated =
false;
14198 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
14200 repeated = pm_parser_parameter_name_check(parser, &name);
14201 pm_parser_local_add_token(parser, &name, 1);
14206 param = UP(pm_keyword_rest_parameter_node_create(parser, &
operator, NTOK2PTR(name)));
14208 pm_node_flag_set_repeated_parameter(param);
14213 pm_parameters_node_keyword_rest_set(params, param);
14215 pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
14216 pm_parameters_node_posts_append(parser->
arena, params, param);
14223 parse_parameters_handle_trailing_comma(parser, params, order, in_block, allows_trailing_comma);
14233 if (!parsing)
break;
// A newline before the comma is only consumed for parenthesized lists, and a
// comma that follows such a newline is invalid.
14235 bool accepted_newline =
false;
14236 if (uses_parentheses) {
14237 accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
14240 if (accept1(parser, PM_TOKEN_COMMA)) {
14243 if (accepted_newline) {
14244 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
14252 pm_do_loop_stack_pop(parser);
// Start equal to end means the parameters node has zero length.
14255 if (PM_NODE_START(params) == PM_NODE_END(params)) {
// Returns, as a size_t, the result of pm_line_offset_list_line for the start
// offset of the parser's current token. Presumably the body of
// token_newline_index, which parser_warn_indentation_mismatch calls.
// NOTE(review): the signature is not visible in this view.
14278 return (
size_t) pm_line_offset_list_line(&parser->
line_offsets, PM_TOKEN_START(parser, &parser->
current), 0);
/**
 * Compute a column value for `token` by scanning bytes up to the token's
 * start.
 *
 * Returns -1 on the visible early-exit path when `break_on_non_space` is set.
 * NOTE(review): the initialization of `cursor`, the per-byte tests and the
 * final return are not visible in this view.
 */
14287token_column(
const pm_parser_t *parser,
size_t newline_index,
const pm_token_t *token,
bool break_on_non_space) {
// The scan stops at the first byte of the token.
14289 const uint8_t *end = token->start;
// On line index 0, check whether the source starts with the bytes EF BB BF
// (a UTF-8 byte order mark). NOTE(review): the action taken is not visible;
// presumably the cursor skips those bytes.
14293 newline_index == 0 &&
14294 parser->
start[0] == 0xef &&
14295 parser->
start[1] == 0xbb &&
14296 parser->
start[2] == 0xbf
14299 int64_t column = 0;
14300 for (; cursor < end; cursor++) {
// Round the column up to the next multiple of PM_TAB_WHITESPACE_SIZE.
// NOTE(review): the guarding character test is not visible; presumably a tab.
14303 column = ((column / PM_TAB_WHITESPACE_SIZE) + 1) * PM_TAB_WHITESPACE_SIZE;
14310 if (break_on_non_space)
return -1;
/**
 * Emit PM_WARN_INDENTATION_MISMATCH when a closing token is not at the same
 * column as its opening token.
 *
 * No warning is produced when:
 *   - the opening and closing tokens are on the same line index;
 *   - the opening column is -1 and `if_after_else` is not set;
 *   - the closing column is -1, or both columns are equal;
 *   - `allow_indent` is set and the closing column is greater than the
 *     opening column.
 *
 * NOTE(review): the definition of `closing_token` is not visible in this view.
 */
14323parser_warn_indentation_mismatch(
pm_parser_t *parser,
size_t opening_newline_index,
const pm_token_t *opening_token,
bool if_after_else,
bool allow_indent) {
14328 size_t closing_newline_index = token_newline_index(parser);
14329 if (opening_newline_index == closing_newline_index)
return;
// For the opening token, token_column's break_on_non_space argument is set
// unless if_after_else is set.
14334 int64_t opening_column = token_column(parser, opening_newline_index, opening_token, !if_after_else);
14335 if (!if_after_else && (opening_column == -1))
return;
// The closing token is always measured with break_on_non_space set.
14342 int64_t closing_column = token_column(parser, closing_newline_index, closing_token,
true);
14343 if ((closing_column == -1) || (opening_column == closing_column))
return;
14347 if (allow_indent && (closing_column > opening_column))
return;
// The warning is located at the closing token. Its format arguments are the
// text (length, pointer) of the closing and opening tokens, then the opening
// line index offset by parser->start_line.
14350 PM_PARSER_WARN_FORMAT(
14352 PM_TOKEN_START(parser, closing_token),
14353 PM_TOKEN_LENGTH(closing_token),
14354 PM_WARN_INDENTATION_MISMATCH,
14355 (
int) (closing_token->
end - closing_token->
start),
14356 (
const char *) closing_token->
start,
14357 (
int) (opening_token->
end - opening_token->
start),
14358 (
const char *) opening_token->
start,
14359 ((int32_t) opening_newline_index) + parser->
start_line
// Enumeration of rescue-parsing kinds; the first visible member starts at 1.
// NOTE(review): the opening line and the remaining members are not visible in
// this view (parse_block passes PM_RESCUES_BLOCK, presumably one of them).
14364 PM_RESCUES_BEGIN = 1,
14371} pm_rescues_type_t;
// Body fragment that parses the rescue / else / ensure / end tail of a begin
// node (`parent_node`); presumably parse_rescues, which the implicit-begin
// helper in this file calls. NOTE(review): the signature and many interior
// lines are not visible in this view.
//
// One rescue clause is parsed per iteration.
14381 while (match1(parser, PM_TOKEN_KEYWORD_RESCUE)) {
14382 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening,
false,
false);
14383 parser_lex(parser);
14387 switch (parser->
current.type) {
// `rescue => reference` with no exception list.
14388 case PM_TOKEN_EQUAL_GREATER: {
14392 parser_lex(parser);
14393 pm_rescue_node_operator_set(parser, rescue, &parser->
previous);
14395 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX,
false,
false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14396 reference = parse_target(parser, reference,
false,
false);
14398 pm_rescue_node_reference_set(rescue, reference);
14401 case PM_TOKEN_NEWLINE:
14402 case PM_TOKEN_SEMICOLON:
14403 case PM_TOKEN_KEYWORD_THEN:
// Comma-separated exception expressions; each may be a starred expression.
14408 if (token_begins_expression_p(parser->
current.type) || match1(parser, PM_TOKEN_USTAR)) {
14413 pm_node_t *expression = parse_starred_expression(parser, PM_BINDING_POWER_DEFINED,
false, PM_ERR_RESCUE_EXPRESSION, (uint16_t) (depth + 1));
14414 pm_rescue_node_exceptions_append(parser->
arena, rescue, expression);
// A newline, ; or `then` ends the exception list.
14418 if (match3(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_THEN))
break;
// A => after the exception list introduces the reference target.
14422 if (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
14423 pm_rescue_node_operator_set(parser, rescue, &parser->
previous);
14425 pm_node_t *reference = parse_expression(parser, PM_BINDING_POWER_INDEX,
false,
false, PM_ERR_RESCUE_VARIABLE, (uint16_t) (depth + 1));
14426 reference = parse_target(parser, reference,
false,
false);
14428 pm_rescue_node_reference_set(rescue, reference);
14431 }
while (accept1(parser, PM_TOKEN_COMMA));
// Clause terminator: a newline or ; may be followed by `then`; the other
// visible path expects `then`.
14436 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
14437 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
14441 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_RESCUE_TERM);
// The clause body is skipped when the next token is else, ensure or end.
14445 if (!match3(parser, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_END)) {
14446 pm_accepts_block_stack_push(parser,
true);
14461 if (statements != NULL) pm_rescue_node_statements_set(rescue, statements);
14463 pm_accepts_block_stack_pop(parser);
14464 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
// The first clause is attached to the begin node; the other visible call
// chains the clause as the subsequent of `current`.
14467 if (current == NULL) {
14468 pm_begin_node_rescue_clause_set(parent_node, rescue);
14470 pm_rescue_node_subsequent_set(current, rescue);
// Set each visited clause's length so that it ends where `current` ends.
14479 if (current != NULL) {
14482 while (clause != NULL) {
14483 PM_NODE_LENGTH_SET_NODE(clause, current);
// Optional else clause.
14489 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
14490 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening,
false,
false);
// From here on, the else keyword is the opening token for indentation checks.
14491 opening_newline_index = token_newline_index(parser);
14493 else_keyword = parser->
current;
14494 opening = &else_keyword;
14496 parser_lex(parser);
14497 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14500 if (!match2(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_ENSURE)) {
14501 pm_accepts_block_stack_push(parser,
true);
14515 else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14516 pm_accepts_block_stack_pop(parser);
14518 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14521 pm_else_node_t *else_clause = pm_else_node_create(parser, &else_keyword, else_statements, &parser->
current);
14522 pm_begin_node_else_clause_set(parent_node, else_clause);
// An else with no rescue clause (current == NULL) is an error.
14526 if (current == NULL) pm_parser_err_node(parser, UP(else_clause), PM_ERR_BEGIN_LONELY_ELSE);
// Optional ensure clause.
14529 if (match1(parser, PM_TOKEN_KEYWORD_ENSURE)) {
14530 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening,
false,
false);
14533 parser_lex(parser);
14534 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14537 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14538 pm_accepts_block_stack_push(parser,
true);
14552 ensure_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
14553 pm_accepts_block_stack_pop(parser);
14555 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
14558 pm_ensure_node_t *ensure_clause = pm_ensure_node_create(parser, &ensure_keyword, ensure_statements, &parser->
current);
14559 pm_begin_node_ensure_clause_set(parent_node, ensure_clause);
// Record the end keyword from the current token when it is `end`; the other
// visible call uses a separately built `end_keyword` token.
14562 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
14563 if (opening != NULL) parser_warn_indentation_mismatch(parser, opening_newline_index, opening,
false,
false);
14564 pm_begin_node_end_keyword_set(parser, parent_node, &parser->
current);
14567 pm_begin_node_end_keyword_set(parser, parent_node, &end_keyword);
// Fragment that wraps `statements` in a begin node created with a NULL
// begin-keyword argument, parses its rescue/else/ensure tail with
// parse_rescues, and then sets the node's length to end at the current token.
// Presumably the body of parse_rescues_implicit_begin, which parse_block calls.
// NOTE(review): the signature and the return are not visible in this view.
14577 pm_begin_node_t *node = pm_begin_node_create(parser, NULL, statements);
14578 parse_rescues(parser, opening_newline_index, opening, node,
type, (uint16_t) (depth + 1));
14581 PM_NODE_LENGTH_SET_TOKEN(parser, node, &parser->
current);
/**
 * Parse the parameters of a block or lambda literal, including any
 * block-local variables listed after a `;`.
 *
 * Unless the current token is already `;`, the regular parameter list is
 * parsed via parse_parameters. The binding power and the forwarding
 * diagnostic passed to it both depend on `is_lambda_literal`.
 *
 * NOTE(review): only part of the parameter list and body is visible; the
 * call in parse_block passes
 * (parser, true, &block_parameters_opening, false, true, depth + 1).
 */
14590parse_block_parameters(
14592 bool allows_trailing_comma,
14594 bool is_lambda_literal,
14595 bool accepts_blocks_in_defaults,
14599 if (!match1(parser, PM_TOKEN_SEMICOLON)) {
14600 if (!is_lambda_literal) {
14603 parameters = parse_parameters(
14605 is_lambda_literal ? PM_BINDING_POWER_DEFINED : PM_BINDING_POWER_INDEX,
14607 allows_trailing_comma,
14609 accepts_blocks_in_defaults,
14611 is_lambda_literal ? PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_LAMBDA : PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES_BLOCK,
14612 (uint16_t) (depth + 1)
// Mirrors the !is_lambda_literal guard before the parse_parameters call.
14614 if (!is_lambda_literal) {
14615 context_pop(parser);
// Block-local variables are only looked for when an opening token was given.
14620 if (opening != NULL) {
14621 accept1(parser, PM_TOKEN_NEWLINE);
14623 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
// Non-identifier name tokens are reported and consumed.
14625 switch (parser->
current.type) {
14626 case PM_TOKEN_CONSTANT:
14627 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
14628 parser_lex(parser);
14630 case PM_TOKEN_INSTANCE_VARIABLE:
14631 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
14632 parser_lex(parser);
14634 case PM_TOKEN_GLOBAL_VARIABLE:
14635 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
14636 parser_lex(parser);
14638 case PM_TOKEN_CLASS_VARIABLE:
14639 pm_parser_err_current(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
14640 parser_lex(parser);
14643 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
// Each local is checked for repetition, added to the scope, and appended.
14647 bool repeated = pm_parser_parameter_name_check(parser, &parser->
previous);
14648 pm_parser_local_add_token(parser, &parser->
previous, 1);
14651 if (repeated) pm_node_flag_set_repeated_parameter(UP(local));
14653 pm_block_parameters_node_append_local(parser->
arena, block_parameters, local);
14654 }
while (accept1(parser, PM_TOKEN_COMMA));
14658 return block_parameters;
/**
 * Returns true when a scope has PM_SCOPE_PARAMETERS_NUMBERED_FOUND set in its
 * `parameters` flags.
 * NOTE(review): the iteration over scopes and the fallback return are not
 * visible in this view; judging by the name, enclosing scopes are examined.
 */
14666outer_scope_using_numbered_parameters_p(
pm_parser_t *parser) {
14668 if (scope->
parameters & PM_SCOPE_PARAMETERS_NUMBERED_FOUND)
return true;
// Names of the numbered parameters, "_1" through "_9"; index i holds the name
// for parameter number i + 1.
// NOTE(review): the closing line of this initializer is not visible in this view.
14679static const char *
const pm_numbered_parameter_names[] = {
14680 "_1",
"_2",
"_3",
"_4",
"_5",
"_6",
"_7",
"_8",
"_9"
// Body fragment that reconciles explicit parameters with implicit ones
// (numbered parameters and `it`); presumably parse_blocklike_parameters, which
// parse_block calls. NOTE(review): the signature and several interior lines
// are not visible in this view.
//
// With explicit parameters present, every implicit parameter read is an error.
14694 if (parameters != NULL) {
14696 if (implicit_parameters->
size > 0) {
14699 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14700 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_ORDINARY);
14701 }
else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
14702 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_ORDINARY);
14704 assert(
false &&
"unreachable");
14713 if (implicit_parameters->
size == 0) {
// Track the highest numbered parameter seen and whether `it` was seen.
14720 uint8_t numbered_parameter = 0;
14721 bool it_parameter =
false;
14723 for (
size_t index = 0; index < implicit_parameters->
size; index++) {
14726 if (PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE)) {
14727 if (it_parameter) {
14728 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_IT);
14729 }
else if (outer_scope_using_numbered_parameters_p(parser)) {
14730 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_OUTER_BLOCK);
14732 pm_parser_err_node(parser, node, PM_ERR_NUMBERED_PARAMETER_INNER_BLOCK);
14733 }
else if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
// The digit is the source byte one past the node's start, converted from
// its character value to a number.
14734 numbered_parameter = MAX(numbered_parameter, (uint8_t) (parser->
start[node->
location.
start + 1] -
'0'));
14736 assert(
false &&
"unreachable");
14738 }
else if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
// `it` cannot be combined with numbered parameters.
14739 if (numbered_parameter > 0) {
14740 pm_parser_err_node(parser, node, PM_ERR_IT_NOT_ALLOWED_NUMBERED);
14742 it_parameter =
true;
// Numbered parameters take precedence over `it` when building the result.
14747 if (numbered_parameter > 0) {
14751 scope->
parameters |= PM_SCOPE_PARAMETERS_NUMBERED_INNER;
14753 return UP(pm_numbered_parameters_node_create(parser, opening, closing, numbered_parameter));
14756 if (it_parameter) {
14757 return UP(pm_it_parameters_node_create(parser, opening, closing));
/**
 * Parse a block (brace or keyword form, selected by `opening.type`) and
 * return the node built by pm_block_node_create.
 *
 * A new scope is pushed for the block and popped before returning; `true` is
 * pushed on the accepts-block stack for the same span. Optional `|...|`
 * parameters are parsed first, then the body, then the matching terminator.
 *
 * NOTE(review): the declaration of `opening` and several interior lines are
 * not visible in this view.
 */
14767parse_block(
pm_parser_t *parser, uint16_t depth) {
14769 accept1(parser, PM_TOKEN_NEWLINE);
14771 pm_accepts_block_stack_push(parser,
true);
14772 pm_parser_scope_push(parser,
false);
14776 if (accept1(parser, PM_TOKEN_PIPE)) {
// A second | right away means an empty parameter list.
14778 if (match1(parser, PM_TOKEN_PIPE)) {
14779 block_parameters = pm_block_parameters_node_create(parser, NULL, &block_parameters_opening);
14781 parser_lex(parser);
14783 block_parameters = parse_block_parameters(parser,
true, &block_parameters_opening,
false,
true, (uint16_t) (depth + 1));
14784 accept1(parser, PM_TOKEN_NEWLINE);
14786 expect1(parser, PM_TOKEN_PIPE, PM_ERR_BLOCK_PARAM_PIPE_TERM);
14789 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->
previous);
14792 accept1(parser, PM_TOKEN_NEWLINE);
// Brace form: body up to }.
14795 if (opening.
type == PM_TOKEN_BRACE_LEFT) {
14796 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
14800 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BLOCK_TERM_BRACE, &opening);
// Keyword form: body up to `end`, with optional rescue/else/ensure clauses.
14802 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
14803 if (!match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_ENSURE)) {
14804 pm_accepts_block_stack_push(parser,
true);
14806 pm_accepts_block_stack_pop(parser);
// A rescue or ensure here wraps the statements in an implicit begin node.
14809 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
14810 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
14811 statements = UP(parse_rescues_implicit_begin(parser, 0, NULL, opening.
start, (
pm_statements_node_t *) statements, PM_RESCUES_BLOCK, (uint16_t) (depth + 1)));
14815 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BLOCK_TERM_END, &opening);
// Order the scope's locals into `locals` and resolve explicit versus
// implicit parameters before the scope is popped.
14819 pm_locals_order(parser, &parser->
current_scope->
locals, &locals, pm_parser_scope_toplevel_p(parser));
14820 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &opening, &parser->
previous);
14822 pm_parser_scope_pop(parser);
14823 pm_accepts_block_stack_pop(parser);
14825 return pm_block_node_create(parser, &locals, &opening, parameters, statements, &parser->
previous);
/**
 * Parse the arguments of a call, with or without parentheses, followed by an
 * optional block.
 *
 *   accepts_block        - passed to parse_arguments as its accepts_forwarding
 *                          argument, and also gates the trailing block.
 *   accepts_command_call - allows the unparenthesized form.
 *
 * NOTE(review): several interior lines and the return statement are not
 * visible in this view.
 */
14834parse_arguments_list(
pm_parser_t *parser,
pm_arguments_t *arguments,
bool accepts_block,
bool accepts_command_call, uint16_t depth) {
14835 bool found =
false;
// Parenthesized form.
14837 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
14841 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14844 pm_accepts_block_stack_push(parser,
true);
14845 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_PARENTHESIS_RIGHT, (uint16_t) (depth + 1));
14847 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14853 pm_accepts_block_stack_pop(parser);
14856 }
else if (accepts_command_call && (token_begins_expression_p(parser->
current.type) || match3(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR, PM_TOKEN_UAMPERSAND)) && !match1(parser, PM_TOKEN_BRACE_LEFT)) {
// Command-call form: no parentheses, so PM_TOKEN_EOF is the terminator and
// `false` is pushed on the accepts-block stack while the arguments are parsed.
14858 pm_accepts_block_stack_push(parser,
false);
14863 parse_arguments(parser, arguments, accepts_block, PM_TOKEN_EOF, (uint16_t) (depth + 1));
// The argument list ended on a comma that is not followed by a semicolon.
14868 if (parser->
previous.
type == PM_TOKEN_COMMA && !match1(parser, PM_TOKEN_SEMICOLON)) {
14872 pm_accepts_block_stack_pop(parser);
// Optional trailing block: a brace block, or a do block when the
// accepts-block stack allows it.
14878 if (accepts_block) {
14881 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
14883 block = parse_block(parser, (uint16_t) (depth + 1));
14884 pm_arguments_validate_block(parser, arguments, block);
14885 }
else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
14887 block = parse_block(parser, (uint16_t) (depth + 1));
14890 if (block != NULL) {
14892 arguments->
block = UP(block);
// A block was already present: report it, move the earlier one into the
// positional arguments, and store the new block.
14894 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_BLOCK_MULTI);
14896 if (arguments->
block != NULL) {
14898 arguments->
arguments = pm_arguments_node_create(parser);
14900 pm_arguments_node_arguments_append(parser->
arena, arguments->
arguments, arguments->
block);
14902 arguments->
block = UP(block);
14916 bool in_sclass =
false;
14918 switch (context_node->
context) {
14963 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
14986 assert(
false &&
"unreachable");
14991 pm_parser_err_node(parser, node, PM_ERR_RETURN_INVALID);
15002 switch (context_node->
context) {
15077 assert(
false &&
"unreachable");
15091 return previous_block_exits;
15105 switch (PM_NODE_TYPE(block_exit)) {
15106 case PM_BREAK_NODE:
type =
"break";
break;
15107 case PM_NEXT_NODE:
type =
"next";
break;
15108 case PM_REDO_NODE:
type =
"redo";
break;
15109 default: assert(
false &&
"unreachable");
type =
"";
break;
15112 PM_PARSER_ERR_NODE_FORMAT(parser, block_exit, PM_ERR_INVALID_BLOCK_EXIT,
type);
15124 if (match2(parser, PM_TOKEN_KEYWORD_WHILE_MODIFIER, PM_TOKEN_KEYWORD_UNTIL_MODIFIER)) {
15129 }
else if (previous_block_exits != NULL) {
15141 flush_block_exits(parser, previous_block_exits);
15149 pm_node_t *predicate = parse_value_expression(parser, binding_power,
true,
false, error_id, (uint16_t) (depth + 1));
15152 bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15154 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
15155 predicate_closed =
true;
15159 if (!predicate_closed) {
15160 pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
15163 context_pop(parser);
/**
 * Parse a conditional expression together with its clauses.
 *
 * The visible code parses a predicate and an optional statement list, builds
 * either an if node (pm_if_node_create) or an unless node
 * (pm_unless_node_create), then consumes any number of PM_TOKEN_KEYWORD_ELSIF
 * clauses (each becoming a nested if node), an optional PM_TOKEN_KEYWORD_ELSE
 * clause, and finally expects PM_TOKEN_KEYWORD_END.
 *
 * Block exits are tracked for the duration of the parse: push_block_exits()
 * is called on entry and pop_block_exits() with the saved list at the end.
 *
 * @param parser The parser whose token stream is consumed.
 * @param context The context passed to parse_predicate() and
 *     parse_statements() for the leading clause.
 * @param opening_newline_index Newline index used by
 *     parser_warn_indentation_mismatch(); it is replaced by
 *     token_newline_index(parser) once an else clause is entered.
 * @param if_after_else Forwarded to the indentation-mismatch warning that
 *     precedes the closing `end` when there is no else clause.
 * @param depth Recursion depth; nested parse calls receive `depth + 1`.
 */
15168parse_conditional(
pm_parser_t *parser,
pm_context_t context,
size_t opening_newline_index,
bool if_after_else, uint16_t depth) {
// Register this conditional's own block-exit list and remember the previous
// one so that it can be restored by pop_block_exits() below.
15170 pm_node_list_t *previous_block_exits = push_block_exits(parser, &current_block_exits);
15175 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER, context, &then_keyword, (uint16_t) (depth + 1));
// The statement list is only parsed when the clause body is not immediately
// followed by elsif/else/end.
15178 if (!match3(parser, PM_TOKEN_KEYWORD_ELSIF, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
15179 pm_accepts_block_stack_push(parser,
true);
15180 statements = parse_statements(parser, context, (uint16_t) (depth + 1));
15181 pm_accepts_block_stack_pop(parser);
15182 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15189 parent = UP(pm_if_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
15192 parent = UP(pm_unless_node_create(parser, &keyword, predicate, NTOK2PTR(then_keyword), statements));
15195 assert(
false &&
"unreachable");
// Each elsif clause: warn when the keyword ends its line, check indentation
// against the opening keyword, then parse its predicate in
// PM_CONTEXT_ELSIF and wrap the clause in a new if node.
15204 while (match1(parser, PM_TOKEN_KEYWORD_ELSIF)) {
15205 if (parser_end_of_line_p(parser)) {
15206 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->
current, PM_WARN_KEYWORD_EOL);
15209 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword,
false,
false);
15211 parser_lex(parser);
15213 pm_node_t *predicate = parse_predicate(parser, PM_BINDING_POWER_MODIFIER,
PM_CONTEXT_ELSIF, &then_keyword, (uint16_t) (depth + 1));
15214 pm_accepts_block_stack_push(parser,
true);
15217 pm_accepts_block_stack_pop(parser);
15218 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15220 pm_node_t *elsif = UP(pm_if_node_create(parser, &elsif_keyword, predicate, NTOK2PTR(then_keyword), statements, NULL, NULL));
// Else clause: from here on indentation is compared against the else
// keyword's line, and a missing `end` is reported with
// PM_ERR_CONDITIONAL_TERM_ELSE.
15226 if (match1(parser, PM_TOKEN_KEYWORD_ELSE)) {
15227 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword,
false,
false);
15228 opening_newline_index = token_newline_index(parser);
15230 parser_lex(parser);
15233 pm_accepts_block_stack_push(parser,
true);
15235 pm_accepts_block_stack_pop(parser);
15237 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
15238 parser_warn_indentation_mismatch(parser, opening_newline_index, &else_keyword,
false,
false);
15239 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM_ELSE, &keyword);
15241 pm_else_node_t *else_node = pm_else_node_create(parser, &else_keyword, else_statements, &parser->
previous);
15245 ((
pm_if_node_t *) current)->subsequent = UP(else_node);
15251 assert(
false &&
"unreachable");
// No else clause: the closing `end` is checked against the opening keyword
// and a missing `end` is reported with PM_ERR_CONDITIONAL_TERM.
15255 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, if_after_else,
false);
15256 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CONDITIONAL_TERM, &keyword);
// Walk the chain of nodes starting at `current`, dispatching on node type,
// until `current` becomes NULL.
15263 bool recursing =
true;
15265 while (recursing) {
15266 switch (PM_NODE_TYPE(current)) {
15270 recursing = current != NULL;
15288 assert(
false &&
"unreachable");
15292 pop_block_exits(parser, previous_block_exits);
/**
 * A list of `case` labels covering the PM_TOKEN_KEYWORD_* token types named
 * below. The first label deliberately omits its leading `case` and the last
 * omits its trailing colon, so the macro is written as
 * `case PM_CASE_KEYWORD:` inside a switch on a token type.
 */
15300#define PM_CASE_KEYWORD PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
15301 case PM_TOKEN_KEYWORD_ALIAS: case PM_TOKEN_KEYWORD_AND: case PM_TOKEN_KEYWORD_BEGIN: case PM_TOKEN_KEYWORD_BEGIN_UPCASE: \
15302 case PM_TOKEN_KEYWORD_BREAK: case PM_TOKEN_KEYWORD_CASE: case PM_TOKEN_KEYWORD_CLASS: case PM_TOKEN_KEYWORD_DEF: \
15303 case PM_TOKEN_KEYWORD_DEFINED: case PM_TOKEN_KEYWORD_DO: case PM_TOKEN_KEYWORD_DO_LOOP: case PM_TOKEN_KEYWORD_ELSE: \
15304 case PM_TOKEN_KEYWORD_ELSIF: case PM_TOKEN_KEYWORD_END: case PM_TOKEN_KEYWORD_END_UPCASE: case PM_TOKEN_KEYWORD_ENSURE: \
15305 case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD_FOR: case PM_TOKEN_KEYWORD_IF: case PM_TOKEN_KEYWORD_IN: \
15306 case PM_TOKEN_KEYWORD_MODULE: case PM_TOKEN_KEYWORD_NEXT: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_NOT: \
15307 case PM_TOKEN_KEYWORD_OR: case PM_TOKEN_KEYWORD_REDO: case PM_TOKEN_KEYWORD_RESCUE: case PM_TOKEN_KEYWORD_RETRY: \
15308 case PM_TOKEN_KEYWORD_RETURN: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_SUPER: case PM_TOKEN_KEYWORD_THEN: \
15309 case PM_TOKEN_KEYWORD_TRUE: case PM_TOKEN_KEYWORD_UNDEF: case PM_TOKEN_KEYWORD_UNLESS: case PM_TOKEN_KEYWORD_UNTIL: \
15310 case PM_TOKEN_KEYWORD_WHEN: case PM_TOKEN_KEYWORD_WHILE: case PM_TOKEN_KEYWORD_YIELD
/**
 * A list of `case` labels covering the operator token types named below,
 * written as `case PM_CASE_OPERATOR:` inside a switch on a token type (the
 * first label omits `case`, the last omits the colon).
 */
15316#define PM_CASE_OPERATOR PM_TOKEN_AMPERSAND: case PM_TOKEN_BACKTICK: case PM_TOKEN_BANG_EQUAL: \
15317 case PM_TOKEN_BANG_TILDE: case PM_TOKEN_BANG: case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL: \
15318 case PM_TOKEN_BRACKET_LEFT_RIGHT: case PM_TOKEN_CARET: case PM_TOKEN_EQUAL_EQUAL_EQUAL: case PM_TOKEN_EQUAL_EQUAL: \
15319 case PM_TOKEN_EQUAL_TILDE: case PM_TOKEN_GREATER_EQUAL: case PM_TOKEN_GREATER_GREATER: case PM_TOKEN_GREATER: \
15320 case PM_TOKEN_LESS_EQUAL_GREATER: case PM_TOKEN_LESS_EQUAL: case PM_TOKEN_LESS_LESS: case PM_TOKEN_LESS: \
15321 case PM_TOKEN_MINUS: case PM_TOKEN_PERCENT: case PM_TOKEN_PIPE: case PM_TOKEN_PLUS: case PM_TOKEN_SLASH: \
15322 case PM_TOKEN_STAR_STAR: case PM_TOKEN_STAR: case PM_TOKEN_TILDE: case PM_TOKEN_UAMPERSAND: case PM_TOKEN_UMINUS: \
15323 case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_UPLUS: case PM_TOKEN_USTAR: case PM_TOKEN_USTAR_STAR
/**
 * A list of `case` labels in the same shape as the other PM_CASE_* macros
 * (first label without `case`, last without a colon). It covers numeric
 * literal tokens, the opening tokens of symbols, regexps, strings, heredocs
 * and %-literals, a handful of keyword tokens, PM_TOKEN_MINUS_GREATER,
 * PM_TOKEN_UMINUS_NUM and PM_TOKEN_CHARACTER_LITERAL.
 *
 * NOTE(review): PM_TOKEN_BACKTICK and PM_TOKEN_UMINUS_NUM also appear in
 * PM_CASE_OPERATOR, and several keyword tokens also appear in
 * PM_CASE_KEYWORD, so combining these macros in a single switch would
 * produce duplicate case labels.
 */
15330#define PM_CASE_PRIMITIVE PM_TOKEN_INTEGER: case PM_TOKEN_INTEGER_IMAGINARY: case PM_TOKEN_INTEGER_RATIONAL: \
15331 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: case PM_TOKEN_FLOAT: case PM_TOKEN_FLOAT_IMAGINARY: \
15332 case PM_TOKEN_FLOAT_RATIONAL: case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY: case PM_TOKEN_SYMBOL_BEGIN: \
15333 case PM_TOKEN_REGEXP_BEGIN: case PM_TOKEN_BACKTICK: case PM_TOKEN_PERCENT_LOWER_X: case PM_TOKEN_PERCENT_LOWER_I: \
15334 case PM_TOKEN_PERCENT_LOWER_W: case PM_TOKEN_PERCENT_UPPER_I: case PM_TOKEN_PERCENT_UPPER_W: \
15335 case PM_TOKEN_STRING_BEGIN: case PM_TOKEN_KEYWORD_NIL: case PM_TOKEN_KEYWORD_SELF: case PM_TOKEN_KEYWORD_TRUE: \
15336 case PM_TOKEN_KEYWORD_FALSE: case PM_TOKEN_KEYWORD___FILE__: case PM_TOKEN_KEYWORD___LINE__: \
15337 case PM_TOKEN_KEYWORD___ENCODING__: case PM_TOKEN_MINUS_GREATER: case PM_TOKEN_HEREDOC_START: \
15338 case PM_TOKEN_UMINUS_NUM: case PM_TOKEN_CHARACTER_LITERAL
/**
 * A list of `case` labels in the same shape as the other PM_CASE_* macros
 * (first label without `case`, last without a colon), covering the token
 * types named below.
 *
 * NOTE(review): judging by the name these are presumably the tokens that can
 * start a parameter - confirm against the switch that uses this macro.
 */
15344#define PM_CASE_PARAMETER PM_TOKEN_UAMPERSAND: case PM_TOKEN_AMPERSAND: case PM_TOKEN_UDOT_DOT_DOT: \
15345 case PM_TOKEN_IDENTIFIER: case PM_TOKEN_LABEL: case PM_TOKEN_USTAR: case PM_TOKEN_STAR: case PM_TOKEN_STAR_STAR: \
15346 case PM_TOKEN_USTAR_STAR: case PM_TOKEN_CONSTANT: case PM_TOKEN_INSTANCE_VARIABLE: case PM_TOKEN_GLOBAL_VARIABLE: \
15347 case PM_TOKEN_CLASS_VARIABLE
/**
 * A list of `case` labels in the same shape as the other PM_CASE_* macros
 * (first label without `case`, last without a colon). Unlike those, the
 * labels here are node types (PM_*_NODE) rather than token types, so the
 * macro belongs in a switch on a node type.
 *
 * NOTE(review): judging by the name these are presumably the node types that
 * may be turned into assignment targets - confirm against the caller.
 */
15353#define PM_CASE_WRITABLE PM_CLASS_VARIABLE_READ_NODE: case PM_CONSTANT_PATH_NODE: \
15354 case PM_CONSTANT_READ_NODE: case PM_GLOBAL_VARIABLE_READ_NODE: case PM_LOCAL_VARIABLE_READ_NODE: \
15355 case PM_INSTANCE_VARIABLE_READ_NODE: case PM_MULTI_TARGET_NODE: case PM_BACK_REFERENCE_READ_NODE: \
15356 case PM_NUMBERED_REFERENCE_READ_NODE: case PM_IT_LOCAL_VARIABLE_READ_NODE
// Compile-time check that the "forced UTF-8 encoding" flag has the same
// integer value in the string-flags and encoding-flags enumerations, so that
// one can be used where the other is expected.
15360PM_STATIC_ASSERT(__LINE__, ((
int) PM_STRING_FLAGS_FORCED_UTF8_ENCODING) == ((
int) PM_ENCODING_FLAGS_FORCED_UTF8_ENCODING),
"Expected the flags to match.");
/**
 * Compute the encoding-related node flags for a string whose content has been
 * unescaped. The visible code returns either
 * PM_STRING_FLAGS_FORCED_UTF8_ENCODING or
 * PM_STRING_FLAGS_FORCED_BINARY_ENCODING.
 *
 * NOTE(review): the conditions selecting between the two returns (and any
 * other return value) are not visible here - confirm before relying on them.
 *
 * @param parser The parser, read-only.
 * @return The flags to set on the string node.
 */
15366static inline pm_node_flags_t
15367parse_unescaped_encoding(
const pm_parser_t *parser) {
15372 return PM_STRING_FLAGS_FORCED_UTF8_ENCODING;
15378 return PM_STRING_FLAGS_FORCED_BINARY_ENCODING;
/**
 * Parse one part of a string-like literal, dispatching on the current token:
 *
 *   - PM_TOKEN_STRING_CONTENT: a string node built from the current token and
 *     flagged with parse_unescaped_encoding();
 *   - PM_TOKEN_EMBEXPR_BEGIN: an embedded-statements node spanning the opening
 *     token through the PM_TOKEN_EMBEXPR_END that closes it;
 *   - PM_TOKEN_EMBVAR: an embedded-variable node wrapping a back reference,
 *     numbered reference, global, instance or class variable read;
 *   - anything else: the token is consumed and
 *     PM_ERR_CANNOT_PARSE_STRING_PART is reported on it.
 *
 * @param parser The parser whose token stream is consumed.
 * @param depth Recursion depth.
 */
15389parse_string_part(
pm_parser_t *parser, uint16_t depth) {
15390 switch (parser->
current.type) {
15397 case PM_TOKEN_STRING_CONTENT: {
15398 pm_node_t *node = UP(pm_string_node_create_current_string(parser, NULL, &parser->
current, NULL));
15399 pm_node_flag_set(node, parse_unescaped_encoding(parser));
15401 parser_lex(parser);
15410 case PM_TOKEN_EMBEXPR_BEGIN: {
// The lexer state is forced to PM_LEX_STATE_BEG before stepping into the
// embedded expression and restored from `state` once it has been parsed.
15419 lex_state_set(parser, PM_LEX_STATE_BEG);
15420 parser_lex(parser);
15425 if (!match3(parser, PM_TOKEN_EMBEXPR_END, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
15426 pm_accepts_block_stack_push(parser,
true);
15428 pm_accepts_block_stack_pop(parser);
15432 lex_state_set(parser, state);
15433 expect1(parser, PM_TOKEN_EMBEXPR_END, PM_ERR_EMBEXPR_END);
// A body consisting of exactly one statement has its PM_NODE_FLAG_NEWLINE
// flag cleared.
15438 if (statements != NULL && statements->
body.
size == 1) {
15439 pm_node_flag_unset(statements->
body.
nodes[0], PM_NODE_FLAG_NEWLINE);
15442 return UP(pm_embedded_statements_node_create(parser, &opening, statements, &parser->
previous));
15451 case PM_TOKEN_EMBVAR: {
15456 lex_state_set(parser, PM_LEX_STATE_BEG);
15457 parser_lex(parser);
// Each recognised variable token is consumed first, so the read node is
// created from parser->previous.
15462 switch (parser->
current.type) {
15465 case PM_TOKEN_BACK_REFERENCE:
15466 parser_lex(parser);
15467 variable = UP(pm_back_reference_read_node_create(parser, &parser->
previous));
15471 case PM_TOKEN_NUMBERED_REFERENCE:
15472 parser_lex(parser);
15473 variable = UP(pm_numbered_reference_read_node_create(parser, &parser->
previous));
15477 case PM_TOKEN_GLOBAL_VARIABLE:
15478 parser_lex(parser);
15479 variable = UP(pm_global_variable_read_node_create(parser, &parser->
previous));
15483 case PM_TOKEN_INSTANCE_VARIABLE:
15484 parser_lex(parser);
15485 variable = UP(pm_instance_variable_read_node_create(parser, &parser->
previous));
15489 case PM_TOKEN_CLASS_VARIABLE:
15490 parser_lex(parser);
15491 variable = UP(pm_class_variable_read_node_create(parser, &parser->
previous));
// Unrecognised variable token: PM_ERR_EMBVAR_INVALID is reported through
// expect1() and a missing node covering the current token stands in for the
// variable.
15497 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_EMBVAR_INVALID);
15498 variable = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current)));
15502 return UP(pm_embedded_variable_node_create(parser, &
operator, variable));
15505 parser_lex(parser);
15506 pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
/**
 * Return a pointer to the end of the name portion of an operator token.
 *
 * For PM_TOKEN_TILDE and PM_TOKEN_BANG tokens whose last byte is '@', the
 * returned pointer is `name->end - 1`, i.e. the position of that '@', so the
 * suffix is excluded from the name.
 *
 * NOTE(review): the return value for every other case is not visible here -
 * presumably `name->end`; confirm.
 *
 * @param name The operator token to inspect.
 * @return A pointer into the token's source marking the end of its name.
 */
15516static const uint8_t *
15517parse_operator_symbol_name(
const pm_token_t *name) {
15518 switch (name->
type) {
15519 case PM_TOKEN_TILDE:
15520 case PM_TOKEN_BANG:
15521 if (name->
end[-1] ==
'@')
return name->
end - 1;
15531 const uint8_t *end = parse_operator_symbol_name(&parser->
current);
15533 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15534 parser_lex(parser);
15537 pm_node_flag_set(UP(symbol), PM_SYMBOL_FLAGS_FORCED_US_ASCII_ENCODING);
15551 if (lex_mode->
mode != PM_LEX_STRING) {
15552 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15554 switch (parser->
current.type) {
15555 case PM_CASE_OPERATOR:
15556 return parse_operator_symbol(parser, &opening, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
15557 case PM_TOKEN_IDENTIFIER:
15558 case PM_TOKEN_CONSTANT:
15559 case PM_TOKEN_INSTANCE_VARIABLE:
15560 case PM_TOKEN_METHOD_NAME:
15561 case PM_TOKEN_CLASS_VARIABLE:
15562 case PM_TOKEN_GLOBAL_VARIABLE:
15563 case PM_TOKEN_NUMBERED_REFERENCE:
15564 case PM_TOKEN_BACK_REFERENCE:
15565 case PM_CASE_KEYWORD:
15566 parser_lex(parser);
15569 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
15575 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->
previous, &symbol->
unescaped,
false));
15582 if (match1(parser, PM_TOKEN_STRING_END)) {
15583 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15584 parser_lex(parser);
15586 .
type = PM_TOKEN_STRING_CONTENT,
15591 return UP(pm_symbol_node_create(parser, &opening, &content, &parser->
previous));
15595 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
15599 if (part && PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15600 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15601 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15607 if (part) pm_interpolated_symbol_node_append(parser->
arena, symbol, part);
15609 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15610 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
15611 pm_interpolated_symbol_node_append(parser->
arena, symbol, part);
15615 if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
15616 if (match1(parser, PM_TOKEN_EOF)) {
15617 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15619 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_INTERPOLATED);
15622 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->
previous);
15629 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15632 parser_lex(parser);
15643 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
15645 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
15646 pm_interpolated_symbol_node_append(parser->
arena, symbol, part);
15648 part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->
current, NULL, &parser->
current_string));
15649 pm_interpolated_symbol_node_append(parser->
arena, symbol, part);
15651 if (next_state != PM_LEX_STATE_NONE) {
15652 lex_state_set(parser, next_state);
15655 parser_lex(parser);
15656 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15658 pm_interpolated_symbol_node_closing_loc_set(parser, symbol, &parser->
previous);
15663 pm_string_shared_init(&unescaped, content.
start, content.
end);
15666 if (next_state != PM_LEX_STATE_NONE) {
15667 lex_state_set(parser, next_state);
15670 if (match1(parser, PM_TOKEN_EOF)) {
15671 pm_parser_err_token(parser, &opening, PM_ERR_SYMBOL_TERM_DYNAMIC);
15673 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
15676 return UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->
previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped,
false)));
/**
 * Parse a single argument of an `undef` statement, dispatching on the current
 * token:
 *
 *   - operator tokens are handed to parse_operator_symbol() with no opening
 *     token and PM_LEX_STATE_NONE;
 *   - keyword, constant, identifier and method-name tokens are consumed and
 *     turned into a symbol whose encoding flags come from
 *     parse_symbol_encoding();
 *   - PM_TOKEN_SYMBOL_BEGIN is consumed and the rest is parsed by
 *     parse_symbol() with PM_LEX_STATE_NONE;
 *   - anything else reports PM_ERR_UNDEF_ARGUMENT on the current token and
 *     yields a missing node covering that token (the token is not consumed).
 *
 * @param parser The parser whose token stream is consumed.
 * @param depth Recursion depth; parse_symbol() receives `depth + 1`.
 */
15684parse_undef_argument(
pm_parser_t *parser, uint16_t depth) {
15685 switch (parser->
current.type) {
15686 case PM_CASE_OPERATOR:
15687 return parse_operator_symbol(parser, NULL, PM_LEX_STATE_NONE);
15688 case PM_CASE_KEYWORD:
15689 case PM_TOKEN_CONSTANT:
15690 case PM_TOKEN_IDENTIFIER:
15691 case PM_TOKEN_METHOD_NAME: {
15692 parser_lex(parser);
15696 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->
previous, &symbol->
unescaped,
false));
15700 case PM_TOKEN_SYMBOL_BEGIN: {
15702 parser_lex(parser);
15704 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15707 pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
15708 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current)));
/**
 * Parse one of the two arguments of an `alias` statement, dispatching on the
 * current token:
 *
 *   - operator tokens go through parse_operator_symbol();
 *   - keyword, constant, identifier and method-name tokens are consumed and
 *     turned into a symbol whose encoding flags come from
 *     parse_symbol_encoding();
 *   - PM_TOKEN_SYMBOL_BEGIN is consumed and the rest is parsed by
 *     parse_symbol();
 *   - back references, numbered references and global variables are consumed
 *     and returned as the corresponding read node;
 *   - anything else reports PM_ERR_ALIAS_ARGUMENT on the current token and
 *     yields a missing node covering that token (the token is not consumed).
 *
 * When `first` is true the lexer state following the argument is
 * `PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM`; otherwise PM_LEX_STATE_NONE is
 * passed to the symbol helpers and no state is set for plain names.
 *
 * @param parser The parser whose token stream is consumed.
 * @param first Whether this is the first argument of the alias.
 * @param depth Recursion depth; parse_symbol() receives `depth + 1`.
 */
15719parse_alias_argument(
pm_parser_t *parser,
bool first, uint16_t depth) {
15720 switch (parser->
current.type) {
15721 case PM_CASE_OPERATOR:
15722 return parse_operator_symbol(parser, NULL, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE);
15723 case PM_CASE_KEYWORD:
15724 case PM_TOKEN_CONSTANT:
15725 case PM_TOKEN_IDENTIFIER:
15726 case PM_TOKEN_METHOD_NAME: {
// The state is set before lexing so that it applies to the token that
// follows the name.
15727 if (first) lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
15728 parser_lex(parser);
15732 pm_node_flag_set(UP(symbol), parse_symbol_encoding(parser, &parser->
previous, &symbol->
unescaped,
false));
15736 case PM_TOKEN_SYMBOL_BEGIN: {
15738 parser_lex(parser);
15740 return parse_symbol(parser, &lex_mode, first ? PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM : PM_LEX_STATE_NONE, (uint16_t) (depth + 1));
15742 case PM_TOKEN_BACK_REFERENCE:
15743 parser_lex(parser);
15744 return UP(pm_back_reference_read_node_create(parser, &parser->
previous));
15745 case PM_TOKEN_NUMBERED_REFERENCE:
15746 parser_lex(parser);
15747 return UP(pm_numbered_reference_read_node_create(parser, &parser->
previous));
15748 case PM_TOKEN_GLOBAL_VARIABLE:
15749 parser_lex(parser);
15750 return UP(pm_global_variable_read_node_create(parser, &parser->
previous));
15752 pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
15753 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current)));
15765 bool is_numbered_param = pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->
previous), PM_TOKEN_LENGTH(&parser->
previous));
15767 if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
15768 return UP(pm_local_variable_read_node_create_constant_id(parser, &parser->
previous, name_id, (uint32_t) depth,
false));
15772 if (!current_scope->
closed && !(current_scope->
parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
15773 if (is_numbered_param) {
15778 uint8_t maximum = (uint8_t) (parser->
previous.
start[1] -
'0');
15779 for (uint8_t number = 1; number <= maximum; number++) {
15780 pm_parser_local_add_constant(parser, pm_numbered_parameter_names[number - 1], 2);
15783 if (!match1(parser, PM_TOKEN_EQUAL)) {
15787 pm_node_t *node = UP(pm_local_variable_read_node_create_constant_id(parser, &parser->
previous, name_id, 0,
false));
15792 pm_node_t *node = UP(pm_it_local_variable_read_node_create(parser, &parser->
previous));
15807 pm_node_flags_t flags = 0;
15809 if (!match1(parser, PM_TOKEN_PARENTHESIS_LEFT) && (parser->
previous.
end[-1] !=
'!') && (parser->
previous.
end[-1] !=
'?')) {
15810 pm_node_t *node = parse_variable(parser);
15811 if (node != NULL)
return node;
15812 flags |= PM_CALL_NODE_FLAGS_VARIABLE_CALL;
15816 pm_node_flag_set(UP(node), flags);
/**
 * Consume the token that names a method in a method definition, dispatching
 * on the current token:
 *
 *   - keyword, constant and method-name tokens are consumed as they are;
 *   - identifiers are first passed to pm_refute_numbered_parameter() with the
 *     token's start offset and length, then consumed;
 *   - operator tokens set the lexer state to PM_LEX_STATE_ENDFN before being
 *     consumed.
 *
 * NOTE(review): what each case yields, and the handling of any other token,
 * is not visible here.
 *
 * @param parser The parser whose token stream is consumed.
 */
15827parse_method_definition_name(
pm_parser_t *parser) {
15828 switch (parser->
current.type) {
15829 case PM_CASE_KEYWORD:
15830 case PM_TOKEN_CONSTANT:
15831 case PM_TOKEN_METHOD_NAME:
15832 parser_lex(parser);
15834 case PM_TOKEN_IDENTIFIER:
15835 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current));
15836 parser_lex(parser);
15838 case PM_CASE_OPERATOR:
15839 lex_state_set(parser, PM_LEX_STATE_ENDFN);
15840 parser_lex(parser);
15855 if (string->
type != PM_STRING_OWNED) {
15858 pm_string_constant_init(
string, (
const char *) writable, length);
15860 writable = (uint8_t *) string->
source;
15867 const uint8_t *source_cursor = writable;
15868 const uint8_t *source_end = source_cursor + dest_length;
15873 size_t trimmed_whitespace = 0;
15879 while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
15880 if (*source_cursor ==
'\t') {
15881 trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
15882 if (trimmed_whitespace > common_whitespace)
break;
15884 trimmed_whitespace++;
15891 memmove(writable, source_cursor, (
size_t) (source_end - source_cursor));
15892 string->length = dest_length;
15902 const uint8_t *cursor = parser->
start + PM_LOCATION_START(&string_node->
content_loc);
15915 bool dedent_next =
true;
15919 size_t write_index = 0;
15926 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) {
15927 nodes->
nodes[write_index++] = node;
15928 dedent_next =
false;
15934 parse_heredoc_dedent_string(parser->
arena, &string_node->
unescaped, common_whitespace);
15937 if (heredoc_dedent_discard_string_node(parser, string_node)) {
15939 nodes->
nodes[write_index++] = node;
15943 dedent_next =
true;
15946 nodes->
size = write_index;
/**
 * Build a zero-length PM_TOKEN_STRING_CONTENT token positioned at `location`:
 * both `start` and `end` point at `location`.
 *
 * @param location The source position the empty token is anchored to.
 * @return The empty string-content token, by value.
 */
15953parse_strings_empty_content(
const uint8_t *location) {
15954 return (
pm_token_t) { .
type = PM_TOKEN_STRING_CONTENT, .start = location, .end = location };
15962 assert(parser->
current.type == PM_TOKEN_STRING_BEGIN);
15963 bool concating =
false;
15965 while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
15971 assert(lex_mode->
mode == PM_LEX_STRING);
15973 bool label_allowed = lex_mode->
as.string.
label_allowed && accepts_label;
15976 parser_lex(parser);
15978 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
15979 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
15988 }
else if (accept1(parser, PM_TOKEN_LABEL_END)) {
15996 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
15997 }
else if (!lex_interpolation) {
16003 if (match1(parser, PM_TOKEN_EOF)) {
16008 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
16023 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16025 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &unescaped));
16026 pm_node_list_append(parser->
arena, &parts, part);
16029 part = UP(pm_string_node_create_current_string(parser, NULL, &parser->
current, NULL));
16030 pm_node_list_append(parser->
arena, &parts, part);
16031 parser_lex(parser);
16032 }
while (match1(parser, PM_TOKEN_STRING_CONTENT));
16034 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
16035 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->
previous));
16036 }
else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16037 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->
previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped,
true)));
16038 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16039 }
else if (match1(parser, PM_TOKEN_EOF)) {
16040 pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
16041 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->
current, &unescaped));
16042 }
else if (accept1(parser, PM_TOKEN_STRING_END)) {
16043 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->
previous, &unescaped));
16048 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->
previous, &unescaped));
16050 }
else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
16057 parser_lex(parser);
16059 if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
16060 node = UP(pm_string_node_create_unescaped(parser, &opening, &content, &parser->
current, &unescaped));
16061 pm_node_flag_set(node, parse_unescaped_encoding(parser));
16067 if (!accept1(parser, PM_TOKEN_STRING_END)) {
16069 if (location > parser->
start && location[-1] ==
'\n') location--;
16070 pm_parser_err(parser, U32(location - parser->
start), 0, PM_ERR_STRING_LITERAL_EOF);
16075 }
else if (accept1(parser, PM_TOKEN_LABEL_END)) {
16076 node = UP(pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->
previous, &unescaped, parse_symbol_encoding(parser, &content, &unescaped,
true)));
16077 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16082 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->
previous, NULL, &unescaped));
16083 pm_node_flag_set(part, parse_unescaped_encoding(parser));
16084 pm_node_list_append(parser->
arena, &parts, part);
16086 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16087 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16088 pm_node_list_append(parser->
arena, &parts, part);
16092 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16093 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->
previous));
16094 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16095 }
else if (match1(parser, PM_TOKEN_EOF)) {
16096 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16097 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->
current));
16099 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16100 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->
previous));
16110 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
16111 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
16112 pm_node_list_append(parser->
arena, &parts, part);
16116 if (accept1(parser, PM_TOKEN_LABEL_END)) {
16117 node = UP(pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->
previous));
16118 if (!label_allowed) pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_LABEL);
16119 }
else if (match1(parser, PM_TOKEN_EOF)) {
16120 pm_parser_err_token(parser, &opening, PM_ERR_STRING_INTERPOLATED_TERM);
16121 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->
current));
16123 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
16124 node = UP(pm_interpolated_string_node_create(parser, &opening, &parts, &parser->
previous));
16128 if (current == NULL) {
16132 if (PM_NODE_TYPE_P(node, PM_SYMBOL_NODE) || PM_NODE_TYPE_P(node, PM_INTERPOLATED_SYMBOL_NODE)) {
16143 if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
16144 pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
16150 if (!PM_NODE_TYPE_P(current, PM_STRING_NODE) && !PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
16151 pm_parser_err_node(parser, current, PM_ERR_STRING_CONCATENATION);
16156 pm_interpolated_string_node_append(parser->
arena, container, current);
16157 current = UP(container);
// Flags passed to parse_pattern(). PM_PARSE_PATTERN_TOP and
// PM_PARSE_PATTERN_MULTI are distinct bits and are combined with `|` by
// callers; PM_PARSE_PATTERN_SINGLE is the value with neither bit set.
16167#define PM_PARSE_PATTERN_SINGLE 0
16168#define PM_PARSE_PATTERN_TOP 1
16169#define PM_PARSE_PATTERN_MULTI 2
16182 if (peek_at(parser, parser->
start + location->
start) ==
'_')
return;
16184 if (pm_constant_id_list_includes(captures, capture)) {
16185 pm_parser_err(parser, location->
start, location->
length, PM_ERR_PATTERN_CAPTURE_DUPLICATE);
16187 pm_constant_id_list_append(parser->
arena, captures, capture);
16198 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
16200 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16201 node = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->
previous));
16207 if (!match2(parser, PM_TOKEN_BRACKET_LEFT, PM_TOKEN_PARENTHESIS_LEFT)) {
16215 if (accept1(parser, PM_TOKEN_BRACKET_LEFT)) {
16217 accept1(parser, PM_TOKEN_NEWLINE);
16219 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16220 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16221 accept1(parser, PM_TOKEN_NEWLINE);
16222 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16227 parser_lex(parser);
16229 accept1(parser, PM_TOKEN_NEWLINE);
16231 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
16232 inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16233 accept1(parser, PM_TOKEN_NEWLINE);
16234 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16243 return UP(pm_array_pattern_node_constant_create(parser, node, &opening, &closing));
16250 switch (PM_NODE_TYPE(inner)) {
16251 case PM_ARRAY_PATTERN_NODE: {
16255 PM_NODE_START_SET_NODE(pattern_node, node);
16256 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16259 pattern_node->
opening_loc = TOK2LOC(parser, &opening);
16260 pattern_node->
closing_loc = TOK2LOC(parser, &closing);
16262 return UP(pattern_node);
16267 case PM_FIND_PATTERN_NODE: {
16271 PM_NODE_START_SET_NODE(pattern_node, node);
16272 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16275 pattern_node->
opening_loc = TOK2LOC(parser, &opening);
16276 pattern_node->
closing_loc = TOK2LOC(parser, &closing);
16278 return UP(pattern_node);
16283 case PM_HASH_PATTERN_NODE: {
16287 PM_NODE_START_SET_NODE(pattern_node, node);
16288 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16291 pattern_node->
opening_loc = TOK2LOC(parser, &opening);
16292 pattern_node->
closing_loc = TOK2LOC(parser, &closing);
16294 return UP(pattern_node);
16306 pm_array_pattern_node_t *pattern_node = pm_array_pattern_node_constant_create(parser, node, &opening, &closing);
16307 pm_array_pattern_node_requireds_append(parser->
arena, pattern_node, inner);
16308 return UP(pattern_node);
16323 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16327 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16331 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->
previous));
16332 name = UP(pm_local_variable_target_node_create(
16334 &TOK2LOC(parser, &parser->
previous),
16336 (uint32_t) (depth == -1 ? 0 : depth)
16341 return pm_splat_node_create(parser, &
operator, name);
16349 assert(parser->
current.type == PM_TOKEN_USTAR_STAR);
16350 parser_lex(parser);
16355 if (accept1(parser, PM_TOKEN_KEYWORD_NIL)) {
16356 return UP(pm_no_keywords_parameter_node_create(parser, &
operator, &parser->
previous));
16359 if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
16363 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16367 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->
previous));
16368 value = UP(pm_local_variable_target_node_create(
16370 &TOK2LOC(parser, &parser->
previous),
16372 (uint32_t) (depth == -1 ? 0 : depth)
16376 return UP(pm_assoc_splat_node_create(parser, value, &
operator));
/**
 * Check whether the bytes in [start, end) spell a valid local variable name.
 *
 * The visible checks, in order: the slice is non-empty; it begins with a
 * character accepted by char_is_identifier_start(); that first character is
 * not reported as uppercase by pm_encoding_utf_8_isupper_char(); and every
 * remaining byte is consumed by repeated char_is_identifier() matches, so
 * that the cursor lands exactly on `end`.
 *
 * NOTE(review): only the UTF-8 uppercase check is visible here - confirm
 * whether other source encodings are handled by a branch not shown.
 *
 * @param parser The parser, read-only; passed to the character classifiers.
 * @param start The first byte of the slice.
 * @param end One past the last byte of the slice.
 * @return true when all of the checks above pass, false otherwise.
 */
16384pm_slice_is_valid_local(
const pm_parser_t *parser,
const uint8_t *start,
const uint8_t *end) {
16385 ptrdiff_t length = end - start;
16386 if (length == 0)
return false;
// `width` is the byte width of the leading character, or 0 when that
// character cannot start an identifier.
16389 size_t width = char_is_identifier_start(parser, start, end - start);
16390 if (width == 0)
return false;
16396 if (pm_encoding_utf_8_isupper_char(start, length))
return false;
// Advance one identifier character at a time; the loop stops at the first
// position where char_is_identifier() returns 0.
16401 const uint8_t *cursor = start + width;
16402 while ((width = char_is_identifier(parser, cursor, end - cursor))) cursor += width;
16403 return cursor == end;
16413 const uint8_t *start = parser->
start + PM_LOCATION_START(value_loc);
16414 const uint8_t *end = parser->
start + PM_LOCATION_END(value_loc);
16416 pm_constant_id_t constant_id = pm_parser_constant_id_raw(parser, start, end);
16419 if (pm_slice_is_valid_local(parser, start, end)) {
16420 depth = pm_parser_local_depth_constant_id(parser, constant_id);
16422 pm_parser_err(parser, PM_NODE_START(key), PM_NODE_LENGTH(key), PM_ERR_PATTERN_HASH_KEY_LOCALS);
16424 if ((end > start) && ((end[-1] ==
'!') || (end[-1] ==
'?'))) {
16425 PM_PARSER_ERR_FORMAT(parser, value_loc->
start, value_loc->
length, PM_ERR_INVALID_LOCAL_VARIABLE_WRITE, (
int) (end - start), (
const char *) start);
16430 pm_parser_local_add(parser, constant_id, start, end, 0);
16433 parse_pattern_capture(parser, captures, constant_id, value_loc);
16438 (uint32_t) (depth == -1 ? 0 : depth)
16441 return UP(pm_implicit_node_create(parser, UP(target)));
16451 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_KEY_DUPLICATE);
16464 switch (PM_NODE_TYPE(first_node)) {
16465 case PM_ASSOC_SPLAT_NODE:
16466 case PM_NO_KEYWORDS_PARAMETER_NODE:
16469 case PM_SYMBOL_NODE: {
16470 if (pm_symbol_node_label_p(parser, first_node)) {
16471 parse_pattern_hash_key(parser, &keys, first_node);
16474 if (match8(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16477 value = parse_pattern_hash_implicit_value(parser, captures, (
pm_symbol_node_t *) first_node);
16481 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16484 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16485 pm_node_list_append(parser->
arena, &assocs, assoc);
16494 pm_diagnostic_id_t diag_id = PM_NODE_TYPE_P(first_node, PM_INTERPOLATED_SYMBOL_NODE) ? PM_ERR_PATTERN_HASH_KEY_INTERPOLATED : PM_ERR_PATTERN_HASH_KEY_LABEL;
16495 pm_parser_err_node(parser, first_node, diag_id);
16497 pm_node_t *value = UP(pm_missing_node_create(parser, PM_NODE_START(first_node), PM_NODE_LENGTH(first_node)));
16498 pm_node_t *assoc = UP(pm_assoc_node_create(parser, first_node, NULL, value));
16500 pm_node_list_append(parser->
arena, &assocs, assoc);
16506 while (accept1(parser, PM_TOKEN_COMMA)) {
16508 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
16510 if (rest != NULL) {
16511 pm_parser_err_token(parser, &parser->
current, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16517 if (match1(parser, PM_TOKEN_USTAR_STAR)) {
16518 pm_node_t *assoc = parse_pattern_keyword_rest(parser, captures);
16520 if (rest == NULL) {
16523 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16524 pm_node_list_append(parser->
arena, &assocs, assoc);
16529 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
16530 key = parse_strings(parser, NULL,
true, (uint16_t) (depth + 1));
16532 if (PM_NODE_TYPE_P(key, PM_INTERPOLATED_SYMBOL_NODE)) {
16533 pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_INTERPOLATED);
16534 }
else if (!pm_symbol_node_label_p(parser, key)) {
16535 pm_parser_err_node(parser, key, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16537 }
else if (accept1(parser, PM_TOKEN_LABEL)) {
16538 key = UP(pm_symbol_node_label_create(parser, &parser->
previous));
16540 expect1(parser, PM_TOKEN_LABEL, PM_ERR_PATTERN_LABEL_AFTER_COMMA);
16543 key = UP(pm_symbol_node_create(parser, NULL, &label, NULL));
16546 parse_pattern_hash_key(parser, &keys, key);
16549 if (match7(parser, PM_TOKEN_COMMA, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
16550 if (PM_NODE_TYPE_P(key, PM_SYMBOL_NODE)) {
16551 value = parse_pattern_hash_implicit_value(parser, captures, (
pm_symbol_node_t *) key);
16553 value = UP(pm_missing_node_create(parser, PM_NODE_END(key), 0));
16556 value = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_KEY, (uint16_t) (depth + 1));
16559 pm_node_t *assoc = UP(pm_assoc_node_create(parser, key, NULL, value));
16561 if (rest != NULL) {
16562 pm_parser_err_node(parser, assoc, PM_ERR_PATTERN_EXPRESSION_AFTER_REST);
16565 pm_node_list_append(parser->
arena, &assocs, assoc);
16572 pm_static_literals_free(&keys);
16581 switch (parser->
current.type) {
16582 case PM_TOKEN_IDENTIFIER:
16583 case PM_TOKEN_METHOD_NAME: {
16584 parser_lex(parser);
16588 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16592 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->
previous));
16593 return UP(pm_local_variable_target_node_create(
16595 &TOK2LOC(parser, &parser->
previous),
16597 (uint32_t) (depth == -1 ? 0 : depth)
16600 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
16602 parser_lex(parser);
16604 if (accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
16607 return UP(pm_array_pattern_node_empty_create(parser, &opening, &parser->
previous));
16612 pm_node_t *inner = parse_pattern(parser, captures, PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_BRACKET, (uint16_t) (depth + 1));
16614 accept1(parser, PM_TOKEN_NEWLINE);
16615 expect1_opening(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_PATTERN_TERM_BRACKET, &opening);
16618 switch (PM_NODE_TYPE(inner)) {
16619 case PM_ARRAY_PATTERN_NODE: {
16622 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16623 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16625 pattern_node->
opening_loc = TOK2LOC(parser, &opening);
16626 pattern_node->
closing_loc = TOK2LOC(parser, &closing);
16628 return UP(pattern_node);
16633 case PM_FIND_PATTERN_NODE: {
16636 PM_NODE_START_SET_TOKEN(parser, pattern_node, &opening);
16637 PM_NODE_LENGTH_SET_TOKEN(parser, pattern_node, &closing);
16639 pattern_node->
opening_loc = TOK2LOC(parser, &opening);
16640 pattern_node->
closing_loc = TOK2LOC(parser, &closing);
16642 return UP(pattern_node);
16652 pm_array_pattern_node_requireds_append(parser->
arena, node, inner);
16655 case PM_TOKEN_BRACE_LEFT: {
16661 parser_lex(parser);
16663 if (accept1(parser, PM_TOKEN_BRACE_RIGHT)) {
16666 node = pm_hash_pattern_node_empty_create(parser, &opening, &parser->
previous);
16670 switch (parser->
current.type) {
16671 case PM_TOKEN_LABEL:
16672 parser_lex(parser);
16673 first_node = UP(pm_symbol_node_label_create(parser, &parser->
previous));
16675 case PM_TOKEN_USTAR_STAR:
16676 first_node = parse_pattern_keyword_rest(parser, captures);
16678 case PM_TOKEN_STRING_BEGIN:
16679 first_node = parse_expression(parser, PM_BINDING_POWER_MAX,
false,
true, PM_ERR_PATTERN_HASH_KEY_LABEL, (uint16_t) (depth + 1));
16683 parser_lex(parser);
16685 first_node = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->
previous), PM_TOKEN_LENGTH(&parser->
previous)));
16690 node = parse_pattern_hash(parser, captures, first_node, (uint16_t) (depth + 1));
16692 accept1(parser, PM_TOKEN_NEWLINE);
16693 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_PATTERN_TERM_BRACE, &opening);
16696 PM_NODE_START_SET_TOKEN(parser, node, &opening);
16697 PM_NODE_LENGTH_SET_TOKEN(parser, node, &closing);
16706 case PM_TOKEN_UDOT_DOT:
16707 case PM_TOKEN_UDOT_DOT_DOT: {
16709 parser_lex(parser);
16713 switch (parser->
current.type) {
16714 case PM_CASE_PRIMITIVE: {
16715 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX,
false,
false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16716 return UP(pm_range_node_create(parser, NULL, &
operator, right));
16719 pm_parser_err_token(parser, &
operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
16720 pm_node_t *right = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &
operator), PM_TOKEN_LENGTH(&
operator)));
16721 return UP(pm_range_node_create(parser, NULL, &
operator, right));
16725 case PM_CASE_PRIMITIVE: {
16726 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_MAX,
false,
true, diag_id, (uint16_t) (depth + 1));
16729 if (pm_symbol_node_label_p(parser, node))
return node;
16732 if (PM_NODE_TYPE(node) == PM_CALL_NODE) {
16733 pm_parser_err_node(parser, node, diag_id);
16734 pm_missing_node_t *missing_node = pm_missing_node_create(parser, PM_NODE_START(node), PM_NODE_LENGTH(node));
16736 pm_node_unreference(parser, node);
16737 return UP(missing_node);
16741 if (accept2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
16747 switch (parser->
current.type) {
16748 case PM_CASE_PRIMITIVE: {
16749 pm_node_t *right = parse_expression(parser, PM_BINDING_POWER_MAX,
false,
false, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE, (uint16_t) (depth + 1));
16750 return UP(pm_range_node_create(parser, node, &
operator, right));
16753 return UP(pm_range_node_create(parser, node, &
operator, NULL));
16759 case PM_TOKEN_CARET: {
16760 parser_lex(parser);
16765 switch (parser->
current.type) {
16766 case PM_TOKEN_IDENTIFIER: {
16767 parser_lex(parser);
16768 pm_node_t *variable = UP(parse_variable(parser));
16770 if (variable == NULL) {
16771 PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, &parser->
previous, PM_ERR_NO_LOCAL_VARIABLE);
16772 variable = UP(pm_local_variable_read_node_missing_create(parser, &parser->
previous, 0));
16775 return UP(pm_pinned_variable_node_create(parser, &
operator, variable));
16777 case PM_TOKEN_INSTANCE_VARIABLE: {
16778 parser_lex(parser);
16779 pm_node_t *variable = UP(pm_instance_variable_read_node_create(parser, &parser->
previous));
16781 return UP(pm_pinned_variable_node_create(parser, &
operator, variable));
16783 case PM_TOKEN_CLASS_VARIABLE: {
16784 parser_lex(parser);
16785 pm_node_t *variable = UP(pm_class_variable_read_node_create(parser, &parser->
previous));
16787 return UP(pm_pinned_variable_node_create(parser, &
operator, variable));
16789 case PM_TOKEN_GLOBAL_VARIABLE: {
16790 parser_lex(parser);
16791 pm_node_t *variable = UP(pm_global_variable_read_node_create(parser, &parser->
previous));
16793 return UP(pm_pinned_variable_node_create(parser, &
operator, variable));
16795 case PM_TOKEN_NUMBERED_REFERENCE: {
16796 parser_lex(parser);
16797 pm_node_t *variable = UP(pm_numbered_reference_read_node_create(parser, &parser->
previous));
16799 return UP(pm_pinned_variable_node_create(parser, &
operator, variable));
16801 case PM_TOKEN_BACK_REFERENCE: {
16802 parser_lex(parser);
16803 pm_node_t *variable = UP(pm_back_reference_read_node_create(parser, &parser->
previous));
16805 return UP(pm_pinned_variable_node_create(parser, &
operator, variable));
16807 case PM_TOKEN_PARENTHESIS_LEFT: {
16812 parser_lex(parser);
16814 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_STATEMENT,
true,
false, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN, (uint16_t) (depth + 1));
16817 accept1(parser, PM_TOKEN_NEWLINE);
16818 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &lparen);
16819 return UP(pm_pinned_expression_node_create(parser, expression, &
operator, &lparen, &parser->
previous));
16824 pm_parser_err_token(parser, &
operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
16825 pm_node_t *variable = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &
operator), PM_TOKEN_LENGTH(&
operator)));
16826 return UP(pm_pinned_variable_node_create(parser, &
operator, variable));
16830 case PM_TOKEN_UCOLON_COLON: {
16832 parser_lex(parser);
16834 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
16837 return parse_pattern_constant_path(parser, captures, UP(node), (uint16_t) (depth + 1));
16839 case PM_TOKEN_CONSTANT: {
16841 parser_lex(parser);
16843 pm_node_t *node = UP(pm_constant_read_node_create(parser, &constant));
16844 return parse_pattern_constant_path(parser, captures, node, (uint16_t) (depth + 1));
16847 pm_parser_err_current(parser, diag_id);
16848 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current)));
/*
 * Per-node callback: the next visible line of this file hands it to
 * pm_visit_node() together with the parser as the opaque argument.
 *
 * For a PM_LOCAL_VARIABLE_TARGET_NODE it records a
 * PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE diagnostic spanning the node's
 * start and length.
 *
 * NOTE(review): the return type, the statement that derives `parser` from
 * `data`, the return values, and the handling of other node types are not
 * visible in this listing (the embedded line numbers skip). Verify against
 * the complete file.
 */
16853parse_pattern_alternation_error_each(
const pm_node_t *node,
void *data) {
16854 switch (PM_NODE_TYPE(node)) {
16855 case PM_LOCAL_VARIABLE_TARGET_NODE: {
/* The diagnostic covers exactly the target node's source range. */
16857 pm_parser_err(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PATTERN_CAPTURE_IN_ALTERNATIVE);
16871 pm_visit_node(node, parse_pattern_alternation_error_each, parser);
16881 bool alternation =
false;
16883 while ((node == NULL) || (alternation = accept1(parser, PM_TOKEN_PIPE))) {
16884 if (alternation && !PM_NODE_TYPE_P(node, PM_ALTERNATION_PATTERN_NODE) && captures->
size) {
16885 parse_pattern_alternation_error(parser, node);
16888 switch (parser->
current.type) {
16889 case PM_TOKEN_IDENTIFIER:
16890 case PM_TOKEN_BRACKET_LEFT_ARRAY:
16891 case PM_TOKEN_BRACE_LEFT:
16892 case PM_TOKEN_CARET:
16893 case PM_TOKEN_CONSTANT:
16894 case PM_TOKEN_UCOLON_COLON:
16895 case PM_TOKEN_UDOT_DOT:
16896 case PM_TOKEN_UDOT_DOT_DOT:
16897 case PM_CASE_PRIMITIVE: {
16898 if (!alternation) {
16899 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
16902 pm_node_t *right = parse_pattern_primitive(parser, captures, PM_ERR_PATTERN_EXPRESSION_AFTER_PIPE, (uint16_t) (depth + 1));
16904 if (captures->
size) parse_pattern_alternation_error(parser, right);
16905 node = UP(pm_alternation_pattern_node_create(parser, node, right, &
operator));
16910 case PM_TOKEN_PARENTHESIS_LEFT:
16911 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
16914 parser_lex(parser);
16916 pm_node_t *body = parse_pattern(parser, captures, PM_PARSE_PATTERN_SINGLE, PM_ERR_PATTERN_EXPRESSION_AFTER_PAREN, (uint16_t) (depth + 1));
16917 accept1(parser, PM_TOKEN_NEWLINE);
16918 expect1_opening(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_PATTERN_TERM_PAREN, &opening);
16919 pm_node_t *right = UP(pm_parentheses_node_create(parser, &opening, body, &parser->
previous, 0));
16921 if (!alternation) {
16924 if (captures->
size) parse_pattern_alternation_error(parser, right);
16925 node = UP(pm_alternation_pattern_node_create(parser, node, right, &
operator));
16931 pm_parser_err_current(parser, diag_id);
16932 pm_node_t *right = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current)));
16934 if (!alternation) {
16937 if (captures->
size) parse_pattern_alternation_error(parser, right);
16938 node = UP(pm_alternation_pattern_node_create(parser, node, right, &parser->
previous));
16948 while (accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
16950 expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_PATTERN_IDENT_AFTER_HROCKET);
16955 if ((depth = pm_parser_local_depth_constant_id(parser, constant_id)) == -1) {
16959 parse_pattern_capture(parser, captures, constant_id, &TOK2LOC(parser, &parser->
previous));
16962 &TOK2LOC(parser, &parser->
previous),
16964 (uint32_t) (depth == -1 ? 0 : depth)
16967 node = UP(pm_capture_pattern_node_create(parser, node, target, &
operator));
16980 bool leading_rest =
false;
16981 bool trailing_rest =
false;
16983 switch (parser->
current.type) {
16984 case PM_TOKEN_LABEL: {
16985 parser_lex(parser);
16987 node = UP(parse_pattern_hash(parser, captures, key, (uint16_t) (depth + 1)));
16989 if (!(flags & PM_PARSE_PATTERN_TOP)) {
16990 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
16995 case PM_TOKEN_USTAR_STAR: {
16996 node = parse_pattern_keyword_rest(parser, captures);
16997 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
16999 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17000 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17005 case PM_TOKEN_STRING_BEGIN: {
17008 node = parse_pattern_primitive(parser, captures, diag_id, (uint16_t) (depth + 1));
17010 if (pm_symbol_node_label_p(parser, node)) {
17011 node = UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17013 if (!(flags & PM_PARSE_PATTERN_TOP)) {
17014 pm_parser_err_node(parser, node, PM_ERR_PATTERN_HASH_IMPLICIT);
17020 node = parse_pattern_primitives(parser, captures, node, diag_id, (uint16_t) (depth + 1));
17023 case PM_TOKEN_USTAR: {
17024 if (flags & (PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI)) {
17025 parser_lex(parser);
17026 node = UP(parse_pattern_rest(parser, captures));
17027 leading_rest =
true;
17033 node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17039 if (pm_symbol_node_label_p(parser, node)) {
17040 return UP(parse_pattern_hash(parser, captures, node, (uint16_t) (depth + 1)));
17043 if ((flags & PM_PARSE_PATTERN_MULTI) && match1(parser, PM_TOKEN_COMMA)) {
17048 pm_node_list_append(parser->
arena, &nodes, node);
17051 while (accept1(parser, PM_TOKEN_COMMA)) {
17053 if (match7(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17054 node = UP(pm_implicit_rest_node_create(parser, &parser->
previous));
17055 pm_node_list_append(parser->
arena, &nodes, node);
17056 trailing_rest =
true;
17060 if (accept1(parser, PM_TOKEN_USTAR)) {
17061 node = UP(parse_pattern_rest(parser, captures));
17066 if (trailing_rest) {
17067 pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
17070 trailing_rest =
true;
17072 node = parse_pattern_primitives(parser, captures, NULL, PM_ERR_PATTERN_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
17075 pm_node_list_append(parser->
arena, &nodes, node);
17082 if (leading_rest && PM_NODE_TYPE_P(nodes.
nodes[nodes.
size - 1], PM_SPLAT_NODE)) {
17083 node = UP(pm_find_pattern_node_create(parser, &nodes));
17085 if (nodes.
size == 2) {
17086 pm_parser_err_node(parser, node, PM_ERR_PATTERN_FIND_MISSING_INNER);
17089 node = UP(pm_array_pattern_node_node_list_create(parser, &nodes));
17091 if (leading_rest && trailing_rest) {
17092 pm_parser_err_node(parser, node, PM_ERR_PATTERN_ARRAY_MULTIPLE_RESTS);
17097 }
else if (leading_rest) {
17100 node = UP(pm_array_pattern_node_rest_create(parser, node));
/*
 * Dispatch on the type of a numeric node. The visible case labels cover
 * PM_INTEGER_NODE, PM_FLOAT_NODE, PM_RATIONAL_NODE and PM_IMAGINARY_NODE.
 *
 * NOTE(review): the body of every case is missing from this listing (the
 * embedded line numbers skip between labels), so what is done to each node
 * type cannot be confirmed here; the function name suggests the node is
 * adjusted to represent a negative literal -- verify against the complete
 * file.
 */
17112parse_negative_numeric(
pm_node_t *node) {
17113 switch (PM_NODE_TYPE(node)) {
17114 case PM_INTEGER_NODE: {
17121 case PM_FLOAT_NODE: {
17128 case PM_RATIONAL_NODE: {
17135 case PM_IMAGINARY_NODE:
/* Presumably the fallback for any other node type: callers are expected to
 * pass only the numeric node types listed above -- TODO confirm, the
 * enclosing label is not visible. */
17141 assert(
false &&
"unreachable");
17154 case PM_ERR_HASH_KEY: {
17158 case PM_ERR_HASH_VALUE:
17159 case PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR: {
17163 case PM_ERR_UNARY_RECEIVER: {
17168 case PM_ERR_UNARY_DISALLOWED:
17169 case PM_ERR_EXPECT_ARGUMENT: {
17174 pm_parser_err_previous(parser, diag_id);
17184#define CONTEXT_NONE 0
17185#define CONTEXT_THROUGH_ENSURE 1
17186#define CONTEXT_THROUGH_ELSE 2
17189 int context = CONTEXT_NONE;
17191 while (context_node != NULL) {
17192 switch (context_node->
context) {
17213 if (context == CONTEXT_NONE) {
17214 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_WITHOUT_RESCUE);
17215 }
else if (context == CONTEXT_THROUGH_ENSURE) {
17216 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ENSURE);
17217 }
else if (context == CONTEXT_THROUGH_ELSE) {
17218 pm_parser_err_node(parser, node, PM_ERR_INVALID_RETRY_AFTER_ELSE);
17230 context = CONTEXT_THROUGH_ELSE;
17241 context = CONTEXT_THROUGH_ENSURE;
17245 assert(
false &&
"unreachable");
17276 context_node = context_node->
prev;
/* Undefine the macro that is actually defined above (CONTEXT_THROUGH_ENSURE).
 * No macro named CONTEXT_ENSURE is defined in the visible code, so the previous
 * "#undef CONTEXT_ENSURE" had no effect and left CONTEXT_THROUGH_ENSURE defined
 * for the rest of the translation unit. */
17280#undef CONTEXT_THROUGH_ENSURE
17291 while (context_node != NULL) {
17292 switch (context_node->
context) {
17317 pm_parser_err_node(parser, node, PM_ERR_INVALID_YIELD);
17321 assert(
false &&
"unreachable");
17363 context_node = context_node->
prev;
/*
 * Error callback with a (start, end, message, data) signature. It reports a
 * PM_ERR_REGEXP_PARSE_ERROR diagnostic on callback_data->parser, formatted
 * with `message`, at the position and length described by `location`.
 *
 * NOTE(review): the declarations of `callback_data` and `location`, and
 * whatever `location` is set to when callback_data->shared is false, are not
 * visible in this listing (the embedded line numbers skip). Presumably
 * `callback_data` is `data` cast to the error-data struct whose start, end
 * and shared fields are filled in further down this file -- confirm against
 * the complete file.
 */
17395parse_regular_expression_error(
const uint8_t *start,
const uint8_t *end,
const char *message,
void *data) {
/* When the `shared` flag is set, the reported range is exactly the
 * [start, end) span handed to this callback. */
17399 if (callback_data->
shared) {
17400 location = (
pm_token_t) { .
type = 0, .start = start, .end = end };
/* Convert the token-style range into an offset and length for the
 * diagnostic. */
17405 PM_PARSER_ERR_FORMAT(callback_data->
parser, PM_TOKEN_START(callback_data->
parser, &location), PM_TOKEN_LENGTH(&location), PM_ERR_REGEXP_PARSE_ERROR, message);
17416 .
start = parser->
start + PM_NODE_START(node),
17417 .end = parser->
start + PM_NODE_END(node),
17418 .shared = unescaped->
type == PM_STRING_SHARED
17421 pm_regexp_parse(parser,
pm_string_source(unescaped),
pm_string_length(unescaped), PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED), NULL, NULL, parse_regular_expression_error, &error_data);
17428parse_expression_prefix(
pm_parser_t *parser, pm_binding_power_t binding_power,
bool accepts_command_call,
bool accepts_label,
pm_diagnostic_id_t diag_id, uint16_t depth) {
17429 switch (parser->
current.type) {
17430 case PM_TOKEN_BRACKET_LEFT_ARRAY: {
17431 parser_lex(parser);
17434 pm_accepts_block_stack_push(parser,
true);
17435 bool parsed_bare_hash =
false;
17437 while (!match2(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_EOF)) {
17438 bool accepted_newline = accept1(parser, PM_TOKEN_NEWLINE);
17442 if (accepted_newline && match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17448 if (accept1(parser, PM_TOKEN_COMMA)) {
17451 if (accepted_newline) {
17452 pm_parser_err_previous(parser, PM_ERR_INVALID_COMMA);
17466 if (match1(parser, PM_TOKEN_BRACKET_RIGHT))
break;
17470 if (accept1(parser, PM_TOKEN_USTAR)) {
17474 if (match3(parser, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_EOF)) {
17475 pm_parser_scope_forwarding_positionals_check(parser, &
operator);
17477 expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_ARRAY_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
17480 element = UP(pm_splat_node_create(parser, &
operator, expression));
17481 }
else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
17482 if (parsed_bare_hash) {
17483 pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
17486 element = UP(pm_keyword_hash_node_create(parser));
17489 if (!match8(parser, PM_TOKEN_EOF, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_KEYWORD_DO, PM_TOKEN_PARENTHESIS_RIGHT)) {
17490 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17493 pm_static_literals_free(&hash_keys);
17494 parsed_bare_hash =
true;
17496 element = parse_value_expression(parser, PM_BINDING_POWER_DEFINED,
false,
true, PM_ERR_ARRAY_EXPRESSION, (uint16_t) (depth + 1));
17498 if (pm_symbol_node_label_p(parser, element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
17499 if (parsed_bare_hash) {
17500 pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
17505 pm_hash_key_static_literals_add(parser, &hash_keys, element);
17508 if (parser->
previous.
type == PM_TOKEN_EQUAL_GREATER) {
17512 pm_node_t *value = parse_value_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_HASH_VALUE, (uint16_t) (depth + 1));
17513 pm_node_t *assoc = UP(pm_assoc_node_create(parser, element, NTOK2PTR(
operator), value));
17514 pm_keyword_hash_node_elements_append(parser->
arena, hash, assoc);
17516 element = UP(hash);
17517 if (accept1(parser, PM_TOKEN_COMMA) && !match1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17518 parse_assocs(parser, &hash_keys, element, (uint16_t) (depth + 1));
17521 pm_static_literals_free(&hash_keys);
17522 parsed_bare_hash =
true;
17526 pm_array_node_elements_append(parser->
arena, array, element);
17527 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE))
break;
17530 accept1(parser, PM_TOKEN_NEWLINE);
17532 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
17538 pm_array_node_close_set(parser, array, &parser->
previous);
17539 pm_accepts_block_stack_pop(parser);
17543 case PM_TOKEN_PARENTHESIS_LEFT:
17544 case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES: {
17546 pm_node_flags_t flags = 0;
17549 pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits);
17551 parser_lex(parser);
17553 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17554 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17555 }
else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17562 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_EOF)) {
17563 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17564 pop_block_exits(parser, previous_block_exits);
17565 return UP(pm_parentheses_node_create(parser, &opening, NULL, &parser->
previous, flags));
17570 pm_accepts_block_stack_push(parser,
true);
17572 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_STATEMENT,
true,
false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17573 context_pop(parser);
17578 bool terminator_found =
false;
17580 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17581 terminator_found =
true;
17582 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17583 }
else if (accept1(parser, PM_TOKEN_NEWLINE)) {
17584 terminator_found =
true;
17587 if (terminator_found) {
17589 if (accept1(parser, PM_TOKEN_SEMICOLON)) {
17590 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17591 }
else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
17600 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17601 if (opening.
type == PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES) {
17602 lex_state_set(parser, PM_LEX_STATE_ENDARG);
17605 parser_lex(parser);
17606 pm_accepts_block_stack_pop(parser);
17607 pop_block_exits(parser, previous_block_exits);
17609 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) || PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17615 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE) && ((
pm_multi_target_node_t *) statement)->lparen_loc.length == 0) {
17618 multi_target = pm_multi_target_node_create(parser);
17619 pm_multi_target_node_targets_append(parser, multi_target, statement);
17622 multi_target->
lparen_loc = TOK2LOC(parser, &opening);
17624 PM_NODE_START_SET_TOKEN(parser, multi_target, &opening);
17625 PM_NODE_LENGTH_SET_TOKEN(parser, multi_target, &parser->
previous);
17628 if (match1(parser, PM_TOKEN_COMMA) && (binding_power == PM_BINDING_POWER_STATEMENT)) {
17629 result = parse_targets(parser, UP(multi_target), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17630 accept1(parser, PM_TOKEN_NEWLINE);
17632 result = UP(multi_target);
17641 }
else if (binding_power != PM_BINDING_POWER_STATEMENT) {
17644 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17645 }
else if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
17649 pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
17659 pm_statements_node_body_append(parser, statements, statement,
true);
17661 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->
previous, flags));
17668 flags |= PM_PARENTHESES_NODE_FLAGS_MULTIPLE_STATEMENTS;
17671 pm_statements_node_body_append(parser, statements, statement,
true);
17675 if (!terminator_found && !match1(parser, PM_TOKEN_EOF)) {
17681 pm_node_t *node = parse_expression(parser, PM_BINDING_POWER_STATEMENT,
true,
false, PM_ERR_CANNOT_PARSE_EXPRESSION, (uint16_t) (depth + 1));
17682 pm_statements_node_body_append(parser, statements, node,
true);
17689 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) parser->
recovering =
false;
17695 if (PM_NODE_TYPE_P(node, PM_MISSING_NODE))
break;
17699 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
17700 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
17701 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT))
break;
17702 }
else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
17704 }
else if (!match1(parser, PM_TOKEN_EOF)) {
17711 context_pop(parser);
17712 pm_accepts_block_stack_pop(parser);
17713 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
17722 if (PM_NODE_TYPE_P(statement, PM_SPLAT_NODE)) {
17724 pm_multi_target_node_targets_append(parser, multi_target, statement);
17726 statement = UP(multi_target);
17730 if (PM_NODE_TYPE_P(statement, PM_MULTI_TARGET_NODE)) {
17731 const uint8_t *offset = parser->
start + PM_NODE_END(statement);
17732 pm_token_t operator = { .
type = PM_TOKEN_EQUAL, .start = offset, .end = offset };
17733 pm_node_t *value = UP(pm_missing_node_create(parser, PM_NODE_END(statement), 0));
17735 statement = UP(pm_multi_write_node_create(parser, (
pm_multi_target_node_t *) statement, &
operator, value));
17738 pm_parser_err_node(parser, statement, PM_ERR_WRITE_TARGET_UNEXPECTED);
17742 pop_block_exits(parser, previous_block_exits);
17743 pm_void_statements_check(parser, statements,
true);
17744 return UP(pm_parentheses_node_create(parser, &opening, UP(statements), &parser->
previous, flags));
17746 case PM_TOKEN_BRACE_LEFT: {
17757 pm_accepts_block_stack_push(parser,
true);
17758 parser_lex(parser);
17763 if (!match2(parser, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_EOF)) {
17764 if (current_hash_keys != NULL) {
17765 parse_assocs(parser, current_hash_keys, UP(node), (uint16_t) (depth + 1));
17768 parse_assocs(parser, &hash_keys, UP(node), (uint16_t) (depth + 1));
17769 pm_static_literals_free(&hash_keys);
17772 accept1(parser, PM_TOKEN_NEWLINE);
17775 pm_accepts_block_stack_pop(parser);
17776 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_HASH_TERM, &opening);
17777 pm_hash_node_closing_loc_set(parser, node, &parser->
previous);
17781 case PM_TOKEN_CHARACTER_LITERAL: {
17782 pm_node_t *node = UP(pm_string_node_create_current_string(
17785 .type = PM_TOKEN_STRING_BEGIN,
17786 .start = parser->
current.start,
17787 .end = parser->
current.start + 1
17790 .type = PM_TOKEN_STRING_CONTENT,
17791 .start = parser->
current.start + 1,
17797 pm_node_flag_set(node, parse_unescaped_encoding(parser));
17801 parser_lex(parser);
17805 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
17806 return parse_strings(parser, node,
false, (uint16_t) (depth + 1));
17811 case PM_TOKEN_CLASS_VARIABLE: {
17812 parser_lex(parser);
17813 pm_node_t *node = UP(pm_class_variable_read_node_create(parser, &parser->
previous));
17815 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17816 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17821 case PM_TOKEN_CONSTANT: {
17822 parser_lex(parser);
17828 match1(parser, PM_TOKEN_PARENTHESIS_LEFT) ||
17829 (accepts_command_call && (token_begins_expression_p(parser->
current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17830 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17831 match1(parser, PM_TOKEN_BRACE_LEFT)
17834 parse_arguments_list(parser, &arguments,
true, accepts_command_call, (uint16_t) (depth + 1));
17835 return UP(pm_call_node_fcall_create(parser, &constant, &arguments));
17840 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17843 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17848 case PM_TOKEN_UCOLON_COLON: {
17849 parser_lex(parser);
17852 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
17853 pm_node_t *node = UP(pm_constant_path_node_create(parser, NULL, &delimiter, &parser->
previous));
17855 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17856 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17861 case PM_TOKEN_UDOT_DOT:
17862 case PM_TOKEN_UDOT_DOT_DOT: {
17864 parser_lex(parser);
17866 pm_node_t *right = parse_expression(parser, pm_binding_powers[
operator.
type].left,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
17872 if (match2(parser, PM_TOKEN_DOT_DOT, PM_TOKEN_DOT_DOT_DOT)) {
17873 pm_parser_err_current(parser, PM_ERR_UNEXPECTED_RANGE_OPERATOR);
17876 return UP(pm_range_node_create(parser, NULL, &
operator, right));
17878 case PM_TOKEN_FLOAT:
17879 parser_lex(parser);
17880 return UP(pm_float_node_create(parser, &parser->
previous));
17881 case PM_TOKEN_FLOAT_IMAGINARY:
17882 parser_lex(parser);
17883 return UP(pm_float_node_imaginary_create(parser, &parser->
previous));
17884 case PM_TOKEN_FLOAT_RATIONAL:
17885 parser_lex(parser);
17886 return UP(pm_float_node_rational_create(parser, &parser->
previous));
17887 case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
17888 parser_lex(parser);
17889 return UP(pm_float_node_rational_imaginary_create(parser, &parser->
previous));
17890 case PM_TOKEN_NUMBERED_REFERENCE: {
17891 parser_lex(parser);
17892 pm_node_t *node = UP(pm_numbered_reference_read_node_create(parser, &parser->
previous));
17894 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17895 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17900 case PM_TOKEN_GLOBAL_VARIABLE: {
17901 parser_lex(parser);
17902 pm_node_t *node = UP(pm_global_variable_read_node_create(parser, &parser->
previous));
17904 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17905 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17910 case PM_TOKEN_BACK_REFERENCE: {
17911 parser_lex(parser);
17912 pm_node_t *node = UP(pm_back_reference_read_node_create(parser, &parser->
previous));
17914 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
17915 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17920 case PM_TOKEN_IDENTIFIER:
17921 case PM_TOKEN_METHOD_NAME: {
17922 parser_lex(parser);
17924 pm_node_t *node = parse_variable_call(parser);
17926 if (PM_NODE_TYPE_P(node, PM_CALL_NODE)) {
17934 if (parse_arguments_list(parser, &arguments,
true, accepts_command_call, (uint16_t) (depth + 1))) {
17937 pm_node_flag_unset(UP(call), PM_CALL_NODE_FLAGS_VARIABLE_CALL);
17946 PM_NODE_LENGTH_SET_LOCATION(call, &call->
message_loc);
17948 PM_NODE_LENGTH_SET_LOCATION(call, end);
17956 (accepts_command_call && (token_begins_expression_p(parser->
current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR))) ||
17957 (pm_accepts_block_stack_p(parser) && match1(parser, PM_TOKEN_KEYWORD_DO)) ||
17958 match1(parser, PM_TOKEN_BRACE_LEFT)
17961 parse_arguments_list(parser, &arguments,
true, accepts_command_call, (uint16_t) (depth + 1));
17962 pm_call_node_t *fcall = pm_call_node_fcall_create(parser, &identifier, &arguments);
17964 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
17968 pm_node_unreference(parser, node);
17974 assert(PM_NODE_TYPE_P(node, PM_LOCAL_VARIABLE_READ_NODE));
17976 if (pm_token_is_numbered_parameter(parser, PM_TOKEN_START(parser, &identifier), PM_TOKEN_LENGTH(&identifier))) {
17977 pm_node_unreference(parser, node);
17980 pm_locals_unread(&pm_parser_scope_find(parser, cast->
depth)->
locals, cast->
name);
17988 if ((binding_power == PM_BINDING_POWER_STATEMENT) && match1(parser, PM_TOKEN_COMMA)) {
17989 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
17994 case PM_TOKEN_HEREDOC_START: {
18000 size_t common_whitespace = (size_t) -1;
18003 parser_lex(parser);
18009 if (match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18015 if (lex_mode.
quote == PM_HEREDOC_QUOTE_BACKTICK) {
18016 node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->
previous, &
PM_STRING_EMPTY));
18021 PM_NODE_LENGTH_SET_TOKEN(parser, node, &opening);
18022 }
else if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) == NULL) {
18029 node = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->
previous), PM_TOKEN_LENGTH(&parser->
previous)));
18030 }
else if (PM_NODE_TYPE_P(part, PM_STRING_NODE) && match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18035 pm_node_flag_set(part, parse_unescaped_encoding(parser));
18042 if (lex_mode.
quote == PM_HEREDOC_QUOTE_BACKTICK) {
18044 cast->
base.
type = PM_X_STRING_NODE;
18047 if (lex_mode.
indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (
size_t) -1) && (common_whitespace != 0)) {
18048 parse_heredoc_dedent_string(parser->
arena, &cast->
unescaped, common_whitespace);
18058 pm_node_list_append(parser->
arena, &parts, part);
18060 while (!match2(parser, PM_TOKEN_HEREDOC_END, PM_TOKEN_EOF)) {
18061 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
18062 pm_node_list_append(parser->
arena, &parts, part);
18068 if (lex_mode.
quote == PM_HEREDOC_QUOTE_BACKTICK) {
18070 cast->
parts = parts;
18073 pm_interpolated_xstring_node_closing_set(parser, cast, &parser->
previous);
18081 pm_interpolated_string_node_closing_set(parser, cast, &parser->
previous);
18089 if (lex_mode.
indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (
size_t) -1) && (common_whitespace != 0)) {
18091 if (lex_mode.
quote == PM_HEREDOC_QUOTE_BACKTICK) {
18097 parse_heredoc_dedent(parser, nodes, common_whitespace);
18101 if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
18102 return parse_strings(parser, node,
false, (uint16_t) (depth + 1));
18107 case PM_TOKEN_INSTANCE_VARIABLE: {
18108 parser_lex(parser);
18109 pm_node_t *node = UP(pm_instance_variable_read_node_create(parser, &parser->
previous));
18111 if (binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
18112 node = parse_targets_validate(parser, node, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
18117 case PM_TOKEN_INTEGER: {
18119 parser_lex(parser);
18120 return UP(pm_integer_node_create(parser, base, &parser->
previous));
18122 case PM_TOKEN_INTEGER_IMAGINARY: {
18124 parser_lex(parser);
18125 return UP(pm_integer_node_imaginary_create(parser, base, &parser->
previous));
18127 case PM_TOKEN_INTEGER_RATIONAL: {
18129 parser_lex(parser);
18130 return UP(pm_integer_node_rational_create(parser, base, &parser->
previous));
18132 case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY: {
18134 parser_lex(parser);
18135 return UP(pm_integer_node_rational_imaginary_create(parser, base, &parser->
previous));
18137 case PM_TOKEN_KEYWORD___ENCODING__:
18138 parser_lex(parser);
18139 return UP(pm_source_encoding_node_create(parser, &parser->
previous));
18140 case PM_TOKEN_KEYWORD___FILE__:
18141 parser_lex(parser);
18142 return UP(pm_source_file_node_create(parser, &parser->
previous));
18143 case PM_TOKEN_KEYWORD___LINE__:
18144 parser_lex(parser);
18145 return UP(pm_source_line_node_create(parser, &parser->
previous));
18146 case PM_TOKEN_KEYWORD_ALIAS: {
18147 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18148 pm_parser_err_current(parser, PM_ERR_STATEMENT_ALIAS);
18151 parser_lex(parser);
18154 pm_node_t *new_name = parse_alias_argument(parser,
true, (uint16_t) (depth + 1));
18155 pm_node_t *old_name = parse_alias_argument(parser,
false, (uint16_t) (depth + 1));
18157 switch (PM_NODE_TYPE(new_name)) {
18158 case PM_BACK_REFERENCE_READ_NODE:
18159 case PM_NUMBERED_REFERENCE_READ_NODE:
18160 case PM_GLOBAL_VARIABLE_READ_NODE: {
18161 if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
18162 if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
18163 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT_NUMBERED_REFERENCE);
18166 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18169 return UP(pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name));
18171 case PM_SYMBOL_NODE:
18172 case PM_INTERPOLATED_SYMBOL_NODE: {
18173 if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
18174 pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18179 return UP(pm_alias_method_node_create(parser, &keyword, new_name, old_name));
18182 case PM_TOKEN_KEYWORD_CASE: {
18183 size_t opening_newline_index = token_newline_index(parser);
18184 parser_lex(parser);
18190 pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits);
18192 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18193 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18195 }
else if (match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_END)) {
18197 }
else if (!token_begins_expression_p(parser->
current.type)) {
18200 predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION,
true,
false, PM_ERR_CASE_EXPRESSION_AFTER_CASE, (uint16_t) (depth + 1));
18201 while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
18204 if (match1(parser, PM_TOKEN_KEYWORD_END)) {
18205 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword,
false,
false);
18206 parser_lex(parser);
18207 pop_block_exits(parser, previous_block_exits);
18208 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18209 return UP(pm_case_node_create(parser, &case_keyword, predicate, &parser->
previous));
18216 if (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18217 pm_case_node_t *case_node = pm_case_node_create(parser, &case_keyword, predicate, NULL);
18223 while (match1(parser, PM_TOKEN_KEYWORD_WHEN)) {
18224 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword,
false,
true);
18225 parser_lex(parser);
18228 pm_when_node_t *when_node = pm_when_node_create(parser, &when_keyword);
18231 if (accept1(parser, PM_TOKEN_USTAR)) {
18233 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
18235 pm_splat_node_t *splat_node = pm_splat_node_create(parser, &
operator, expression);
18236 pm_when_node_conditions_append(parser->
arena, when_node, UP(splat_node));
18238 if (PM_NODE_TYPE_P(expression, PM_MISSING_NODE))
break;
18240 pm_node_t *condition = parse_value_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_CASE_EXPRESSION_AFTER_WHEN, (uint16_t) (depth + 1));
18241 pm_when_node_conditions_append(parser->
arena, when_node, condition);
18245 if (PM_NODE_TYPE_P(condition, PM_MISSING_NODE))
break;
18249 if (PM_NODE_TYPE_P(condition, PM_STRING_NODE)) {
18250 pm_node_flag_set(condition, PM_STRING_FLAGS_FROZEN | PM_NODE_FLAG_STATIC_LITERAL);
18251 }
else if (PM_NODE_TYPE_P(condition, PM_SOURCE_FILE_NODE)) {
18252 pm_node_flag_set(condition, PM_NODE_FLAG_STATIC_LITERAL);
18255 pm_when_clause_static_literals_add(parser, &literals, condition);
18257 }
while (accept1(parser, PM_TOKEN_COMMA));
18259 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18260 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18261 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->
previous);
18264 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_WHEN_DELIMITER);
18265 pm_when_node_then_keyword_loc_set(parser, when_node, &parser->
previous);
18268 if (!match3(parser, PM_TOKEN_KEYWORD_WHEN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18270 if (statements != NULL) {
18271 pm_when_node_statements_set(when_node, statements);
18275 pm_case_node_condition_append(parser->
arena, case_node, UP(when_node));
18281 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18284 pm_static_literals_free(&literals);
18285 node = UP(case_node);
18287 pm_case_match_node_t *case_node = pm_case_match_node_create(parser, &case_keyword, predicate);
18291 if (predicate == NULL) {
18292 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MATCH_MISSING_PREDICATE);
18298 while (match1(parser, PM_TOKEN_KEYWORD_IN)) {
18299 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword,
false,
true);
18304 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
18306 parser_lex(parser);
18311 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
18318 if (accept1(parser, PM_TOKEN_KEYWORD_IF_MODIFIER)) {
18320 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION,
true,
false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
18321 pattern = UP(pm_if_node_modifier_create(parser, pattern, &keyword, predicate));
18322 }
else if (accept1(parser, PM_TOKEN_KEYWORD_UNLESS_MODIFIER)) {
18324 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION,
true,
false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
18325 pattern = UP(pm_unless_node_modifier_create(parser, pattern, &keyword, predicate));
18332 if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18333 if (accept1(parser, PM_TOKEN_KEYWORD_THEN)) {
18337 expect1(parser, PM_TOKEN_KEYWORD_THEN, PM_ERR_EXPECT_IN_DELIMITER);
18344 if (match3(parser, PM_TOKEN_KEYWORD_IN, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18352 pm_node_t *condition = UP(pm_in_node_create(parser, pattern, statements, &in_keyword, NTOK2PTR(then_keyword)));
18353 pm_case_match_node_condition_append(parser->
arena, case_node, condition);
18359 pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
18362 node = UP(case_node);
18365 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18366 if (accept1(parser, PM_TOKEN_KEYWORD_ELSE)) {
18370 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
18371 else_node = pm_else_node_create(parser, &else_keyword, parse_statements(parser,
PM_CONTEXT_ELSE, (uint16_t) (depth + 1)), &parser->
current);
18373 else_node = pm_else_node_create(parser, &else_keyword, NULL, &parser->
current);
18376 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18377 pm_case_node_else_clause_set((
pm_case_node_t *) node, else_node);
18383 parser_warn_indentation_mismatch(parser, opening_newline_index, &case_keyword,
false,
false);
18384 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CASE_TERM, &case_keyword);
18386 if (PM_NODE_TYPE_P(node, PM_CASE_NODE)) {
18392 pop_block_exits(parser, previous_block_exits);
18395 case PM_TOKEN_KEYWORD_BEGIN: {
18396 size_t opening_newline_index = token_newline_index(parser);
18397 parser_lex(parser);
18400 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18403 pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits);
18406 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18407 pm_accepts_block_stack_push(parser,
true);
18408 begin_statements = parse_statements(parser,
PM_CONTEXT_BEGIN, (uint16_t) (depth + 1));
18409 pm_accepts_block_stack_pop(parser);
18410 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18413 pm_begin_node_t *begin_node = pm_begin_node_create(parser, &begin_keyword, begin_statements);
18414 parse_rescues(parser, opening_newline_index, &begin_keyword, begin_node, PM_RESCUES_BEGIN, (uint16_t) (depth + 1));
18415 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_BEGIN_TERM, &begin_keyword);
18417 PM_NODE_LENGTH_SET_TOKEN(parser, begin_node, &parser->
previous);
18418 pm_begin_node_end_keyword_set(parser, begin_node, &parser->
previous);
18419 pop_block_exits(parser, previous_block_exits);
18420 return UP(begin_node);
18422 case PM_TOKEN_KEYWORD_BEGIN_UPCASE: {
18424 pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits);
18426 if (binding_power != PM_BINDING_POWER_STATEMENT) {
18427 pm_parser_err_current(parser, PM_ERR_STATEMENT_PREEXE_BEGIN);
18430 parser_lex(parser);
18433 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_BEGIN_UPCASE_BRACE);
18437 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM, &opening);
18440 pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
18443 flush_block_exits(parser, previous_block_exits);
18444 return UP(pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->
previous));
18446 case PM_TOKEN_KEYWORD_BREAK:
18447 case PM_TOKEN_KEYWORD_NEXT:
18448 case PM_TOKEN_KEYWORD_RETURN: {
18449 parser_lex(parser);
18455 token_begins_expression_p(parser->
current.type) ||
18456 match2(parser, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)
18458 pm_binding_power_t binding_power = pm_binding_powers[parser->
current.type].
left;
18460 if (binding_power == PM_BINDING_POWER_UNSET || binding_power >= PM_BINDING_POWER_RANGE) {
18462 parse_arguments(parser, &arguments,
false, PM_TOKEN_EOF, (uint16_t) (depth + 1));
18465 if (!accepts_command_call && arguments.
arguments != NULL) {
18466 PM_PARSER_ERR_TOKEN_FORMAT(parser, &next, PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
pm_token_type_human(next.
type));
18471 switch (keyword.
type) {
18472 case PM_TOKEN_KEYWORD_BREAK: {
18477 case PM_TOKEN_KEYWORD_NEXT: {
18482 case PM_TOKEN_KEYWORD_RETURN: {
18484 parse_return(parser, node);
18488 assert(
false &&
"unreachable");
18489 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->
previous), PM_TOKEN_LENGTH(&parser->
previous)));
18492 case PM_TOKEN_KEYWORD_SUPER: {
18493 parser_lex(parser);
18497 parse_arguments_list(parser, &arguments,
true, accepts_command_call, (uint16_t) (depth + 1));
18502 ((arguments.
block == NULL) || PM_NODE_TYPE_P(arguments.
block, PM_BLOCK_NODE))
18504 return UP(pm_forwarding_super_node_create(parser, &keyword, &arguments));
18507 return UP(pm_super_node_create(parser, &keyword, &arguments));
18509 case PM_TOKEN_KEYWORD_YIELD: {
18510 parser_lex(parser);
18514 parse_arguments_list(parser, &arguments,
false, accepts_command_call, (uint16_t) (depth + 1));
18520 if (arguments.
block != NULL) {
18521 pm_parser_err_node(parser, arguments.
block, PM_ERR_UNEXPECTED_BLOCK_ARGUMENT);
18522 pm_node_unreference(parser, arguments.
block);
18523 arguments.
block = NULL;
18531 case PM_TOKEN_KEYWORD_CLASS: {
18532 size_t opening_newline_index = token_newline_index(parser);
18533 parser_lex(parser);
18536 pm_do_loop_stack_push(parser,
false);
18539 pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits);
18541 if (accept1(parser, PM_TOKEN_LESS_LESS)) {
18543 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION,
true,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_LESS_LESS, (uint16_t) (depth + 1));
18545 pm_parser_scope_push(parser,
true);
18546 if (!match2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
18551 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18552 pm_accepts_block_stack_push(parser,
true);
18553 statements = UP(parse_statements(parser,
PM_CONTEXT_SCLASS, (uint16_t) (depth + 1)));
18554 pm_accepts_block_stack_pop(parser);
18557 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18558 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18559 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.
start, (
pm_statements_node_t *) statements, PM_RESCUES_SCLASS, (uint16_t) (depth + 1)));
18561 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword,
false,
false);
18564 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18569 pm_parser_scope_pop(parser);
18570 pm_do_loop_stack_pop(parser);
18572 flush_block_exits(parser, previous_block_exits);
18573 return UP(pm_singleton_class_node_create(parser, &locals, &class_keyword, &
operator, expression, statements, &parser->
previous));
18576 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX,
false,
false, PM_ERR_CLASS_NAME, (uint16_t) (depth + 1));
18578 if (name.
type != PM_TOKEN_CONSTANT) {
18579 pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
18585 if (match1(parser, PM_TOKEN_LESS)) {
18586 inheritance_operator = parser->
current;
18587 lex_state_set(parser, PM_LEX_STATE_BEG);
18590 parser_lex(parser);
18592 superclass = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION,
true,
false, PM_ERR_CLASS_SUPERCLASS, (uint16_t) (depth + 1));
18597 pm_parser_scope_push(parser,
true);
18599 if (inheritance_operator.
start != NULL) {
18600 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CLASS_UNEXPECTED_END);
18602 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18606 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18607 pm_accepts_block_stack_push(parser,
true);
18608 statements = UP(parse_statements(parser,
PM_CONTEXT_CLASS, (uint16_t) (depth + 1)));
18609 pm_accepts_block_stack_pop(parser);
18612 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
18613 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18614 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &class_keyword, class_keyword.
start, (
pm_statements_node_t *) statements, PM_RESCUES_CLASS, (uint16_t) (depth + 1)));
18616 parser_warn_indentation_mismatch(parser, opening_newline_index, &class_keyword,
false,
false);
18619 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM, &class_keyword);
18621 if (context_def_p(parser)) {
18622 pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
18628 pm_parser_scope_pop(parser);
18629 pm_do_loop_stack_pop(parser);
18631 if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
18632 pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
18635 pop_block_exits(parser, previous_block_exits);
18636 return UP(pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, NTOK2PTR(inheritance_operator), superclass, statements, &parser->
previous));
18638 case PM_TOKEN_KEYWORD_DEF: {
18640 pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits);
18643 size_t opening_newline_index = token_newline_index(parser);
18653 parser_lex(parser);
18657 bool valid_name =
true;
18659 switch (parser->
current.type) {
18660 case PM_CASE_OPERATOR:
18661 pm_parser_scope_push(parser,
true);
18662 lex_state_set(parser, PM_LEX_STATE_ENDFN);
18663 parser_lex(parser);
18667 case PM_TOKEN_IDENTIFIER: {
18668 parser_lex(parser);
18670 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18671 receiver = parse_variable_call(parser);
18673 pm_parser_scope_push(parser,
true);
18674 lex_state_set(parser, PM_LEX_STATE_FNAME);
18675 parser_lex(parser);
18678 name = parse_method_definition_name(parser);
18680 pm_refute_numbered_parameter(parser, PM_TOKEN_START(parser, &parser->
previous), PM_TOKEN_LENGTH(&parser->
previous));
18681 pm_parser_scope_push(parser,
true);
18688 case PM_TOKEN_INSTANCE_VARIABLE:
18689 case PM_TOKEN_CLASS_VARIABLE:
18690 case PM_TOKEN_GLOBAL_VARIABLE:
18691 valid_name =
false;
18693 case PM_TOKEN_CONSTANT:
18694 case PM_TOKEN_KEYWORD_NIL:
18695 case PM_TOKEN_KEYWORD_SELF:
18696 case PM_TOKEN_KEYWORD_TRUE:
18697 case PM_TOKEN_KEYWORD_FALSE:
18698 case PM_TOKEN_KEYWORD___FILE__:
18699 case PM_TOKEN_KEYWORD___LINE__:
18700 case PM_TOKEN_KEYWORD___ENCODING__: {
18701 pm_parser_scope_push(parser,
true);
18702 parser_lex(parser);
18706 if (match2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON)) {
18707 lex_state_set(parser, PM_LEX_STATE_FNAME);
18708 parser_lex(parser);
18711 switch (identifier.
type) {
18712 case PM_TOKEN_CONSTANT:
18713 receiver = UP(pm_constant_read_node_create(parser, &identifier));
18715 case PM_TOKEN_INSTANCE_VARIABLE:
18716 receiver = UP(pm_instance_variable_read_node_create(parser, &identifier));
18718 case PM_TOKEN_CLASS_VARIABLE:
18719 receiver = UP(pm_class_variable_read_node_create(parser, &identifier));
18721 case PM_TOKEN_GLOBAL_VARIABLE:
18722 receiver = UP(pm_global_variable_read_node_create(parser, &identifier));
18724 case PM_TOKEN_KEYWORD_NIL:
18725 receiver = UP(pm_nil_node_create(parser, &identifier));
18727 case PM_TOKEN_KEYWORD_SELF:
18728 receiver = UP(pm_self_node_create(parser, &identifier));
18730 case PM_TOKEN_KEYWORD_TRUE:
18731 receiver = UP(pm_true_node_create(parser, &identifier));
18733 case PM_TOKEN_KEYWORD_FALSE:
18734 receiver = UP(pm_false_node_create(parser, &identifier));
18736 case PM_TOKEN_KEYWORD___FILE__:
18737 receiver = UP(pm_source_file_node_create(parser, &identifier));
18739 case PM_TOKEN_KEYWORD___LINE__:
18740 receiver = UP(pm_source_line_node_create(parser, &identifier));
18742 case PM_TOKEN_KEYWORD___ENCODING__:
18743 receiver = UP(pm_source_encoding_node_create(parser, &identifier));
18749 name = parse_method_definition_name(parser);
18759 case PM_TOKEN_PARENTHESIS_LEFT: {
18764 context_pop(parser);
18765 parser_lex(parser);
18768 pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION,
true,
false, PM_ERR_DEF_RECEIVER, (uint16_t) (depth + 1));
18770 accept1(parser, PM_TOKEN_NEWLINE);
18771 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
18774 lex_state_set(parser, PM_LEX_STATE_FNAME);
18775 expect2(parser, PM_TOKEN_DOT, PM_TOKEN_COLON_COLON, PM_ERR_DEF_RECEIVER_TERM);
18778 receiver = UP(pm_parentheses_node_create(parser, &lparen, expression, &rparen, 0));
18782 pm_parser_scope_push(parser,
true);
18784 name = parse_method_definition_name(parser);
18788 pm_parser_scope_push(parser,
true);
18789 name = parse_method_definition_name(parser);
18797 bool accept_endless_def =
true;
18798 switch (parser->
current.type) {
18799 case PM_TOKEN_PARENTHESIS_LEFT: {
18800 parser_lex(parser);
18803 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18808 params = parse_parameters(
18810 PM_BINDING_POWER_DEFINED,
18812 allow_trailing_comma,
18816 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18817 (uint16_t) (depth + 1)
18821 lex_state_set(parser, PM_LEX_STATE_BEG);
18824 context_pop(parser);
18825 if (!accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
18834 case PM_CASE_PARAMETER: {
18837 if (parser->
current.type == PM_TOKEN_LABEL) {
18838 lex_state_set(parser, parser->
lex_state | PM_LEX_STATE_LABEL);
18841 params = parse_parameters(
18843 PM_BINDING_POWER_DEFINED,
18849 PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES,
18850 (uint16_t) (depth + 1)
18855 accept_endless_def =
false;
18857 context_pop(parser);
18862 context_pop(parser);
18871 if (accept1(parser, PM_TOKEN_EQUAL)) {
18872 if (token_is_setter_name(&name)) {
18873 pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
18875 if (!accept_endless_def) {
18876 pm_parser_err_previous(parser, PM_ERR_DEF_ENDLESS_PARAMETERS);
18882 PM_PARSER_ERR_FORMAT(parser, PM_TOKEN_START(parser, &def_keyword), PM_TOKENS_LENGTH(&def_keyword, &parser->
previous), PM_ERR_UNEXPECTED_PARAMETER_DEFAULT_VALUE,
"endless method definition");
18887 pm_do_loop_stack_push(parser,
false);
18888 statements = UP(pm_statements_node_create(parser));
18890 bool allow_command_call;
18892 allow_command_call = accepts_command_call;
18895 allow_command_call = binding_power == PM_BINDING_POWER_ASSIGNMENT || binding_power < PM_BINDING_POWER_COMPOSITION;
18898 pm_node_t *statement = parse_expression(parser, PM_BINDING_POWER_DEFINED + 1, allow_command_call,
false, PM_ERR_DEF_ENDLESS, (uint16_t) (depth + 1));
18902 if (PM_NODE_TYPE_P(statement, PM_CALL_NODE)) {
18905 if (call->
arguments != NULL && call->
block != NULL && PM_NODE_TYPE_P(call->
block, PM_BLOCK_NODE)) {
18909 pm_parser_err_node(parser, call->
block, PM_ERR_DEF_ENDLESS_DO_BLOCK);
18914 if (accept1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
18918 pm_node_t *value = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right,
false,
false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
18919 context_pop(parser);
18921 statement = UP(pm_rescue_modifier_node_create(parser, statement, &rescue_keyword, value));
18924 pm_statements_node_body_append(parser, (
pm_statements_node_t *) statements, statement,
false);
18925 pm_do_loop_stack_pop(parser);
18926 context_pop(parser);
18928 if (lparen.
start == NULL) {
18929 lex_state_set(parser, PM_LEX_STATE_BEG);
18931 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_DEF_PARAMS_TERM);
18933 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
18936 pm_accepts_block_stack_push(parser,
true);
18937 pm_do_loop_stack_push(parser,
false);
18939 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
18940 pm_accepts_block_stack_push(parser,
true);
18941 statements = UP(parse_statements(parser,
PM_CONTEXT_DEF, (uint16_t) (depth + 1)));
18942 pm_accepts_block_stack_pop(parser);
18945 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
18946 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
18947 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &def_keyword, def_keyword.
start, (
pm_statements_node_t *) statements, PM_RESCUES_DEF, (uint16_t) (depth + 1)));
18949 parser_warn_indentation_mismatch(parser, opening_newline_index, &def_keyword,
false,
false);
18952 pm_accepts_block_stack_pop(parser);
18953 pm_do_loop_stack_pop(parser);
18955 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_DEF_TERM, &def_keyword);
18961 pm_parser_scope_pop(parser);
18968 pm_constant_id_t name_id = pm_parser_constant_id_raw(parser, name.
start, parse_operator_symbol_name(&name));
18970 flush_block_exits(parser, previous_block_exits);
18972 return UP(pm_def_node_create(
18981 NTOK2PTR(
operator),
18985 NTOK2PTR(end_keyword)
18988 case PM_TOKEN_KEYWORD_DEFINED: {
18989 parser_lex(parser);
18997 bool newline = accept1(parser, PM_TOKEN_NEWLINE);
18999 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19002 if (newline && accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19003 expression = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->
previous, 0));
19006 expression = parse_expression(parser, PM_BINDING_POWER_COMPOSITION,
true,
false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19009 accept1(parser, PM_TOKEN_NEWLINE);
19010 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19015 expression = parse_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_DEFINED_EXPRESSION, (uint16_t) (depth + 1));
19018 context_pop(parser);
19019 return UP(pm_defined_node_create(
19027 case PM_TOKEN_KEYWORD_END_UPCASE: {
19028 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19029 pm_parser_err_current(parser, PM_ERR_STATEMENT_POSTEXE_END);
19032 parser_lex(parser);
19035 if (context_def_p(parser)) {
19036 pm_parser_warn_token(parser, &keyword, PM_WARN_END_IN_METHOD);
19039 expect1(parser, PM_TOKEN_BRACE_LEFT, PM_ERR_END_UPCASE_BRACE);
19043 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_END_UPCASE_TERM, &opening);
19044 return UP(pm_post_execution_node_create(parser, &keyword, &opening, statements, &parser->
previous));
19046 case PM_TOKEN_KEYWORD_FALSE:
19047 parser_lex(parser);
19048 return UP(pm_false_node_create(parser, &parser->
previous));
19049 case PM_TOKEN_KEYWORD_FOR: {
19050 size_t opening_newline_index = token_newline_index(parser);
19051 parser_lex(parser);
19059 if (accept1(parser, PM_TOKEN_USTAR)) {
19063 if (token_begins_expression_p(parser->
current.type)) {
19064 name = parse_expression(parser, PM_BINDING_POWER_INDEX,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19067 index = UP(pm_splat_node_create(parser, &star_operator, name));
19068 }
else if (token_begins_expression_p(parser->
current.type)) {
19069 index = parse_expression(parser, PM_BINDING_POWER_INDEX,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA, (uint16_t) (depth + 1));
19071 pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
19072 index = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &for_keyword), PM_TOKEN_LENGTH(&for_keyword)));
19076 if (match1(parser, PM_TOKEN_COMMA)) {
19077 index = parse_targets(parser, index, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19079 index = parse_target(parser, index,
false,
false);
19082 context_pop(parser);
19083 pm_do_loop_stack_push(parser,
true);
19085 expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
19088 pm_node_t *collection = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION,
true,
false, PM_ERR_FOR_COLLECTION, (uint16_t) (depth + 1));
19089 pm_do_loop_stack_pop(parser);
19092 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19095 if (!match2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE)) {
19101 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19102 statements = parse_statements(parser,
PM_CONTEXT_FOR, (uint16_t) (depth + 1));
19105 parser_warn_indentation_mismatch(parser, opening_newline_index, &for_keyword,
false,
false);
19106 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM, &for_keyword);
19108 return UP(pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, NTOK2PTR(do_keyword), &parser->
previous));
19110 case PM_TOKEN_KEYWORD_IF:
19111 if (parser_end_of_line_p(parser)) {
19112 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->
current, PM_WARN_KEYWORD_EOL);
19115 size_t opening_newline_index = token_newline_index(parser);
19116 bool if_after_else = parser->
previous.
type == PM_TOKEN_KEYWORD_ELSE;
19117 parser_lex(parser);
19119 return parse_conditional(parser,
PM_CONTEXT_IF, opening_newline_index, if_after_else, (uint16_t) (depth + 1));
19120 case PM_TOKEN_KEYWORD_UNDEF: {
19121 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19122 pm_parser_err_current(parser, PM_ERR_STATEMENT_UNDEF);
19125 parser_lex(parser);
19127 pm_node_t *name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19129 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19131 pm_undef_node_append(parser->
arena, undef, name);
19133 while (match1(parser, PM_TOKEN_COMMA)) {
19134 lex_state_set(parser, PM_LEX_STATE_FNAME | PM_LEX_STATE_FITEM);
19135 parser_lex(parser);
19136 name = parse_undef_argument(parser, (uint16_t) (depth + 1));
19138 if (PM_NODE_TYPE_P(name, PM_MISSING_NODE)) {
19142 pm_undef_node_append(parser->
arena, undef, name);
19148 case PM_TOKEN_KEYWORD_NOT: {
19149 parser_lex(parser);
19158 if (!accepts_command_call && !match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19159 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES)) {
19160 pm_parser_err(parser, PM_TOKEN_END(parser, &parser->
previous), 1, PM_ERR_EXPECT_LPAREN_AFTER_NOT_LPAREN);
19162 accept1(parser, PM_TOKEN_NEWLINE);
19163 pm_parser_err_current(parser, PM_ERR_EXPECT_LPAREN_AFTER_NOT_OTHER);
19166 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current)));
19169 accept1(parser, PM_TOKEN_NEWLINE);
19171 if (accept1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
19174 if (accept1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19175 receiver = UP(pm_parentheses_node_create(parser, &lparen, NULL, &parser->
previous, 0));
19177 arguments.
opening_loc = TOK2LOC(parser, &lparen);
19178 receiver = parse_expression(parser, PM_BINDING_POWER_COMPOSITION,
true,
false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19181 accept1(parser, PM_TOKEN_NEWLINE);
19182 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
19187 receiver = parse_expression(parser, PM_BINDING_POWER_NOT,
true,
false, PM_ERR_NOT_EXPRESSION, (uint16_t) (depth + 1));
19190 return UP(pm_call_node_not_create(parser, receiver, &message, &arguments));
19192 case PM_TOKEN_KEYWORD_UNLESS: {
19193 size_t opening_newline_index = token_newline_index(parser);
19194 parser_lex(parser);
19196 return parse_conditional(parser,
PM_CONTEXT_UNLESS, opening_newline_index,
false, (uint16_t) (depth + 1));
19198 case PM_TOKEN_KEYWORD_MODULE: {
19200 pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits);
19202 size_t opening_newline_index = token_newline_index(parser);
19203 parser_lex(parser);
19206 pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX,
false,
false, PM_ERR_MODULE_NAME, (uint16_t) (depth + 1));
19211 if (PM_NODE_TYPE_P(constant_path, PM_MISSING_NODE)) {
19212 pop_block_exits(parser, previous_block_exits);
19215 return UP(pm_module_node_create(parser, NULL, &module_keyword, constant_path, &missing, NULL, &missing));
19218 while (accept1(parser, PM_TOKEN_COLON_COLON)) {
19221 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
19222 constant_path = UP(pm_constant_path_node_create(parser, constant_path, &double_colon, &parser->
previous));
19229 if (name.
type != PM_TOKEN_CONSTANT) {
19230 pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
19233 pm_parser_scope_push(parser,
true);
19234 accept2(parser, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE);
19237 if (!match4(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE, PM_TOKEN_KEYWORD_END)) {
19238 pm_accepts_block_stack_push(parser,
true);
19239 statements = UP(parse_statements(parser,
PM_CONTEXT_MODULE, (uint16_t) (depth + 1)));
19240 pm_accepts_block_stack_pop(parser);
19243 if (match3(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE, PM_TOKEN_KEYWORD_ELSE)) {
19244 assert(statements == NULL || PM_NODE_TYPE_P(statements, PM_STATEMENTS_NODE));
19245 statements = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &module_keyword, module_keyword.
start, (
pm_statements_node_t *) statements, PM_RESCUES_MODULE, (uint16_t) (depth + 1)));
19247 parser_warn_indentation_mismatch(parser, opening_newline_index, &module_keyword,
false,
false);
19253 pm_parser_scope_pop(parser);
19254 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM, &module_keyword);
19256 if (context_def_p(parser)) {
19257 pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
19260 pop_block_exits(parser, previous_block_exits);
19262 return UP(pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->
previous));
19264 case PM_TOKEN_KEYWORD_NIL:
19265 parser_lex(parser);
19266 return UP(pm_nil_node_create(parser, &parser->
previous));
19267 case PM_TOKEN_KEYWORD_REDO: {
19268 parser_lex(parser);
19275 case PM_TOKEN_KEYWORD_RETRY: {
19276 parser_lex(parser);
19279 parse_retry(parser, node);
19283 case PM_TOKEN_KEYWORD_SELF:
19284 parser_lex(parser);
19285 return UP(pm_self_node_create(parser, &parser->
previous));
19286 case PM_TOKEN_KEYWORD_TRUE:
19287 parser_lex(parser);
19288 return UP(pm_true_node_create(parser, &parser->
previous));
19289 case PM_TOKEN_KEYWORD_UNTIL: {
19290 size_t opening_newline_index = token_newline_index(parser);
19293 pm_do_loop_stack_push(parser,
true);
19295 parser_lex(parser);
19297 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION,
true,
false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
19299 pm_do_loop_stack_pop(parser);
19300 context_pop(parser);
19303 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19306 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19310 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19311 pm_accepts_block_stack_push(parser,
true);
19312 statements = parse_statements(parser,
PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
19313 pm_accepts_block_stack_pop(parser);
19314 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19317 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword,
false,
false);
19318 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM, &keyword);
19320 return UP(pm_until_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->
previous, predicate, statements, 0));
19322 case PM_TOKEN_KEYWORD_WHILE: {
19323 size_t opening_newline_index = token_newline_index(parser);
19326 pm_do_loop_stack_push(parser,
true);
19328 parser_lex(parser);
19330 pm_node_t *predicate = parse_value_expression(parser, PM_BINDING_POWER_COMPOSITION,
true,
false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
19332 pm_do_loop_stack_pop(parser);
19333 context_pop(parser);
19336 if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19339 expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19343 if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19344 pm_accepts_block_stack_push(parser,
true);
19345 statements = parse_statements(parser,
PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
19346 pm_accepts_block_stack_pop(parser);
19347 accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
19350 parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword,
false,
false);
19351 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM, &keyword);
19353 return UP(pm_while_node_create(parser, &keyword, NTOK2PTR(do_keyword), &parser->
previous, predicate, statements, 0));
19355 case PM_TOKEN_PERCENT_LOWER_I: {
19356 parser_lex(parser);
19361 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19362 accept1(parser, PM_TOKEN_WORDS_SEP);
19363 if (match1(parser, PM_TOKEN_STRING_END))
break;
19367 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19369 if (current == NULL) {
19370 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->
current, NULL));
19371 parser_lex(parser);
19372 }
else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19373 pm_node_t *
string = UP(pm_string_node_create_current_string(parser, NULL, &parser->
current, NULL));
19374 parser_lex(parser);
19376 }
else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19379 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->
unescaped));
19380 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->
previous, NULL));
19381 parser_lex(parser);
19384 pm_interpolated_symbol_node_append(parser->
arena, interpolated, first_string);
19385 pm_interpolated_symbol_node_append(parser->
arena, interpolated, second_string);
19388 current = UP(interpolated);
19390 assert(
false &&
"unreachable");
19395 pm_array_node_elements_append(parser->
arena, array, current);
19398 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
19403 if (match1(parser, PM_TOKEN_EOF)) {
19404 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_LOWER_TERM);
19407 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
19409 pm_array_node_close_set(parser, array, &closing);
19413 case PM_TOKEN_PERCENT_UPPER_I: {
19414 parser_lex(parser);
19422 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19423 switch (parser->
current.type) {
19424 case PM_TOKEN_WORDS_SEP: {
19425 if (current == NULL) {
19431 pm_array_node_elements_append(parser->
arena, array, current);
19435 parser_lex(parser);
19438 case PM_TOKEN_STRING_CONTENT: {
19439 if (current == NULL) {
19443 current = UP(pm_symbol_node_create_current_string(parser, NULL, &parser->
current, NULL));
19444 parser_lex(parser);
19445 }
else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19449 pm_node_t *
string = UP(pm_string_node_create_current_string(parser, NULL, &parser->
current, NULL));
19450 parser_lex(parser);
19453 }
else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19459 .
type = PM_TOKEN_STRING_CONTENT,
19464 pm_node_t *first_string = UP(pm_string_node_create_unescaped(parser, NULL, &content, NULL, &cast->
unescaped));
19465 pm_node_t *second_string = UP(pm_string_node_create_current_string(parser, NULL, &parser->
previous, NULL));
19466 parser_lex(parser);
19469 pm_interpolated_symbol_node_append(parser->
arena, interpolated, first_string);
19470 pm_interpolated_symbol_node_append(parser->
arena, interpolated, second_string);
19473 current = UP(interpolated);
19475 assert(
false &&
"unreachable");
19480 case PM_TOKEN_EMBVAR: {
19481 bool start_location_set =
false;
19482 if (current == NULL) {
19486 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
19487 }
else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19493 current = UP(pm_symbol_node_to_string_node(parser, (
pm_symbol_node_t *) current));
19494 pm_interpolated_symbol_node_append(parser->
arena, interpolated, current);
19495 PM_NODE_START_SET_NODE(interpolated, current);
19496 start_location_set =
true;
19497 current = UP(interpolated);
19503 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19505 if (!start_location_set) {
19506 PM_NODE_START_SET_NODE(current, part);
19510 case PM_TOKEN_EMBEXPR_BEGIN: {
19511 bool start_location_set =
false;
19512 if (current == NULL) {
19516 current = UP(pm_interpolated_symbol_node_create(parser, NULL, NULL, NULL));
19517 }
else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
19524 current = UP(pm_symbol_node_to_string_node(parser, (
pm_symbol_node_t *) current));
19525 pm_interpolated_symbol_node_append(parser->
arena, interpolated, current);
19526 PM_NODE_START_SET_NODE(interpolated, current);
19527 start_location_set =
true;
19528 current = UP(interpolated);
19529 }
else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
19533 assert(
false &&
"unreachable");
19536 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19538 if (!start_location_set) {
19539 PM_NODE_START_SET_NODE(current, part);
19544 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_UPPER_ELEMENT);
19545 parser_lex(parser);
19552 pm_array_node_elements_append(parser->
arena, array, current);
19556 if (match1(parser, PM_TOKEN_EOF)) {
19557 pm_parser_err_token(parser, &opening, PM_ERR_LIST_I_UPPER_TERM);
19560 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_UPPER_TERM);
19562 pm_array_node_close_set(parser, array, &closing);
19566 case PM_TOKEN_PERCENT_LOWER_W: {
19567 parser_lex(parser);
19572 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19573 accept1(parser, PM_TOKEN_WORDS_SEP);
19574 if (match1(parser, PM_TOKEN_STRING_END))
break;
19578 while (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19579 pm_node_t *
string = UP(pm_string_node_create_current_string(parser, NULL, &parser->
current, NULL));
19582 if (current == NULL) {
19584 }
else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19586 }
else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19588 pm_interpolated_string_node_append(parser->
arena, interpolated, current);
19589 pm_interpolated_string_node_append(parser->
arena, interpolated,
string);
19590 current = UP(interpolated);
19592 assert(
false &&
"unreachable");
19594 parser_lex(parser);
19598 pm_array_node_elements_append(parser->
arena, array, current);
19601 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
19606 if (match1(parser, PM_TOKEN_EOF)) {
19607 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_LOWER_TERM);
19610 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
19613 pm_array_node_close_set(parser, array, &closing);
19616 case PM_TOKEN_PERCENT_UPPER_W: {
19617 parser_lex(parser);
19625 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19626 switch (parser->
current.type) {
19627 case PM_TOKEN_WORDS_SEP: {
19632 if (current == NULL) {
19639 pm_array_node_elements_append(parser->
arena, array, current);
19643 parser_lex(parser);
19646 case PM_TOKEN_STRING_CONTENT: {
19647 pm_node_t *
string = UP(pm_string_node_create_current_string(parser, NULL, &parser->
current, NULL));
19648 pm_node_flag_set(
string, parse_unescaped_encoding(parser));
19649 parser_lex(parser);
19651 if (current == NULL) {
19657 }
else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19662 }
else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19668 pm_interpolated_string_node_append(parser->
arena, interpolated, current);
19669 pm_interpolated_string_node_append(parser->
arena, interpolated,
string);
19670 current = UP(interpolated);
19672 assert(
false &&
"unreachable");
19677 case PM_TOKEN_EMBVAR: {
19678 if (current == NULL) {
19683 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
19684 }
else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19690 pm_interpolated_string_node_append(parser->
arena, interpolated, current);
19691 current = UP(interpolated);
19698 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19702 case PM_TOKEN_EMBEXPR_BEGIN: {
19703 if (current == NULL) {
19708 current = UP(pm_interpolated_string_node_create(parser, NULL, NULL, NULL));
19709 }
else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
19715 pm_interpolated_string_node_append(parser->
arena, interpolated, current);
19716 current = UP(interpolated);
19717 }
else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
19722 assert(
false &&
"unreachable");
19725 pm_node_t *part = parse_string_part(parser, (uint16_t) (depth + 1));
19730 expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_UPPER_ELEMENT);
19731 parser_lex(parser);
19738 pm_array_node_elements_append(parser->
arena, array, current);
19742 if (match1(parser, PM_TOKEN_EOF)) {
19743 pm_parser_err_token(parser, &opening, PM_ERR_LIST_W_UPPER_TERM);
19746 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_UPPER_TERM);
19749 pm_array_node_close_set(parser, array, &closing);
19752 case PM_TOKEN_REGEXP_BEGIN: {
19754 parser_lex(parser);
19756 if (match1(parser, PM_TOKEN_REGEXP_END)) {
19761 .
type = PM_TOKEN_STRING_CONTENT,
19766 parser_lex(parser);
19768 pm_node_t *node = UP(pm_regular_expression_node_create(parser, &opening, &content, &parser->
previous));
19769 pm_node_flag_set(node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
19776 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19784 parser_lex(parser);
19789 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
19796 if (!match1(parser, PM_TOKEN_EQUAL_TILDE)) {
19797 parse_regular_expression_errors(parser, node);
19800 pm_node_flag_set(UP(node), parse_and_validate_regular_expression_encoding(parser, &unescaped, ascii_only, FL(node)));
19806 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19808 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->
previous, NULL, &unescaped));
19813 pm_node_flag_set(part, PM_STRING_FLAGS_FORCED_BINARY_ENCODING);
19816 pm_interpolated_regular_expression_node_append(parser->
arena, interpolated, part);
19821 interpolated = pm_interpolated_regular_expression_node_create(parser, &opening);
19827 while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
19828 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19829 pm_interpolated_regular_expression_node_append(parser->
arena, interpolated, part);
19834 if (match1(parser, PM_TOKEN_EOF)) {
19835 pm_parser_err_token(parser, &opening, PM_ERR_REGEXP_TERM);
19838 expect1(parser, PM_TOKEN_REGEXP_END, PM_ERR_REGEXP_TERM);
19841 pm_interpolated_regular_expression_node_closing_set(parser, interpolated, &closing);
19842 return UP(interpolated);
19844 case PM_TOKEN_BACKTICK:
19845 case PM_TOKEN_PERCENT_LOWER_X: {
19846 parser_lex(parser);
19853 if (match1(parser, PM_TOKEN_STRING_END)) {
19858 .
type = PM_TOKEN_STRING_CONTENT,
19863 parser_lex(parser);
19864 return UP(pm_xstring_node_create(parser, &opening, &content, &parser->
previous));
19869 if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
19876 parser_lex(parser);
19878 if (match1(parser, PM_TOKEN_STRING_END)) {
19879 pm_node_t *node = UP(pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->
current, &unescaped));
19880 pm_node_flag_set(node, parse_unescaped_encoding(parser));
19881 parser_lex(parser);
19887 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19889 pm_node_t *part = UP(pm_string_node_create_unescaped(parser, NULL, &parser->
previous, NULL, &unescaped));
19890 pm_node_flag_set(part, parse_unescaped_encoding(parser));
19892 pm_interpolated_xstring_node_append(parser->
arena, node, part);
19897 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
19901 while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
19902 if ((part = parse_string_part(parser, (uint16_t) (depth + 1))) != NULL) {
19903 pm_interpolated_xstring_node_append(parser->
arena, node, part);
19908 if (match1(parser, PM_TOKEN_EOF)) {
19909 pm_parser_err_token(parser, &opening, PM_ERR_XSTRING_TERM);
19912 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_XSTRING_TERM);
19914 pm_interpolated_xstring_node_closing_set(parser, node, &closing);
19918 case PM_TOKEN_USTAR: {
19919 parser_lex(parser);
19924 if (binding_power != PM_BINDING_POWER_STATEMENT) {
19925 pm_parser_err_prefix(parser, diag_id);
19926 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->
previous), PM_TOKEN_LENGTH(&parser->
previous)));
19932 if (token_begins_expression_p(parser->
current.type)) {
19933 name = parse_expression(parser, PM_BINDING_POWER_INDEX,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_STAR, (uint16_t) (depth + 1));
19936 pm_node_t *splat = UP(pm_splat_node_create(parser, &
operator, name));
19938 if (match1(parser, PM_TOKEN_COMMA)) {
19939 return parse_targets_validate(parser, splat, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
19941 return parse_target_validate(parser, splat,
true);
19944 case PM_TOKEN_BANG: {
19945 if (binding_power > PM_BINDING_POWER_UNARY) {
19946 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19949 parser_lex(parser);
19952 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->
previous.
type].right, binding_power < PM_BINDING_POWER_MATCH,
false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19953 pm_call_node_t *node = pm_call_node_unary_create(parser, &
operator, receiver,
"!");
19955 pm_conditional_predicate(parser, receiver, PM_CONDITIONAL_PREDICATE_TYPE_NOT);
19958 case PM_TOKEN_TILDE: {
19959 if (binding_power > PM_BINDING_POWER_UNARY) {
19960 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19962 parser_lex(parser);
19965 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->
previous.
type].right,
false,
false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19966 pm_call_node_t *node = pm_call_node_unary_create(parser, &
operator, receiver,
"~");
19970 case PM_TOKEN_UMINUS: {
19971 if (binding_power > PM_BINDING_POWER_UNARY) {
19972 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
19974 parser_lex(parser);
19977 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->
previous.
type].right,
false,
false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19978 pm_call_node_t *node = pm_call_node_unary_create(parser, &
operator, receiver,
"-@");
19982 case PM_TOKEN_UMINUS_NUM: {
19983 parser_lex(parser);
19986 pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->
previous.
type].right,
false,
false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
19988 if (accept1(parser, PM_TOKEN_STAR_STAR)) {
19990 pm_node_t *exponent = parse_expression(parser, pm_binding_powers[exponent_operator.
type].right,
false,
false, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
19991 node = UP(pm_call_node_binary_create(parser, node, &exponent_operator, exponent, 0));
19992 node = UP(pm_call_node_unary_create(parser, &
operator, node,
"-@"));
19994 switch (PM_NODE_TYPE(node)) {
19995 case PM_INTEGER_NODE:
19996 case PM_FLOAT_NODE:
19997 case PM_RATIONAL_NODE:
19998 case PM_IMAGINARY_NODE:
19999 parse_negative_numeric(node);
20002 node = UP(pm_call_node_unary_create(parser, &
operator, node,
"-@"));
20009 case PM_TOKEN_MINUS_GREATER: {
20013 size_t opening_newline_index = token_newline_index(parser);
20014 pm_accepts_block_stack_push(parser,
true);
20015 parser_lex(parser);
20018 pm_parser_scope_push(parser,
false);
20022 switch (parser->
current.type) {
20023 case PM_TOKEN_PARENTHESIS_LEFT: {
20025 parser_lex(parser);
20027 if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
20028 block_parameters = pm_block_parameters_node_create(parser, NULL, &opening);
20030 block_parameters = parse_block_parameters(parser,
false, &opening,
true,
true, (uint16_t) (depth + 1));
20033 accept1(parser, PM_TOKEN_NEWLINE);
20034 expect1(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_ERR_EXPECT_RPAREN);
20036 pm_block_parameters_node_closing_set(parser, block_parameters, &parser->
previous);
20039 case PM_CASE_PARAMETER: {
20040 pm_accepts_block_stack_push(parser,
false);
20041 block_parameters = parse_block_parameters(parser,
false, NULL,
true,
false, (uint16_t) (depth + 1));
20042 pm_accepts_block_stack_pop(parser);
20046 block_parameters = NULL;
20055 if (accept1(parser, PM_TOKEN_LAMBDA_BEGIN)) {
20058 if (!match1(parser, PM_TOKEN_BRACE_RIGHT)) {
20062 parser_warn_indentation_mismatch(parser, opening_newline_index, &
operator,
false,
false);
20063 expect1_opening(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_LAMBDA_TERM_BRACE, &opening);
20065 expect1(parser, PM_TOKEN_KEYWORD_DO, PM_ERR_LAMBDA_OPEN);
20068 if (!match3(parser, PM_TOKEN_KEYWORD_END, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20069 pm_accepts_block_stack_push(parser,
true);
20071 pm_accepts_block_stack_pop(parser);
20074 if (match2(parser, PM_TOKEN_KEYWORD_RESCUE, PM_TOKEN_KEYWORD_ENSURE)) {
20075 assert(body == NULL || PM_NODE_TYPE_P(body, PM_STATEMENTS_NODE));
20076 body = UP(parse_rescues_implicit_begin(parser, opening_newline_index, &
operator, opening.
start, (
pm_statements_node_t *) body, PM_RESCUES_LAMBDA, (uint16_t) (depth + 1)));
20078 parser_warn_indentation_mismatch(parser, opening_newline_index, &
operator,
false,
false);
20081 expect1_opening(parser, PM_TOKEN_KEYWORD_END, PM_ERR_LAMBDA_TERM_END, &
operator);
20085 pm_locals_order(parser, &parser->
current_scope->
locals, &locals, pm_parser_scope_toplevel_p(parser));
20086 pm_node_t *parameters = parse_blocklike_parameters(parser, UP(block_parameters), &
operator, &parser->
previous);
20088 pm_parser_scope_pop(parser);
20089 pm_accepts_block_stack_pop(parser);
20091 return UP(pm_lambda_node_create(parser, &locals, &
operator, &opening, &parser->
previous, parameters, body));
20093 case PM_TOKEN_UPLUS: {
20094 if (binding_power > PM_BINDING_POWER_UNARY) {
20095 pm_parser_err_prefix(parser, PM_ERR_UNARY_DISALLOWED);
20097 parser_lex(parser);
20100 pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->
previous.
type].right,
false,
false, PM_ERR_UNARY_RECEIVER, (uint16_t) (depth + 1));
20101 pm_call_node_t *node = pm_call_node_unary_create(parser, &
operator, receiver,
"+@");
20105 case PM_TOKEN_STRING_BEGIN:
20106 return parse_strings(parser, NULL, accepts_label, (uint16_t) (depth + 1));
20107 case PM_TOKEN_SYMBOL_BEGIN: {
20109 parser_lex(parser);
20111 return parse_symbol(parser, &lex_mode, PM_LEX_STATE_END, (uint16_t) (depth + 1));
20122 if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
20123 pm_parser_err_prefix(parser, diag_id);
20129 PM_PARSER_ERR_TOKEN_FORMAT(parser, &parser->
current, PM_ERR_UNEXPECTED_TOKEN_CLOSE_CONTEXT,
pm_token_type_human(parser->
current.type), context_human(recoverable));
20130 }
else if (diag_id == PM_ERR_CANNOT_PARSE_EXPRESSION) {
20136 pm_parser_err_prefix(parser, diag_id);
20139 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->
previous), PM_TOKEN_LENGTH(&parser->
previous)));
/**
 * Parse the value of an assignment. The value itself comes from
 * parse_value_expression; if a rescue modifier keyword follows it, the value
 * and the expression after the keyword are combined into a rescue modifier
 * node, which is returned.
 *
 * NOTE(review): this excerpt omits some lines of the function (the return
 * type, the declaration of `rescue`, whatever context_pop pairs with, and the
 * return used when no rescue modifier is present), so only the statements
 * shown here are described.
 */
20154parse_assignment_value(
pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power,
bool accepts_command_call,
pm_diagnostic_id_t diag_id, uint16_t depth) {
// When the enclosing expression is itself an assignment, the caller's
// accepts_command_call is forwarded unchanged; otherwise command calls are
// accepted only if the previous binding power is below
// PM_BINDING_POWER_MATCH.
20155 pm_node_t *value = parse_value_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MATCH, false, diag_id, (uint16_t) (depth + 1));
// Handle a trailing rescue modifier on the assigned value.
20159 if (match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
// presumably advances past the rescue modifier keyword - TODO confirm
20163 parser_lex(parser);
// The rescue expression is parsed with the right binding power registered
// for the rescue modifier token.
20165 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right,
false,
false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20166 context_pop(parser);
// `rescue` is declared on a line not shown in this excerpt.
20168 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
// Walks `node` by type and, for local variable writes, calls pm_locals_read
// for the written name in the scope found at the write's depth. Parentheses
// and statements nodes are descended into recursively.
// NOTE(review): the function header, the `cast`/`statement` declarations and
// the body of the PM_BEGIN_NODE case are not visible in this excerpt; the
// function name is taken from the recursive calls below.
20180 switch (PM_NODE_TYPE(node)) {
20181 case PM_BEGIN_NODE: {
20186 case PM_LOCAL_VARIABLE_WRITE_NODE: {
// Look up the scope at the recorded depth and call pm_locals_read on its
// locals for this name.
20188 pm_locals_read(&pm_parser_scope_find(parser, cast->
depth)->
locals, cast->
name);
20191 case PM_PARENTHESES_NODE: {
// Empty parentheses have no body to descend into.
20193 if (cast->
body != NULL) parse_assignment_value_local(parser, cast->
body);
20196 case PM_STATEMENTS_NODE: {
// presumably executed once per child statement; the loop is not shown here
20201 parse_assignment_value_local(parser, statement);
/**
 * Parse the right-hand side of an assignment that may consist of several
 * comma-separated (possibly starred) values, followed by an optional rescue
 * modifier.
 *
 * NOTE(review): this excerpt omits some lines of the function (the return
 * type, the creation of `array`, the declaration of `rescue`, and the final
 * return), so only the statements shown here are described.
 */
20223parse_assignment_values(
pm_parser_t *parser, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power,
bool accepts_command_call,
pm_diagnostic_id_t diag_id, uint16_t depth) {
// A leading unary star is only permitted when the previous binding power is
// PM_BINDING_POWER_STATEMENT; the error is reported after the value has been
// parsed so that it can be attached to the resulting node.
20224 bool permitted =
true;
20225 if (previous_binding_power != PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_USTAR)) permitted =
false;
// Inside an assignment the caller's accepts_command_call is forwarded;
// otherwise command calls are accepted only below PM_BINDING_POWER_MODIFIER.
20227 pm_node_t *value = parse_starred_expression(parser, binding_power, previous_binding_power == PM_BINDING_POWER_ASSIGNMENT ? accepts_command_call : previous_binding_power < PM_BINDING_POWER_MODIFIER, diag_id, (uint16_t) (depth + 1));
20228 if (!permitted) pm_parser_err_node(parser, value, PM_ERR_UNEXPECTED_MULTI_WRITE);
20230 parse_assignment_value_local(parser, value);
20231 bool single_value =
true;
// At statement level, a splat value or a following comma means that more
// than one value is being assigned; the values are collected into `array`
// (created on a line not shown in this excerpt).
20233 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && (PM_NODE_TYPE_P(value, PM_SPLAT_NODE) || match1(parser, PM_TOKEN_COMMA))) {
20234 single_value =
false;
20237 pm_array_node_elements_append(parser->
arena, array, value);
20240 while (accept1(parser, PM_TOKEN_COMMA)) {
20241 pm_node_t *element = parse_starred_expression(parser, binding_power,
false, PM_ERR_ARRAY_ELEMENT, (uint16_t) (depth + 1));
// The element is appended before the missing-node check, so a missing
// element still ends up in the array before the loop stops.
20243 pm_array_node_elements_append(parser->
arena, array, element);
20244 if (PM_NODE_TYPE_P(element, PM_MISSING_NODE))
break;
20246 parse_assignment_value_local(parser, element);
// A rescue modifier is handled here only for a single value, or when the
// binding power is exactly one above PM_BINDING_POWER_MULTI_ASSIGNMENT.
20252 if ((single_value || (binding_power == (PM_BINDING_POWER_MULTI_ASSIGNMENT + 1))) && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
20256 parser_lex(parser);
20258 bool accepts_command_call_inner =
false;
// Command calls in the rescue expression can only be enabled when the value
// is a call node. NOTE(review): lines between this check and the assignment
// are omitted here, so a further condition may apply - confirm.
20262 if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
20265 accepts_command_call_inner =
true;
20269 pm_node_t *right = parse_expression(parser, pm_binding_powers[PM_TOKEN_KEYWORD_RESCUE_MODIFIER].right, accepts_command_call_inner,
false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
20270 context_pop(parser);
// `rescue` is declared on a line not shown in this excerpt.
20272 return UP(pm_rescue_modifier_node_create(parser, value, &rescue, right));
// Fragment of a routine that validates a call node used with an operator
// write: it reports PM_ERR_OPERATOR_WRITE_ARGUMENTS and
// PM_ERR_OPERATOR_WRITE_BLOCK on the operator token and unreferences the
// offending child nodes.
// NOTE(review): the function header and the condition guarding the arguments
// error are not visible in this excerpt.
20288 pm_parser_err_token(parser,
operator, PM_ERR_OPERATOR_WRITE_ARGUMENTS);
20289 pm_node_unreference(parser, UP(call_node->
arguments));
// A block on the call is likewise reported; the block is then unreferenced
// and cleared from the call node.
20293 if (call_node->
block != NULL) {
20294 pm_parser_err_token(parser,
operator, PM_ERR_OPERATOR_WRITE_BLOCK);
20295 pm_node_unreference(parser, UP(call_node->
block));
20296 call_node->
block = NULL;
/**
 * Handle a \x escape while unescaping a named capture. Up to two hexadecimal
 * digits starting at `cursor` (and before `end`) are combined into one byte
 * that is appended to `unescaped`. When there is no hexadecimal digit at
 * `cursor`, the two literal characters `\x` are appended instead.
 *
 * NOTE(review): the lines that advance `cursor`, the else branch and the
 * return statement are omitted from this excerpt; the return value is
 * presumably the cursor position after the escape - confirm.
 */
20325static inline const uint8_t *
20326pm_named_capture_escape_hex(
pm_buffer_t *unescaped,
const uint8_t *cursor,
const uint8_t *end) {
20329 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20330 uint8_t value = escape_hexadecimal_digit(*cursor);
// An optional second digit shifts the first into the high nibble.
20333 if (cursor < end && pm_char_is_hexadecimal_digit(*cursor)) {
20334 value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(*cursor));
20338 pm_buffer_append_byte(unescaped, value);
// No digits followed the escape: keep the backslash and the x as written.
20340 pm_buffer_append_string(unescaped,
"\\x", 2);
/**
 * Handle an octal escape while unescaping a named capture. Up to three octal
 * digits are folded into a single byte that is appended to `unescaped`.
 *
 * The first digit is read without a bounds or digit check; the caller visible
 * in this file only dispatches here when the byte at `cursor` is '0'..'7'.
 *
 * NOTE(review): the lines that advance `cursor` and the return statement are
 * omitted from this excerpt; the return value is presumably the cursor
 * position after the escape - confirm.
 */
20346static inline const uint8_t *
20347pm_named_capture_escape_octal(
pm_buffer_t *unescaped,
const uint8_t *cursor,
const uint8_t *end) {
20348 uint8_t value = (uint8_t) (*cursor -
'0');
// Each further digit shifts the accumulated value left by three bits; the
// uint8_t casts truncate the result to one byte.
20351 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20352 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor -
'0'));
20355 if (cursor < end && pm_char_is_octal_digit(*cursor)) {
20356 value = ((uint8_t) (value << 3)) | ((uint8_t) (*cursor -
'0'));
20361 pm_buffer_append_byte(unescaped, value);
/**
 * Handle a \u escape while unescaping a named capture. Two forms are handled:
 * a form without braces that reads at most four hexadecimal digits, and a
 * braced form that reads space-separated runs of hexadecimal digits until a
 * closing brace. Decoded code points are appended to `unescaped`.
 *
 * NOTE(review): the line carrying the function name and parameters, the
 * cursor advances, the loop header of the braced form and the final return
 * are omitted from this excerpt; the name used for this block is taken from
 * the call site elsewhere in the file.
 */
20365static inline const uint8_t *
// Start of the raw escape text, one byte before `cursor`; it is used below to
// copy the escape verbatim when it cannot be encoded.
20367 const uint8_t *start = cursor - 1;
// Nothing follows the escape introducer: emit `\u` literally.
20370 if (cursor >= end) {
20371 pm_buffer_append_string(unescaped,
"\\u", 2);
// Form without braces: at most four hexadecimal digits.
20375 if (*cursor !=
'{') {
20376 size_t length = pm_strspn_hexadecimal_digit(cursor, MIN(end - cursor, 4));
20377 uint32_t value = escape_unicode(parser, cursor, length, error_location);
// If the code point cannot be appended, fall back to copying the original
// source bytes of the escape.
20379 if (!pm_buffer_append_unicode_codepoint(unescaped, value)) {
20380 pm_buffer_append_string(unescaped, (
const char *) start, (
size_t) ((cursor + length) - start));
20383 return cursor + length;
// Braced form: skip spaces between code points.
20388 while (cursor < end && *cursor ==
' ') cursor++;
20390 if (cursor >= end)
break;
20391 if (*cursor ==
'}') {
// In the braced form the digit run is bounded only by the end of the input.
20396 size_t length = pm_strspn_hexadecimal_digit(cursor, end - cursor);
20400 uint32_t value = escape_unicode(parser, cursor, length, error_location);
// The append result is deliberately ignored here (no raw-text fallback).
20402 (void) pm_buffer_append_unicode_codepoint(unescaped, value);
/**
 * Unescape the name of a named capture into `unescaped`. `source`/`length`
 * delimit the raw name and `cursor` points at a backslash within it. The
 * bytes before `cursor` are copied verbatim, each escape is dispatched to the
 * hex, octal or unicode helper, and the text between escapes as well as the
 * remainder after the last one is copied verbatim.
 *
 * NOTE(review): the return type, the loop header, the switch header with its
 * 'x'/'u'/default labels and the break statements are omitted from this
 * excerpt, so the comments below describe only the statements shown.
 */
20410pm_named_capture_escape(
pm_parser_t *parser,
pm_buffer_t *unescaped,
const uint8_t *source,
const size_t length,
const uint8_t *cursor,
const pm_location_t *error_location) {
20411 const uint8_t *end = source + length;
// Copy everything that precedes the first backslash.
20412 pm_buffer_append_string(unescaped, (
const char *) source, (
size_t) (cursor - source));
// Step over the backslash; if it was the last byte, keep it literally.
20415 if (++cursor >= end) {
20416 pm_buffer_append_byte(unescaped,
'\\');
20422 cursor = pm_named_capture_escape_hex(unescaped, cursor, end);
20424 case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
20425 cursor = pm_named_capture_escape_octal(unescaped, cursor, end);
20428 cursor = pm_named_capture_escape_unicode(parser, unescaped, cursor, end, error_location);
// presumably the default case: an unrecognised escape keeps its backslash
20431 pm_buffer_append_byte(unescaped,
'\\');
// Find the next backslash; stop when there is none.
20435 const uint8_t *next_cursor = pm_memchr(cursor,
'\\', (
size_t) (end - cursor), parser->
encoding_changed, parser->
encoding);
20436 if (next_cursor == NULL)
break;
// Copy the plain text between this escape and the next one.
20438 pm_buffer_append_string(unescaped, (
const char *) cursor, (
size_t) (next_cursor - cursor));
20439 cursor = next_cursor;
// Copy whatever remains after the final escape.
20442 pm_buffer_append_string(unescaped, (
const char *) cursor, (
size_t) (end - cursor));
/**
 * Callback invoked with the name of a named capture (`capture`) and an opaque
 * `data` pointer. For a name that is a valid local, it interns the name,
 * registers it as a local variable where needed, lazily creates the match
 * write node for the call, and builds a local variable target node for the
 * capture.
 *
 * NOTE(review): many lines are omitted from this excerpt (the return type,
 * the unpacking of `data` into `callback_data`/`parser`/`call`/`names`, the
 * declarations of `source`/`length`/`cursor`/`unescaped`/`name`/`depth`, the
 * branch bodies for invalid names and keywords, and the final append of the
 * target). The nesting of the statements below cannot be determined from
 * here, so each comment describes its statement in isolation.
 */
20450parse_regular_expression_named_capture(
const pm_string_t *capture,
void *data) {
// Unescape the capture name. No error location is passed when the callback
// data is flagged as shared; otherwise the call receiver's location is used.
20471 pm_named_capture_escape(parser, &unescaped, source, length, cursor, callback_data->
shared ? NULL : &call->receiver->location);
20476 const uint8_t *start;
20477 const uint8_t *end;
// Checks whether the name is a valid local variable name.
20482 if (!pm_slice_is_valid_local(parser, source, source + length)) {
// Shared case: the constant id is created from the [start, end) byte range
// (`start` is assigned on a line not shown in this excerpt).
20487 if (callback_data->
shared) {
20491 end = source + length;
20492 name = pm_parser_constant_id_raw(parser, start, end);
// Otherwise the name bytes are copied into freshly allocated memory and
// handed to the owned constant id constructor; allocation failure aborts.
20499 void *memory =
xmalloc(length);
20500 if (memory == NULL) abort();
20502 memcpy(memory, source, length);
20503 name = pm_parser_constant_id_owned(parser, (uint8_t *) memory, length);
// Record each name only once in `names`.
20508 if (name != 0 && !pm_constant_id_list_includes(names, name)) {
20509 pm_constant_id_list_append(parser->
arena, names, name);
// A depth of -1 means the name is not yet a local in any enclosing scope.
20512 if ((depth = pm_parser_local_depth_constant_id(parser, name)) == -1) {
20515 if (pm_local_is_keyword((
const char *) source, length)) {
20522 pm_parser_local_add(parser, name, start, end, 0);
// The match write node is created on first use.
20527 if (callback_data->
match == NULL) {
20528 callback_data->
match = pm_match_write_node_create(parser, call);
// Build the target for the capture; a depth of -1 is passed on as 0.
20533 pm_node_t *target = UP(pm_local_variable_target_node_create(parser, &TOK2LOC(parser, &((
pm_token_t) { .type = 0, .start = start, .end = end })), name, depth == -1 ? 0 : (uint32_t) depth));
// Fragment of the routine that drives named-capture discovery: it fills two
// data structures whose `shared` field records whether `content` is of type
// PM_STRING_SHARED, runs pm_regexp_parse over the content with the named
// capture and error callbacks, and returns the match node if one was created.
// NOTE(review): the function header, the surrounding initializers and the
// fallback return are not visible in this excerpt.
20550 .shared = content->
type == PM_STRING_SHARED
20557 .shared = content->
type == PM_STRING_SHARED
20560 pm_regexp_parse(parser,
pm_string_source(content),
pm_string_length(content), extended_mode, parse_regular_expression_named_capture, &callback_data, parse_regular_expression_error, &error_data);
// `match` is only non-NULL when the capture callback created a match write
// node during the parse above.
20562 if (callback_data.
match != NULL) {
20563 return UP(callback_data.
match);
/**
 * Parses the infix continuation of an expression. `node` is the already-parsed
 * left-hand side; the function switches on `token.type` (the declaration of
 * `token` is on a line not shown in this excerpt) and builds the node for the
 * matching operator, modifier, call, index or pattern-match form.
 *
 * Every nested parse call visible here passes `depth + 1`.
 *
 * NOTE(review): this excerpt omits many lines (local declarations, `break`
 * and `default` lines, closing braces). The comments describe only what the
 * visible statements do.
 *
 * @param parser The parser.
 * @param node The left-hand side that has already been parsed.
 * @param previous_binding_power Binding power of the enclosing context; it is
 *     compared against PM_BINDING_POWER_STATEMENT below.
 * @param binding_power Binding power forwarded to the right-hand side parse.
 * @param accepts_command_call Forwarded to nested parses and argument lists.
 * @param depth Current nesting depth.
 */
20570parse_expression_infix(
pm_parser_t *parser,
pm_node_t *node, pm_binding_power_t previous_binding_power, pm_binding_power_t binding_power,
bool accepts_command_call, uint16_t depth) {
20573 switch (token.type) {
// Plain assignment. Dispatches on the type of the left-hand node.
20574 case PM_TOKEN_EQUAL: {
20575 switch (PM_NODE_TYPE(node)) {
20576 case PM_CALL_NODE: {
// A variable-call on the left has its message location added as a local.
20582 if (PM_NODE_FLAG_P(call_node, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20583 pm_parser_local_add_location(parser, &call_node->
message_loc, 0);
20587 case PM_CASE_WRITABLE: {
20591 if (PM_NODE_TYPE_P(node, PM_IT_LOCAL_VARIABLE_READ_NODE)) {
20592 pm_parser_local_add_location(parser, &node->
location, 0);
20595 parser_lex(parser);
// Multi-target left-hand sides parse their values with
// PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 instead of `binding_power`.
20596 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
// A multi-target write is reported as an error unless the previous binding
// power is the statement level.
20598 if (PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) && previous_binding_power != PM_BINDING_POWER_STATEMENT) {
20599 pm_parser_err_node(parser, node, PM_ERR_UNEXPECTED_MULTI_WRITE);
20602 return parse_write(parser, node, &token, value);
// A splat on the left is appended to a multi-target node (created on a line
// not shown) and written through parse_write.
20604 case PM_SPLAT_NODE: {
20606 pm_multi_target_node_targets_append(parser, multi_target, node);
20608 parser_lex(parser);
20609 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_BINDING_POWER_MULTI_ASSIGNMENT + 1, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20610 return parse_write(parser, UP(multi_target), &token, value);
// These node types still parse the right-hand side but go through
// parse_unwriteable_write.
20612 case PM_SOURCE_ENCODING_NODE:
20613 case PM_FALSE_NODE:
20614 case PM_SOURCE_FILE_NODE:
20615 case PM_SOURCE_LINE_NODE:
20618 case PM_TRUE_NODE: {
20621 parser_lex(parser);
20622 pm_node_t *value = parse_assignment_values(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
20623 return parse_unwriteable_write(parser, node, &token, value);
// Remaining path: consume the token and report that the expression is not
// writable.
20629 parser_lex(parser);
20630 pm_parser_err_token(parser, &token, PM_ERR_EXPRESSION_NOT_WRITABLE);
// `&&=` assignment. Each writable node type builds its *_and_write node.
20634 case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL: {
20635 switch (PM_NODE_TYPE(node)) {
// Back references and numbered references are read-only targets.
20636 case PM_BACK_REFERENCE_READ_NODE:
20637 case PM_NUMBERED_REFERENCE_READ_NODE:
20638 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20640 case PM_GLOBAL_VARIABLE_READ_NODE: {
20641 parser_lex(parser);
20643 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20644 pm_node_t *result = UP(pm_global_variable_and_write_node_create(parser, node, &token, value));
20648 case PM_CLASS_VARIABLE_READ_NODE: {
20649 parser_lex(parser);
20651 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
// Constant writes are routed through parse_shareable_constant_write.
20656 case PM_CONSTANT_PATH_NODE: {
20657 parser_lex(parser);
20659 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20662 return parse_shareable_constant_write(parser, write);
20664 case PM_CONSTANT_READ_NODE: {
20665 parser_lex(parser);
20667 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20670 return parse_shareable_constant_write(parser, write);
20672 case PM_INSTANCE_VARIABLE_READ_NODE: {
20673 parser_lex(parser);
20675 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
// The `it` read is replaced by a local-variable write at depth 0 and the
// original read node is unreferenced.
20680 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20682 parser_lex(parser);
20684 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20685 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, name, 0));
20687 pm_node_unreference(parser, node);
20690 case PM_LOCAL_VARIABLE_READ_NODE: {
// Locals that look like numbered parameters get special handling (only the
// unreference call is visible in this branch).
20691 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
20693 pm_node_unreference(parser, node);
20697 parser_lex(parser);
20699 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20700 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, node, &token, value, cast->
name, cast->
depth));
// Call nodes: variable calls become local writes, index calls become index
// writes, and other calls become call writes.
20704 case PM_CALL_NODE: {
20710 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20713 parser_lex(parser);
20715 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20716 pm_node_t *result = UP(pm_local_variable_and_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20723 parser_lex(parser);
20728 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20729 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20730 return UP(pm_index_and_write_node_create(parser, cast, &token, value));
// Writable calls get their name rewritten; the PM_ERR_WRITE_TARGET_UNEXPECTED
// report below is presumably the non-writable branch (the `else` is not
// visible).
20734 if (pm_call_node_writable_p(parser, cast)) {
20735 parse_write_name(parser, &cast->
name);
20737 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20740 parse_call_operator_write(parser, cast, &token);
20741 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ, (uint16_t) (depth + 1));
20742 return UP(pm_call_and_write_node_create(parser, cast, &token, value));
20744 case PM_MULTI_WRITE_NODE: {
20745 parser_lex(parser);
20746 pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
20750 parser_lex(parser);
20755 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
// `||=` assignment. Same structure as the `&&=` case, building *_or_write
// nodes and using the PIPEPIPEEQ diagnostics.
20759 case PM_TOKEN_PIPE_PIPE_EQUAL: {
20760 switch (PM_NODE_TYPE(node)) {
20761 case PM_BACK_REFERENCE_READ_NODE:
20762 case PM_NUMBERED_REFERENCE_READ_NODE:
20763 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20765 case PM_GLOBAL_VARIABLE_READ_NODE: {
20766 parser_lex(parser);
20768 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20769 pm_node_t *result = UP(pm_global_variable_or_write_node_create(parser, node, &token, value));
20773 case PM_CLASS_VARIABLE_READ_NODE: {
20774 parser_lex(parser);
20776 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20781 case PM_CONSTANT_PATH_NODE: {
20782 parser_lex(parser);
20784 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20787 return parse_shareable_constant_write(parser, write);
20789 case PM_CONSTANT_READ_NODE: {
20790 parser_lex(parser);
20792 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20795 return parse_shareable_constant_write(parser, write);
20797 case PM_INSTANCE_VARIABLE_READ_NODE: {
20798 parser_lex(parser);
20800 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20805 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20807 parser_lex(parser);
20809 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20810 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, name, 0));
20812 pm_node_unreference(parser, node);
20815 case PM_LOCAL_VARIABLE_READ_NODE: {
// Writing to a name shaped like a numbered parameter is reported as reserved.
20816 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
20817 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->
start + PM_NODE_START(node));
20818 pm_node_unreference(parser, node);
20822 parser_lex(parser);
20824 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20825 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, node, &token, value, cast->
name, cast->
depth));
20829 case PM_CALL_NODE: {
20835 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20838 parser_lex(parser);
20840 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20841 pm_node_t *result = UP(pm_local_variable_or_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20848 parser_lex(parser);
20853 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20854 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20855 return UP(pm_index_or_write_node_create(parser, cast, &token, value));
20859 if (pm_call_node_writable_p(parser, cast)) {
20860 parse_write_name(parser, &cast->
name);
20862 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20865 parse_call_operator_write(parser, cast, &token);
20866 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ, (uint16_t) (depth + 1));
20867 return UP(pm_call_or_write_node_create(parser, cast, &token, value));
20869 case PM_MULTI_WRITE_NODE: {
20870 parser_lex(parser);
20871 pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
20875 parser_lex(parser);
20880 pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
// Binary-operator assignments share one case body, building *_operator_write
// nodes and using the AFTER_OPERATOR diagnostic.
20884 case PM_TOKEN_AMPERSAND_EQUAL:
20885 case PM_TOKEN_CARET_EQUAL:
20886 case PM_TOKEN_GREATER_GREATER_EQUAL:
20887 case PM_TOKEN_LESS_LESS_EQUAL:
20888 case PM_TOKEN_MINUS_EQUAL:
20889 case PM_TOKEN_PERCENT_EQUAL:
20890 case PM_TOKEN_PIPE_EQUAL:
20891 case PM_TOKEN_PLUS_EQUAL:
20892 case PM_TOKEN_SLASH_EQUAL:
20893 case PM_TOKEN_STAR_EQUAL:
20894 case PM_TOKEN_STAR_STAR_EQUAL: {
20895 switch (PM_NODE_TYPE(node)) {
20896 case PM_BACK_REFERENCE_READ_NODE:
20897 case PM_NUMBERED_REFERENCE_READ_NODE:
20898 PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20900 case PM_GLOBAL_VARIABLE_READ_NODE: {
20901 parser_lex(parser);
20903 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20904 pm_node_t *result = UP(pm_global_variable_operator_write_node_create(parser, node, &token, value));
20908 case PM_CLASS_VARIABLE_READ_NODE: {
20909 parser_lex(parser);
20911 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20916 case PM_CONSTANT_PATH_NODE: {
20917 parser_lex(parser);
20919 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20922 return parse_shareable_constant_write(parser, write);
20924 case PM_CONSTANT_READ_NODE: {
20925 parser_lex(parser);
20927 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20930 return parse_shareable_constant_write(parser, write);
20932 case PM_INSTANCE_VARIABLE_READ_NODE: {
20933 parser_lex(parser);
20935 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20940 case PM_IT_LOCAL_VARIABLE_READ_NODE: {
20942 parser_lex(parser);
20944 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20945 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, name, 0));
20947 pm_node_unreference(parser, node);
20950 case PM_LOCAL_VARIABLE_READ_NODE: {
20951 if (pm_token_is_numbered_parameter(parser, PM_NODE_START(node), PM_NODE_LENGTH(node))) {
20952 PM_PARSER_ERR_FORMAT(parser, PM_NODE_START(node), PM_NODE_LENGTH(node), PM_ERR_PARAMETER_NUMBERED_RESERVED, parser->
start + PM_NODE_START(node));
20953 pm_node_unreference(parser, node);
20957 parser_lex(parser);
20959 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20960 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, node, &token, value, cast->
name, cast->
depth));
// Unlike the `&&=` and `||=` cases, the token is consumed once at the top of
// the call-node case here.
20964 case PM_CALL_NODE: {
20965 parser_lex(parser);
20971 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_VARIABLE_CALL)) {
20974 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20975 pm_node_t *result = UP(pm_local_variable_operator_write_node_create(parser, UP(cast), &token, value, constant_id, 0));
20983 if (PM_NODE_FLAG_P(cast, PM_CALL_NODE_FLAGS_INDEX)) {
20984 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20985 return UP(pm_index_operator_write_node_create(parser, cast, &token, value));
20989 if (pm_call_node_writable_p(parser, cast)) {
20990 parse_write_name(parser, &cast->
name);
20992 pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
20995 parse_call_operator_write(parser, cast, &token);
20996 pm_node_t *value = parse_assignment_value(parser, previous_binding_power, binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
20997 return UP(pm_call_operator_write_node_create(parser, cast, &token, value));
20999 case PM_MULTI_WRITE_NODE: {
21000 parser_lex(parser);
21001 pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
21005 parser_lex(parser);
// Logical and. The right-hand side accepts a command call only when the
// operator was the `and` keyword.
21014 case PM_TOKEN_AMPERSAND_AMPERSAND:
21015 case PM_TOKEN_KEYWORD_AND: {
21016 parser_lex(parser);
21018 pm_node_t *right = parse_expression(parser, binding_power, parser->
previous.
type == PM_TOKEN_KEYWORD_AND,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21019 return UP(pm_and_node_create(parser, node, &token, right));
// Logical or. The right-hand side accepts a command call only when the
// operator was the `or` keyword.
21021 case PM_TOKEN_KEYWORD_OR:
21022 case PM_TOKEN_PIPE_PIPE: {
21023 parser_lex(parser);
21025 pm_node_t *right = parse_expression(parser, binding_power, parser->
previous.
type == PM_TOKEN_KEYWORD_OR,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21026 return UP(pm_or_node_create(parser, node, &token, right));
// Match operator. A binary call is always created; regular expression
// literals on the left are additionally scanned for named captures.
21028 case PM_TOKEN_EQUAL_TILDE: {
21036 parser_lex(parser);
21037 pm_node_t *argument = parse_expression(parser, binding_power,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21040 pm_call_node_t *call = pm_call_node_binary_create(parser, node, &token, argument, 0);
// Interpolated regular expression: `interpolated` is set on a line whose
// guarding condition is not visible here. When it stays false and the total
// length is non-zero, the content is gathered into one owned buffer (abort on
// allocation failure).
21046 if (PM_NODE_TYPE_P(node, PM_INTERPOLATED_REGULAR_EXPRESSION_NODE)) {
21053 bool interpolated =
false;
21054 size_t total_length = 0;
21058 if (PM_NODE_TYPE_P(part, PM_STRING_NODE)) {
21061 interpolated =
true;
21066 if (!interpolated && total_length > 0) {
21067 void *memory =
xmalloc(total_length);
21068 if (!memory) abort();
21070 uint8_t *cursor = memory;
21080 pm_string_owned_init(&owned, (uint8_t *) memory, total_length);
21082 result = parse_regular_expression_named_captures(parser, &owned, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
21085 }
else if (PM_NODE_TYPE_P(node, PM_REGULAR_EXPRESSION_NODE)) {
21089 result = parse_regular_expression_named_captures(parser, content, call, PM_NODE_FLAG_P(node, PM_REGULAR_EXPRESSION_FLAGS_EXTENDED));
// The unary-prefixed token cases have no visible statement before the binary
// operator labels; presumably they fall through (confirm against the full
// file).
21094 case PM_TOKEN_UAMPERSAND:
21095 case PM_TOKEN_USTAR:
21096 case PM_TOKEN_USTAR_STAR:
21099 case PM_TOKEN_BANG_EQUAL:
21100 case PM_TOKEN_BANG_TILDE:
21101 case PM_TOKEN_EQUAL_EQUAL:
21102 case PM_TOKEN_EQUAL_EQUAL_EQUAL:
21103 case PM_TOKEN_LESS_EQUAL_GREATER:
21104 case PM_TOKEN_CARET:
21105 case PM_TOKEN_PIPE:
21106 case PM_TOKEN_AMPERSAND:
21107 case PM_TOKEN_GREATER_GREATER:
21108 case PM_TOKEN_LESS_LESS:
21109 case PM_TOKEN_MINUS:
21110 case PM_TOKEN_PLUS:
21111 case PM_TOKEN_PERCENT:
21112 case PM_TOKEN_SLASH:
21113 case PM_TOKEN_STAR:
21114 case PM_TOKEN_STAR_STAR: {
21115 parser_lex(parser);
// A binary operator after a rescue modifier, or after an and/or node whose
// right side is a pattern match, is reported with
// PM_ERR_EXPECT_EOL_AFTER_STATEMENT.
21117 switch (PM_NODE_TYPE(node)) {
21118 case PM_RESCUE_MODIFIER_NODE: {
21121 PM_PARSER_ERR_TOKEN_FORMAT(parser, &
operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
pm_token_type_human(
operator.
type));
21125 case PM_AND_NODE: {
21127 if (PM_NODE_TYPE_P(cast->
right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->
right, PM_MATCH_REQUIRED_NODE)) {
21128 PM_PARSER_ERR_TOKEN_FORMAT(parser, &
operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
pm_token_type_human(
operator.
type));
21134 if (PM_NODE_TYPE_P(cast->
right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->
right, PM_MATCH_REQUIRED_NODE)) {
21135 PM_PARSER_ERR_TOKEN_FORMAT(parser, &
operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
pm_token_type_human(
operator.
type));
21143 pm_node_t *argument = parse_expression(parser, binding_power,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21144 return UP(pm_call_node_binary_create(parser, node, &token, argument, 0));
// Ordering comparisons. Chaining one comparison after another emits a warning,
// and the resulting call carries the COMPARISON flag.
21146 case PM_TOKEN_GREATER:
21147 case PM_TOKEN_GREATER_EQUAL:
21148 case PM_TOKEN_LESS:
21149 case PM_TOKEN_LESS_EQUAL: {
21150 if (PM_NODE_TYPE_P(node, PM_CALL_NODE) && PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_COMPARISON)) {
21151 PM_PARSER_WARN_TOKEN_FORMAT_CONTENT(parser, &parser->
current, PM_WARN_COMPARISON_AFTER_COMPARISON);
21154 parser_lex(parser);
21155 pm_node_t *argument = parse_expression(parser, binding_power,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21156 return UP(pm_call_node_binary_create(parser, node, &token, argument, PM_CALL_NODE_FLAGS_COMPARISON));
// Method call through `.` or `&.`.
21158 case PM_TOKEN_AMPERSAND_DOT:
21159 case PM_TOKEN_DOT: {
21160 parser_lex(parser);
// An opening parenthesis directly after the operator builds a shorthand call.
21165 if (match1(parser, PM_TOKEN_PARENTHESIS_LEFT)) {
21166 parse_arguments_list(parser, &arguments,
true,
false, (uint16_t) (depth + 1));
21167 return UP(pm_call_node_shorthand_create(parser, node, &
operator, &arguments));
21170 switch (PM_NODE_TYPE(node)) {
21171 case PM_RESCUE_MODIFIER_NODE: {
21174 PM_PARSER_ERR_TOKEN_FORMAT(parser, &
operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
pm_token_type_human(
operator.
type));
21178 case PM_AND_NODE: {
21180 if (PM_NODE_TYPE_P(cast->
right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->
right, PM_MATCH_REQUIRED_NODE)) {
21181 PM_PARSER_ERR_TOKEN_FORMAT(parser, &
operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
pm_token_type_human(
operator.
type));
21187 if (PM_NODE_TYPE_P(cast->
right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->
right, PM_MATCH_REQUIRED_NODE)) {
21188 PM_PARSER_ERR_TOKEN_FORMAT(parser, &
operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT,
pm_token_type_human(
operator.
type));
// The message may be an operator, keyword, constant, identifier or method
// name token.
21198 switch (parser->
current.type) {
21199 case PM_CASE_OPERATOR:
21200 case PM_CASE_KEYWORD:
21201 case PM_TOKEN_CONSTANT:
21202 case PM_TOKEN_IDENTIFIER:
21203 case PM_TOKEN_METHOD_NAME: {
21204 parser_lex(parser);
21214 parse_arguments_list(parser, &arguments,
true, accepts_command_call, (uint16_t) (depth + 1));
21215 pm_call_node_t *call = pm_call_node_call_create(parser, node, &
operator, &message, &arguments);
// At statement level, a following comma means the call is validated as a
// multi-assignment target (other parts of this condition are not visible).
21218 (previous_binding_power == PM_BINDING_POWER_STATEMENT) &&
21221 match1(parser, PM_TOKEN_COMMA)
21223 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
// Range operators. The right operand is parsed only when the next token can
// begin an expression.
21228 case PM_TOKEN_DOT_DOT:
21229 case PM_TOKEN_DOT_DOT_DOT: {
21230 parser_lex(parser);
21233 if (token_begins_expression_p(parser->
current.type)) {
21234 right = parse_expression(parser, binding_power,
false,
false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21237 return UP(pm_range_node_create(parser, node, &token, right));
// Modifier `if`: `node` becomes the statement guarded by the parsed predicate.
21239 case PM_TOKEN_KEYWORD_IF_MODIFIER: {
21241 parser_lex(parser);
21243 pm_node_t *predicate = parse_value_expression(parser, binding_power,
true,
false, PM_ERR_CONDITIONAL_IF_PREDICATE, (uint16_t) (depth + 1));
21244 return UP(pm_if_node_modifier_create(parser, node, &keyword, predicate));
// Modifier `unless`.
21246 case PM_TOKEN_KEYWORD_UNLESS_MODIFIER: {
21248 parser_lex(parser);
21250 pm_node_t *predicate = parse_value_expression(parser, binding_power,
true,
false, PM_ERR_CONDITIONAL_UNLESS_PREDICATE, (uint16_t) (depth + 1));
21251 return UP(pm_unless_node_modifier_create(parser, node, &keyword, predicate));
// Modifier `until`: `node` is appended to a statements node that becomes the
// loop body. A begin-node body sets PM_LOOP_FLAGS_BEGIN_MODIFIER.
21253 case PM_TOKEN_KEYWORD_UNTIL_MODIFIER: {
21254 parser_lex(parser);
21256 pm_statements_node_body_append(parser, statements, node,
true);
21258 pm_node_t *predicate = parse_value_expression(parser, binding_power,
true,
false, PM_ERR_CONDITIONAL_UNTIL_PREDICATE, (uint16_t) (depth + 1));
21259 return UP(pm_until_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
// Modifier `while`: same shape as the `until` modifier.
21261 case PM_TOKEN_KEYWORD_WHILE_MODIFIER: {
21262 parser_lex(parser);
21264 pm_statements_node_body_append(parser, statements, node,
true);
21266 pm_node_t *predicate = parse_value_expression(parser, binding_power,
true,
false, PM_ERR_CONDITIONAL_WHILE_PREDICATE, (uint16_t) (depth + 1));
21267 return UP(pm_while_node_modifier_create(parser, &token, predicate, statements, PM_NODE_TYPE_P(node, PM_BEGIN_NODE) ? PM_LOOP_FLAGS_BEGIN_MODIFIER : 0));
// Ternary operator. Block exits are pushed before the branches are parsed and
// popped before each return.
21269 case PM_TOKEN_QUESTION_MARK: {
// NOTE(review): `¤t_block_exits` looks like a mis-decoded
// `&current_block_exits` (an HTML `&curren` entity expansion) - confirm
// against the upstream file before compiling.
21272 pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits);
21275 parser_lex(parser);
21277 pm_node_t *true_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_TERNARY_EXPRESSION_TRUE, (uint16_t) (depth + 1));
// Early-return path (its guarding condition is not visible): the false branch
// is filled with a missing node located at the `colon` token.
21287 pm_node_t *false_expression = UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &colon), PM_TOKEN_LENGTH(&colon)));
21289 context_pop(parser);
21290 pop_block_exits(parser, previous_block_exits);
21291 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
// Normal path: optional newline, required colon, then the false branch.
21294 accept1(parser, PM_TOKEN_NEWLINE);
21295 expect1(parser, PM_TOKEN_COLON, PM_ERR_TERNARY_COLON);
21298 pm_node_t *false_expression = parse_expression(parser, PM_BINDING_POWER_DEFINED,
false,
false, PM_ERR_TERNARY_EXPRESSION_FALSE, (uint16_t) (depth + 1));
21300 context_pop(parser);
21301 pop_block_exits(parser, previous_block_exits);
21302 return UP(pm_if_node_ternary_create(parser, node, &qmark, true_expression, &colon, false_expression));
// `::` operator: constant path, method call, or shorthand call, depending on
// the token that follows.
21304 case PM_TOKEN_COLON_COLON: {
21305 parser_lex(parser);
21308 switch (parser->
current.type) {
21309 case PM_TOKEN_CONSTANT: {
21310 parser_lex(parser);
// A constant followed by `(`, or by something that starts an argument while
// command calls are accepted, is a method call; the other visible path builds
// a constant path node.
21314 (parser->
current.type == PM_TOKEN_PARENTHESIS_LEFT) ||
21315 (accepts_command_call && (token_begins_expression_p(parser->
current.type) || match3(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_USTAR_STAR)))
21326 parse_arguments_list(parser, &arguments,
true, accepts_command_call, (uint16_t) (depth + 1));
21327 path = UP(pm_call_node_call_create(parser, node, &delimiter, &message, &arguments));
21330 path = UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->
previous));
21334 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21335 return parse_targets_validate(parser, path, PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21340 case PM_CASE_OPERATOR:
21341 case PM_CASE_KEYWORD:
21342 case PM_TOKEN_IDENTIFIER:
21343 case PM_TOKEN_METHOD_NAME: {
21344 parser_lex(parser);
21350 parse_arguments_list(parser, &arguments,
true, accepts_command_call, (uint16_t) (depth + 1));
21351 pm_call_node_t *call = pm_call_node_call_create(parser, node, &delimiter, &message, &arguments);
21354 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21355 return parse_targets_validate(parser, UP(call), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
21360 case PM_TOKEN_PARENTHESIS_LEFT: {
21364 parse_arguments_list(parser, &arguments,
true,
false, (uint16_t) (depth + 1));
21366 return UP(pm_call_node_shorthand_create(parser, node, &delimiter, &arguments));
// Anything else: a constant is expected (an error is recorded otherwise) and
// a constant path node is returned.
21369 expect1(parser, PM_TOKEN_CONSTANT, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
21370 return UP(pm_constant_path_node_create(parser, node, &delimiter, &parser->
previous));
// Modifier `rescue`: an optional newline is accepted before the rescue value.
21374 case PM_TOKEN_KEYWORD_RESCUE_MODIFIER: {
21376 parser_lex(parser);
21377 accept1(parser, PM_TOKEN_NEWLINE);
21379 pm_node_t *value = parse_expression(parser, binding_power,
true,
false, PM_ERR_RESCUE_MODIFIER_VALUE, (uint16_t) (depth + 1));
21380 context_pop(parser);
21382 return UP(pm_rescue_modifier_node_create(parser, node, &token, value));
// Index expression `node[...]`.
21384 case PM_TOKEN_BRACKET_LEFT: {
21385 parser_lex(parser);
// Arguments are parsed unless the brackets are empty; blocks are accepted
// while parsing them.
21390 if (!accept1(parser, PM_TOKEN_BRACKET_RIGHT)) {
21391 pm_accepts_block_stack_push(parser,
true);
21392 parse_arguments(parser, &arguments,
false, PM_TOKEN_BRACKET_RIGHT, (uint16_t) (depth + 1));
21393 pm_accepts_block_stack_pop(parser);
21394 expect1(parser, PM_TOKEN_BRACKET_RIGHT, PM_ERR_EXPECT_RBRACKET);
21401 if (previous_binding_power == PM_BINDING_POWER_STATEMENT && match1(parser, PM_TOKEN_COMMA)) {
21402 pm_call_node_t *aref = pm_call_node_aref_create(parser, node, &arguments);
21403 return parse_targets_validate(parser, UP(aref), PM_BINDING_POWER_INDEX, (uint16_t) (depth + 1));
// An optional trailing block: `{` always, `do` only when the accepts-block
// stack allows it.
21410 if (accept1(parser, PM_TOKEN_BRACE_LEFT)) {
21411 block = parse_block(parser, (uint16_t) (depth + 1));
21412 pm_arguments_validate_block(parser, &arguments, block);
21413 }
else if (pm_accepts_block_stack_p(parser) && accept1(parser, PM_TOKEN_KEYWORD_DO)) {
21414 block = parse_block(parser, (uint16_t) (depth + 1));
// A literal block after a block argument is reported as an error.
21417 if (block != NULL) {
21418 if (arguments.
block != NULL) {
21419 pm_parser_err_node(parser, UP(block), PM_ERR_ARGUMENT_AFTER_BLOCK);
21421 arguments.
arguments = pm_arguments_node_create(parser);
21426 arguments.
block = UP(block);
21429 return UP(pm_call_node_aref_create(parser, node, &arguments));
// `expr in pattern`: builds a match-predicate node.
21431 case PM_TOKEN_KEYWORD_IN: {
21437 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21438 parser_lex(parser);
21441 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_IN, (uint16_t) (depth + 1));
21445 return UP(pm_match_predicate_node_create(parser, node, pattern, &
operator));
// `expr => pattern`: builds a match-required node.
21447 case PM_TOKEN_EQUAL_GREATER: {
21453 lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
21454 parser_lex(parser);
21457 pm_node_t *pattern = parse_pattern(parser, &captures, PM_PARSE_PATTERN_TOP | PM_PARSE_PATTERN_MULTI, PM_ERR_PATTERN_EXPRESSION_AFTER_HROCKET, (uint16_t) (depth + 1));
21461 return UP(pm_match_required_node_create(parser, node, pattern, &
operator));
// Asserts that this point is never reached.
21464 assert(
false &&
"unreachable");
21469#undef PM_PARSE_PATTERN_SINGLE
21470#undef PM_PARSE_PATTERN_TOP
21471#undef PM_PARSE_PATTERN_MULTI
21481 (node->
block == NULL || PM_NODE_TYPE_P(node->
block, PM_BLOCK_ARGUMENT_NODE)) &&
/**
 * Parses an expression: first a prefix expression through
 * parse_expression_prefix, then repeated infix continuations through
 * parse_expression_infix while the current token is a binary operator whose
 * left binding power is at least `binding_power`.
 *
 * NOTE(review): this excerpt omits many lines (the loop and `if` headers,
 * `break`/`return` lines, closing braces). The comments describe only the
 * visible statements.
 *
 * @param parser The parser.
 * @param binding_power Minimum left binding power an infix token must have.
 * @param accepts_command_call Forwarded to the prefix and infix parsers; it
 *     is cleared below after certain nodes.
 * @param accepts_label Forwarded to the prefix parser.
 * @param diag_id Diagnostic forwarded to the prefix parser.
 * @param depth Current nesting depth; infix parsing is called with depth + 1.
 */
21495parse_expression(
pm_parser_t *parser, pm_binding_power_t binding_power,
bool accepts_command_call,
bool accepts_label,
pm_diagnostic_id_t diag_id, uint16_t depth) {
// Nesting guard (its condition is not visible; presumably a depth limit):
// report the error and return a missing node at the current token.
21497 pm_parser_err_current(parser, PM_ERR_NESTING_TOO_DEEP);
21498 return UP(pm_missing_node_create(parser, PM_TOKEN_START(parser, &parser->
current), PM_TOKEN_LENGTH(&parser->
current)));
21501 pm_node_t *node = parse_expression_prefix(parser, binding_power, accepts_command_call, accepts_label, diag_id, depth);
// Some prefix results are inspected before any infix parsing; most of the
// case bodies are not visible in this excerpt.
21503 switch (PM_NODE_TYPE(node)) {
21504 case PM_MISSING_NODE:
21508 case PM_PRE_EXECUTION_NODE:
21509 case PM_POST_EXECUTION_NODE:
21510 case PM_ALIAS_GLOBAL_VARIABLE_NODE:
21511 case PM_ALIAS_METHOD_NODE:
21512 case PM_MULTI_WRITE_NODE:
21513 case PM_UNDEF_NODE:
21516 if (pm_binding_powers[parser->
current.type].left > PM_BINDING_POWER_MODIFIER) {
21526 if ((pm_binding_powers[parser->
current.type].left > PM_BINDING_POWER_COMPOSITION) && pm_call_node_command_p((
pm_call_node_t *) node)) {
21530 case PM_SYMBOL_NODE:
21534 if (pm_symbol_node_label_p(parser, node)) {
21545 pm_token_type_t current_token_type;
// Presumably the loop condition: refresh the current token type and its
// binding powers, and continue only for a binary token that binds at least as
// tightly as `binding_power`.
21548 current_token_type = parser->
current.type,
21549 current_binding_powers = pm_binding_powers[current_token_type],
21550 binding_power <= current_binding_powers.
left &&
21551 current_binding_powers.
binary
21553 node = parse_expression_infix(parser, node, binding_power, current_binding_powers.
right, accepts_command_call, (uint16_t) (depth + 1));
// The infix result is inspected by node type; several cases compare the next
// token's left binding power against PM_BINDING_POWER_MODIFIER.
21555 switch (PM_NODE_TYPE(node)) {
21556 case PM_MULTI_WRITE_NODE:
21559 if (pm_binding_powers[parser->
current.type].left > PM_BINDING_POWER_MODIFIER) {
21563 case PM_CLASS_VARIABLE_WRITE_NODE:
21564 case PM_CONSTANT_PATH_WRITE_NODE:
21565 case PM_CONSTANT_WRITE_NODE:
21566 case PM_GLOBAL_VARIABLE_WRITE_NODE:
21567 case PM_INSTANCE_VARIABLE_WRITE_NODE:
21568 case PM_LOCAL_VARIABLE_WRITE_NODE:
21571 if (PM_NODE_FLAG_P(node, PM_WRITE_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->
current.type].left > PM_BINDING_POWER_MODIFIER) {
21579 if (PM_NODE_FLAG_P(node, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY) && pm_binding_powers[parser->
current.type].left > PM_BINDING_POWER_MODIFIER) {
21583 case PM_RESCUE_MODIFIER_NODE:
21587 if (pm_binding_powers[parser->
current.type].left > PM_BINDING_POWER_MODIFIER) {
21591 if (PM_NODE_TYPE_P(rescue_expression, PM_MATCH_REQUIRED_NODE) || PM_NODE_TYPE_P(rescue_expression, PM_MATCH_PREDICATE_NODE)) {
// Non-associative operators: checks whether the same token type follows
// immediately, with a separate check for a range that has no right bound.
21602 if (current_binding_powers.
nonassoc) {
21605 if (match1(parser, current_token_type)) {
21617 if (PM_NODE_TYPE_P(node, PM_RANGE_NODE) && ((
pm_range_node_t *) node)->right == NULL) {
21618 if (match4(parser, PM_TOKEN_UAMPERSAND, PM_TOKEN_USTAR, PM_TOKEN_DOT, PM_TOKEN_AMPERSAND_DOT)) {
21623 if (PM_BINDING_POWER_TERM <= pm_binding_powers[parser->
current.type].left) {
21626 }
else if (current_binding_powers.
left <= pm_binding_powers[parser->
current.type].left) {
// While command calls are still accepted, the visible assignments clear the
// flag after some call nodes (conditions only partly visible) and in the
// constant path case.
21631 if (accepts_command_call) {
21640 switch (node->
type) {
21641 case PM_CALL_NODE: {
21655 cast->
block == NULL &&
21665 cast->
block != NULL && PM_NODE_TYPE_P(cast->
block, PM_BLOCK_NODE)
21668 accepts_command_call =
false;
21673 case PM_CONSTANT_PATH_NODE:
21676 accepts_command_call =
false;
// Condition on the next token's binding powers (what it guards is not
// visible): true when the token is not binary, binds more loosely than
// `binding_power`, or the node is a command-style call.
21684 !next_binding_powers.
binary ||
21685 binding_power > next_binding_powers.
left ||
21686 (PM_NODE_TYPE_P(node, PM_CALL_NODE) && pm_call_node_command_p((
pm_call_node_t *) node))
21702 if (PM_PARSER_COMMAND_LINE_OPTION_P(parser)) {
21703 if (statements == NULL) {
21704 statements = pm_statements_node_create(parser);
21708 pm_arguments_node_arguments_append(
21711 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser,
"$_", 2)))
21714 pm_statements_node_body_append(parser, statements, UP(pm_call_node_fcall_synthesized_create(
21717 pm_parser_constant_id_constant(parser,
"print", 5)
21721 if (PM_PARSER_COMMAND_LINE_OPTION_N(parser)) {
21722 if (PM_PARSER_COMMAND_LINE_OPTION_A(parser)) {
21723 if (statements == NULL) {
21724 statements = pm_statements_node_create(parser);
21728 pm_arguments_node_arguments_append(
21731 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser,
"$;", 2)))
21734 pm_global_variable_read_node_t *receiver = pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser,
"$_", 2));
21735 pm_call_node_t *call = pm_call_node_call_synthesized_create(parser, UP(receiver),
"split", arguments);
21739 pm_parser_constant_id_constant(parser,
"$F", 2),
21743 pm_statements_node_body_prepend(parser->
arena, statements, UP(write));
21747 pm_arguments_node_arguments_append(
21750 UP(pm_global_variable_read_node_synthesized_create(parser, pm_parser_constant_id_constant(parser,
"$/", 2)))
21753 if (PM_PARSER_COMMAND_LINE_OPTION_L(parser)) {
21755 pm_keyword_hash_node_elements_append(parser->
arena, keywords, UP(pm_assoc_node_create(
21757 UP(pm_symbol_node_synthesized_create(parser,
"chomp")),
21759 UP(pm_true_node_synthesized_create(parser))
21762 pm_arguments_node_arguments_append(parser->
arena, arguments, UP(keywords));
21763 pm_node_flag_set(UP(arguments), PM_ARGUMENTS_NODE_FLAGS_CONTAINS_KEYWORDS);
21767 pm_statements_node_body_append(parser, wrapped_statements, UP(pm_while_node_synthesized_create(
21769 UP(pm_call_node_fcall_synthesized_create(parser, arguments, pm_parser_constant_id_constant(parser,
"gets", 4))),
21773 statements = wrapped_statements;
21788 pm_parser_scope_push(parser,
true);
21792 pm_node_list_t *previous_block_exits = push_block_exits(parser, ¤t_block_exits);
21794 parser_lex(parser);
21801 assert(statements->
body.
size > 0);
21802 pm_void_statement_check(parser, statements->
body.
nodes[statements->
body.
size - 1]);
21807 pm_parser_scope_pop(parser);
21812 statements = wrap_statements(parser, statements);
21814 flush_block_exits(parser, previous_block_exits);
21820 if (statements == NULL) {
21821 statements = pm_statements_node_create(parser);
21825 return UP(pm_program_node_create(parser, &locals, statements));
/**
 * Searches the first `big_length` bytes of `big` for the NUL-terminated string
 * `little` and returns a pointer to the first match. The not-found return is
 * on a line not shown in this excerpt.
 *
 * `big` is scanned by length rather than by terminator, so it does not need to
 * be NUL-terminated; `little` must be, because strlen is applied to it.
 *
 * NOTE(review): when strlen(little) > big_length, the expression
 * `big + big_length - little_length` forms a pointer before `big`, which is
 * out-of-range pointer arithmetic. Confirm that callers guarantee
 * big_length >= strlen(little), or add an early guard.
 *
 * @param big The buffer to search.
 * @param little The NUL-terminated needle.
 * @param big_length The number of bytes of `big` to consider.
 */
21842pm_strnstr(
const char *big,
const char *little,
size_t big_length) {
21843 size_t little_length = strlen(little);
// `max` is the last start position at which `little` still fits.
21845 for (
const char *max = big + big_length - little_length; big <= max; big++) {
// Compare the first byte before paying for the full memcmp.
21846 if (*big == *little && memcmp(big, little, little_length) == 0)
return big;
21853#define pm_parser_warn_shebang_carriage_return(parser, start, length) ((void) 0)
21861pm_parser_warn_shebang_carriage_return(
pm_parser_t *parser,
const uint8_t *start,
size_t length) {
21862 if (length > 2 && start[length - 2] ==
'\r' && start[length - 1] ==
'\n') {
21863 pm_parser_warn(parser, U32(start - parser->
start), U32(length), PM_WARN_SHEBANG_CARRIAGE_RETURN);
21874 const char *switches = pm_strnstr(engine,
" -", length);
21875 if (switches == NULL)
return;
21880 (
const uint8_t *) (switches + 1),
21881 length - ((
size_t) (switches - engine)) - 1,
21885 size_t encoding_length;
21888 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
21900 assert(arena != NULL);
21901 assert(source != NULL);
21906 .lex_state = PM_LEX_STATE_BEG,
21907 .enclosure_nesting = 0,
21908 .lambda_enclosure_nesting = -1,
21909 .brace_nesting = 0,
21910 .do_loop_stack = 0,
21911 .accepts_block_stack = 0,
21914 .stack = {{ .mode = PM_LEX_DEFAULT }},
21918 .end = source + size,
21919 .previous = { .type = PM_TOKEN_EOF, .start = source, .end = source },
21920 .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
21921 .next_start = NULL,
21922 .heredoc_end = NULL,
21924 .comment_list = { 0 },
21925 .magic_comment_list = { 0 },
21926 .warning_list = { 0 },
21927 .error_list = { 0 },
21928 .current_scope = NULL,
21929 .current_context = NULL,
21931 .encoding_changed_callback = NULL,
21932 .encoding_comment_start = source,
21933 .lex_callback = NULL,
21935 .constant_pool = { 0 },
21936 .line_offsets = { 0 },
21940 .explicit_encoding = NULL,
21942 .parsing_eval =
false,
21943 .partial_script =
false,
21944 .command_start =
true,
21945 .recovering =
false,
21946 .encoding_locked =
false,
21947 .encoding_changed =
false,
21948 .pattern_matching_newlines =
false,
21949 .in_keyword_arg =
false,
21950 .current_block_exits = NULL,
21951 .semantic_token_seen =
false,
21953 .current_regular_expression_ascii_only =
false,
21954 .warn_mismatched_indentation =
true
21971 uint32_t constant_size = ((uint32_t) size) / 95;
21972 pm_constant_pool_init(&parser->
constant_pool, constant_size < 4 ? 4 : constant_size);
21977 size_t newline_size = size / 22;
21978 pm_line_offset_list_init(&parser->
line_offsets, newline_size < 4 ? 4 : newline_size);
21981 if (options != NULL) {
21990 if (encoding_length > 0) {
21992 parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
22014 for (
size_t scope_index = 0; scope_index < options->
scopes_count; scope_index++) {
22016 pm_parser_scope_push(parser, scope_index == 0);
22022 for (
size_t local_index = 0; local_index < scope->
locals_count; local_index++) {
22028 void *allocated =
xmalloc(length);
22029 if (allocated == NULL)
continue;
22031 memcpy(allocated, source, length);
22032 pm_parser_local_add_owned(parser, (uint8_t *) allocated, length);
22043 pm_accepts_block_stack_push(parser,
true);
22046 if (size >= 3 && source[0] == 0xef && source[1] == 0xbb && source[2] == 0xbf) {
22059 bool search_shebang = PM_PARSER_COMMAND_LINE_OPTION_X(parser);
22076 const uint8_t *newline = next_newline(parser->
current.end, parser->
end - parser->
current.end);
22077 size_t length = (size_t) ((newline != NULL ? newline : parser->
end) - parser->
current.end);
22079 if (length > 2 && parser->
current.end[0] ==
'#' && parser->
current.end[1] ==
'!') {
22080 const char *engine;
22082 if ((engine = pm_strnstr((
const char *) parser->
start,
"ruby", length)) != NULL) {
22083 if (newline != NULL) {
22087 pm_parser_warn_shebang_carriage_return(parser, parser->
start, length + 1);
22092 pm_parser_init_shebang(parser, options, engine, length - ((
size_t) (engine - (
const char *) parser->
start)));
22095 search_shebang =
false;
22097 search_shebang =
true;
22103 if (search_shebang) {
22106 bool found_shebang =
false;
22110 const uint8_t *cursor = parser->
start;
22114 const uint8_t *newline = next_newline(cursor, parser->
end - cursor);
22116 while (newline != NULL) {
22117 pm_line_offset_list_append(&parser->
line_offsets, U32(newline - parser->
start + 1));
22119 cursor = newline + 1;
22120 newline = next_newline(cursor, parser->
end - cursor);
22122 size_t length = (size_t) ((newline != NULL ? newline : parser->
end) - cursor);
22123 if (length > 2 && cursor[0] ==
'#' && cursor[1] ==
'!') {
22124 const char *engine;
22125 if ((engine = pm_strnstr((
const char *) cursor,
"ruby", length)) != NULL) {
22126 found_shebang =
true;
22128 if (newline != NULL) {
22129 pm_parser_warn_shebang_carriage_return(parser, cursor, length + 1);
22134 pm_parser_init_shebang(parser, options, engine, length - ((
size_t) (engine - (
const char *) cursor)));
22142 if (found_shebang) {
22144 parser->
current = (
pm_token_t) { .type = PM_TOKEN_EOF, .start = cursor, .end = cursor };
22146 pm_parser_err(parser, 0, 0, PM_ERR_SCRIPT_NOT_FOUND);
22173 for (node = list->
head; node != NULL; node = next) {
22185pm_magic_comment_list_free(
pm_list_t *list) {
22188 for (node = list->
head; node != NULL; node = next) {
22202 pm_diagnostic_list_free(&parser->
error_list);
22214 pm_parser_scope_pop(parser);
22218 lex_mode_pop(parser);
22227 return parse_program(parser);
22237#define LINE_SIZE 4096
22238 char line[LINE_SIZE];
22240 while (memset(line,
'\n', LINE_SIZE), stream_fgets(line, LINE_SIZE, stream) != NULL) {
22241 size_t length = LINE_SIZE;
22242 while (length > 0 && line[length - 1] ==
'\n') length--;
22244 if (length == LINE_SIZE) {
22249 pm_buffer_append_string(buffer, line, length);
22255 pm_buffer_append_string(buffer, line, length);
22263 if (strncmp(line,
"__END__", 7) == 0)
return false;
22266 if (strncmp(line,
"__END__\n", 8) == 0)
return false;
22269 if (strncmp(line,
"__END__\r\n", 9) == 0)
return false;
22275 if (stream_feof(stream)) {
22294 bool eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22300 eof = pm_parse_stream_read(buffer, stream, stream_fgets, stream_feof);
22303 pm_arena_free(arena);
22315pm_parse_success_p(
const uint8_t *source,
size_t size,
const char *data) {
22317 pm_options_read(&options, data);
22327 pm_arena_free(&arena);
22333#undef PM_CASE_KEYWORD
22334#undef PM_CASE_OPERATOR
22335#undef PM_CASE_WRITABLE
22336#undef PM_STRING_EMPTY
22341#ifndef PRISM_EXCLUDE_SERIALIZATION
22345 pm_buffer_append_string(buffer,
"PRISM", 5);
22349 pm_buffer_append_byte(buffer, PRISM_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);
22357 pm_serialize_header(buffer);
22359 pm_buffer_append_byte(buffer,
'\0');
22367pm_serialize_parse(
pm_buffer_t *buffer,
const uint8_t *source,
size_t size,
const char *data) {
22369 pm_options_read(&options, data);
22377 pm_serialize_header(buffer);
22379 pm_buffer_append_byte(buffer,
'\0');
22382 pm_arena_free(&arena);
22395 pm_options_read(&options, data);
22399 pm_serialize_header(buffer);
22401 pm_buffer_append_byte(buffer,
'\0');
22405 pm_arena_free(&arena);
22413pm_serialize_parse_comments(
pm_buffer_t *buffer,
const uint8_t *source,
size_t size,
const char *data) {
22415 pm_options_read(&options, data);
22422 pm_serialize_header(buffer);
22424 pm_buffer_append_varsint(buffer, parser.
start_line);
22428 pm_arena_free(&arena);
22441 PM_SLICE_TYPE_ERROR = -1,
22444 PM_SLICE_TYPE_NONE,
22447 PM_SLICE_TYPE_LOCAL,
22450 PM_SLICE_TYPE_CONSTANT,
22453 PM_SLICE_TYPE_METHOD_NAME
22460pm_slice_type(
const uint8_t *source,
size_t length,
const char *encoding_name) {
22462 const pm_encoding_t *encoding = pm_encoding_find((
const uint8_t *) encoding_name, (
const uint8_t *) (encoding_name + strlen(encoding_name)));
22463 if (encoding == NULL)
return PM_SLICE_TYPE_ERROR;
22466 if (length == 0)
return PM_SLICE_TYPE_NONE;
22469 if ((width = encoding->
alpha_char(source, (ptrdiff_t) length)) != 0) {
22471 }
else if (*source ==
'_') {
22474 }
else if ((*source >= 0x80) && ((width = encoding->
char_width(source, (ptrdiff_t) length)) > 0)) {
22478 return PM_SLICE_TYPE_NONE;
22482 const uint8_t *end = source + length;
22483 pm_slice_type_t result = encoding->
isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22489 while (source < end) {
22490 if ((width = encoding->
alnum_char(source, end - source)) != 0) {
22493 }
else if (*source ==
'_') {
22496 }
else if ((*source >= 0x80) && ((width = encoding->
char_width(source, end - source)) > 0)) {
22506 if (*source ==
'!' || *source ==
'?' || *source ==
'=') {
22508 result = PM_SLICE_TYPE_METHOD_NAME;
22512 return source == end ? result : PM_SLICE_TYPE_NONE;
22519pm_string_query_local(
const uint8_t *source,
size_t length,
const char *encoding_name) {
22520 switch (pm_slice_type(source, length, encoding_name)) {
22521 case PM_SLICE_TYPE_ERROR:
22523 case PM_SLICE_TYPE_NONE:
22524 case PM_SLICE_TYPE_CONSTANT:
22525 case PM_SLICE_TYPE_METHOD_NAME:
22527 case PM_SLICE_TYPE_LOCAL:
22531 assert(
false &&
"unreachable");
22539pm_string_query_constant(
const uint8_t *source,
size_t length,
const char *encoding_name) {
22540 switch (pm_slice_type(source, length, encoding_name)) {
22541 case PM_SLICE_TYPE_ERROR:
22543 case PM_SLICE_TYPE_NONE:
22544 case PM_SLICE_TYPE_LOCAL:
22545 case PM_SLICE_TYPE_METHOD_NAME:
22547 case PM_SLICE_TYPE_CONSTANT:
22551 assert(
false &&
"unreachable");
22559pm_string_query_method_name(
const uint8_t *source,
size_t length,
const char *encoding_name) {
22560#define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22561#define C1(c) (*source == c)
22562#define C2(s) (memcmp(source, s, 2) == 0)
22563#define C3(s) (memcmp(source, s, 3) == 0)
22565 switch (pm_slice_type(source, length, encoding_name)) {
22566 case PM_SLICE_TYPE_ERROR:
22568 case PM_SLICE_TYPE_NONE:
22570 case PM_SLICE_TYPE_LOCAL:
22572 return B((length != 2) || (source[0] !=
'_') || (source[1] ==
'0') || !pm_char_is_decimal_digit(source[1]));
22573 case PM_SLICE_TYPE_CONSTANT:
22575 case PM_SLICE_TYPE_METHOD_NAME:
22582 return B(C1(
'&') || C1(
'`') || C1(
'!') || C1(
'^') || C1(
'>') || C1(
'<') || C1(
'-') || C1(
'%') || C1(
'|') || C1(
'+') || C1(
'/') || C1(
'*') || C1(
'~'));
22584 return B(C2(
"!=") || C2(
"!~") || C2(
"[]") || C2(
"==") || C2(
"=~") || C2(
">=") || C2(
">>") || C2(
"<=") || C2(
"<<") || C2(
"**"));
22586 return B(C3(
"===") || C3(
"<=>") || C3(
"[]="));
pm_diagnostic_id_t
The diagnostic IDs of all of the diagnostics, used to communicate the types of errors between the par...
#define xmalloc
Old name of ruby_xmalloc.
#define xcalloc
Old name of ruby_xcalloc.
int len
Length of the buffer.
VALUE type(ANYARGS)
ANYARGS-ed function type.
PRISM_EXPORTED_FUNCTION void pm_options_free(pm_options_t *options)
Free the internal memory associated with the options.
PRISM_EXPORTED_FUNCTION const pm_string_t * pm_options_scope_local_get(const pm_options_scope_t *scope, size_t index)
Return a pointer to the local at the given index within the given scope.
PRISM_EXPORTED_FUNCTION const pm_options_scope_t * pm_options_scope_get(const pm_options_t *options, size_t index)
Return a pointer to the scope at the given index within the given options.
static const uint8_t PM_OPTIONS_COMMAND_LINE_N
A bit representing whether or not the command line -n option was set.
#define PM_OPTIONS_FROZEN_STRING_LITERAL_DISABLED
String literals should be made mutable.
#define PM_OPTIONS_FROZEN_STRING_LITERAL_ENABLED
String literals should be made frozen.
#define PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
String literals may be frozen or mutable depending on the implementation default.
static const uint8_t PM_OPTIONS_COMMAND_LINE_P
A bit representing whether or not the command line -p option was set.
@ PM_OPTIONS_VERSION_CRUBY_3_3
The vendored version of prism in CRuby 3.3.x.
@ PM_OPTIONS_VERSION_LATEST
The current version of prism.
@ PM_OPTIONS_VERSION_CRUBY_4_1
The vendored version of prism in CRuby 4.1.x.
@ PM_OPTIONS_VERSION_UNSET
If an explicit version is not provided, the current version of prism will be used.
@ PM_OPTIONS_VERSION_CRUBY_3_4
The vendored version of prism in CRuby 3.4.x.
@ PM_OPTIONS_VERSION_CRUBY_4_0
The vendored version of prism in CRuby 4.0.x.
pm_heredoc_indent_t
The type of indentation that a heredoc uses.
struct pm_context_node pm_context_node_t
This is a node in a linked list of contexts.
#define PM_LEX_STACK_SIZE
We pre-allocate a certain number of lex states in order to avoid having to call malloc too many times...
struct pm_parser pm_parser_t
The parser used to parse Ruby source.
struct pm_comment pm_comment_t
This is a node in the linked list of comments that we've found while parsing.
pm_lex_state_t
This enum combines the various bits from the above enum into individual values that represent the var...
struct pm_scope pm_scope_t
This struct represents a node in a linked list of scopes.
pm_heredoc_quote_t
The type of quote that a heredoc uses.
void(* pm_encoding_changed_callback_t)(pm_parser_t *parser)
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
pm_context_t
While parsing, we keep track of a stack of contexts.
@ PM_CONTEXT_CLASS_RESCUE
a rescue statement within a class statement
@ PM_CONTEXT_ELSIF
an elsif clause
@ PM_CONTEXT_DEF_RESCUE
a rescue statement within a method definition
@ PM_CONTEXT_ELSE
an else clause
@ PM_CONTEXT_FOR_INDEX
a for loop's index
@ PM_CONTEXT_CASE_WHEN
a case when statement
@ PM_CONTEXT_BLOCK_RESCUE
a rescue statement within a do..end block
@ PM_CONTEXT_MODULE
a module declaration
@ PM_CONTEXT_DEF_PARAMS
a method definition's parameters
@ PM_CONTEXT_CASE_IN
a case in statement
@ PM_CONTEXT_BLOCK_ELSE
a rescue else statement within a do..end block
@ PM_CONTEXT_LOOP_PREDICATE
the predicate clause of a loop statement
@ PM_CONTEXT_SCLASS
a singleton class definition
@ PM_CONTEXT_UNLESS
an unless statement
@ PM_CONTEXT_POSTEXE
an END block
@ PM_CONTEXT_IF
an if statement
@ PM_CONTEXT_MULTI_TARGET
a multiple target expression
@ PM_CONTEXT_LAMBDA_RESCUE
a rescue statement within a lambda expression
@ PM_CONTEXT_BEGIN_ELSE
a rescue else statement with an explicit begin
@ PM_CONTEXT_NONE
a null context, used for returning a value from a function
@ PM_CONTEXT_CLASS_ELSE
a rescue else statement within a class statement
@ PM_CONTEXT_LAMBDA_ENSURE
an ensure statement within a lambda expression
@ PM_CONTEXT_BLOCK_ENSURE
an ensure statement within a do..end block
@ PM_CONTEXT_CLASS_ENSURE
an ensure statement within a class statement
@ PM_CONTEXT_LAMBDA_BRACES
a lambda expression with braces
@ PM_CONTEXT_MODULE_ELSE
a rescue else statement within a module statement
@ PM_CONTEXT_PARENS
a parenthesized expression
@ PM_CONTEXT_BLOCK_BRACES
expressions in block arguments using braces
@ PM_CONTEXT_BLOCK_PARAMETERS
expressions in block parameters foo do |...| end
@ PM_CONTEXT_DEF_ENSURE
an ensure statement within a method definition
@ PM_CONTEXT_SCLASS_RESCUE
a rescue statement with a singleton class
@ PM_CONTEXT_PREEXE
a BEGIN block
@ PM_CONTEXT_DEFINED
a defined? expression
@ PM_CONTEXT_MODULE_ENSURE
an ensure statement within a module statement
@ PM_CONTEXT_BEGIN_RESCUE
a rescue statement with an explicit begin
@ PM_CONTEXT_UNTIL
an until statement
@ PM_CONTEXT_DEF_ELSE
a rescue else statement within a method definition
@ PM_CONTEXT_FOR
a for loop
@ PM_CONTEXT_PREDICATE
a predicate inside an if/elsif/unless statement
@ PM_CONTEXT_BEGIN_ENSURE
an ensure statement with an explicit begin
@ PM_CONTEXT_SCLASS_ENSURE
an ensure statement with a singleton class
@ PM_CONTEXT_DEFAULT_PARAMS
a method definition's default parameter
@ PM_CONTEXT_LAMBDA_ELSE
a rescue else statement within a lambda expression
@ PM_CONTEXT_CLASS
a class declaration
@ PM_CONTEXT_MAIN
the top level context
@ PM_CONTEXT_LAMBDA_DO_END
a lambda expression with do..end
@ PM_CONTEXT_BEGIN
a begin statement
@ PM_CONTEXT_RESCUE_MODIFIER
a modifier rescue clause
@ PM_CONTEXT_EMBEXPR
an interpolated expression
@ PM_CONTEXT_TERNARY
a ternary expression
@ PM_CONTEXT_DEF
a method definition
@ PM_CONTEXT_SCLASS_ELSE
a rescue else statement with a singleton class
@ PM_CONTEXT_MODULE_RESCUE
a rescue statement within a module statement
@ PM_CONTEXT_BLOCK_KEYWORDS
expressions in block arguments using do..end
@ PM_CONTEXT_WHILE
a while statement
uint8_t pm_scope_parameters_t
The flags about scope parameters that can be set.
uint8_t pm_shareable_constant_value_t
The type of shareable constant value that can be set.
pm_comment_type_t
This is the type of a comment that we've found while parsing.
void pm_buffer_free(pm_buffer_t *buffer)
Free the memory associated with the buffer.
bool pm_buffer_init(pm_buffer_t *buffer)
Initialize a pm_buffer_t with its default values.
size_t pm_buffer_length(const pm_buffer_t *buffer)
Return the length of the buffer.
char * pm_buffer_value(const pm_buffer_t *buffer)
Return the value of the buffer.
#define PM_CONSTANT_ID_UNSET
When we allocate constants into the pool, we reserve 0 to mean that the slot is not yet filled.
uint32_t pm_constant_id_t
A constant id is a unique identifier for a constant in the constant pool.
PRISM_EXPORTED_FUNCTION size_t pm_string_length(const pm_string_t *string)
Returns the length associated with the string.
PRISM_EXPORTED_FUNCTION const uint8_t * pm_string_source(const pm_string_t *string)
Returns the start pointer associated with the string.
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
#define PM_STRING_EMPTY
Defines an empty string.
#define PRISM_ALIGNOF
Get the alignment requirement of a type.
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
#define PRISM_UNLIKELY(x)
The compiler should predict that this branch will not be taken.
#define PRISM_ATTRIBUTE_UNUSED
GCC will warn if you specify a function or parameter that is unused at runtime.
#define PRISM_DEPTH_MAXIMUM
When we are parsing using recursive descent, we want to protect against malicious payloads that could...
#define PM_STATIC_ASSERT(line, condition, message)
We want to be able to use static assertions, but they weren't standardized until C11.
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
#define PM_ENCODING_US_ASCII_ENTRY
This is the US-ASCII encoding.
#define PM_ENCODING_UTF_8_ENTRY
This is the default UTF-8 encoding.
#define PRISM_ENCODING_ALPHABETIC_BIT
All of the lookup tables use the first bit of each embedded byte to indicate whether the codepoint is...
#define PRISM_ENCODING_ALPHANUMERIC_BIT
All of the lookup tables use the second bit of each embedded byte to indicate whether the codepoint i...
#define PM_NODE_LIST_FOREACH(list, index, node)
Loop through each node in the node list, writing each node to the given pm_node_t pointer.
#define PRISM_VERSION
The version of the Prism library as a constant string.
#define PRISM_VERSION_PATCH
The patch version of the Prism library as an int.
#define PRISM_VERSION_MINOR
The minor version of the Prism library as an int.
#define PRISM_VERSION_MAJOR
The major version of the Prism library as an int.
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse(pm_parser_t *parser)
Parse the Ruby source associated with the given parser and return the tree.
PRISM_EXPORTED_FUNCTION void pm_parser_register_encoding_changed_callback(pm_parser_t *parser, pm_encoding_changed_callback_t callback)
Register a callback that will be called whenever prism changes the encoding it is using to parse base...
PRISM_EXPORTED_FUNCTION void pm_parser_init(pm_arena_t *arena, pm_parser_t *parser, const uint8_t *source, size_t size, const pm_options_t *options)
Initialize a parser with the given start and end pointers.
PRISM_EXPORTED_FUNCTION void pm_parser_free(pm_parser_t *parser)
Free any memory associated with the given parser.
PRISM_EXPORTED_FUNCTION pm_node_t * pm_parse_stream(pm_arena_t *arena, pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *stream_fgets, pm_parse_stream_feof_t *stream_feof, const pm_options_t *options)
Parse a stream of Ruby source and return the tree.
The main header file for the prism parser.
pm_string_query_t
Represents the results of a slice query.
@ PM_STRING_QUERY_TRUE
Returned if the result of the slice query is true.
@ PM_STRING_QUERY_ERROR
Returned if the encoding given to a slice query was invalid.
@ PM_STRING_QUERY_FALSE
Returned if the result of the slice query is false.
void pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer)
Serialize the encoding, metadata, nodes, and constant pool.
char *() pm_parse_stream_fgets_t(char *string, int size, void *stream)
This function is used in pm_parse_stream() to retrieve a line of input from a stream.
void pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer)
Serialize the name of the encoding to the buffer.
void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer)
Serialize the given list of comments to the given buffer.
int() pm_parse_stream_feof_t(void *stream)
This function is used in pm_parse_stream to check whether a stream is EOF.
const char * pm_token_type_human(pm_token_type_t token_type)
Returns the human name of the given token type.
This struct is used to pass information between the regular expression parser and the error callback.
pm_parser_t * parser
The parser that we are parsing the regular expression for.
const uint8_t * start
The start of the regular expression.
bool shared
Whether or not the source of the regular expression is shared.
const uint8_t * end
The end of the regular expression.
This struct is used to pass information between the regular expression parser and the named capture c...
pm_constant_id_list_t names
The list of names that have been parsed.
pm_parser_t * parser
The parser that is parsing the regular expression.
pm_match_write_node_t * match
The match write node that is being created.
pm_call_node_t * call
The call node wrapping the regular expression node.
bool shared
Whether the content of the regular expression is shared.
PM_NODE_ALIGNAS struct pm_node * left
AndNode::left.
PM_NODE_ALIGNAS struct pm_node * right
AndNode::right.
pm_node_t base
The embedded base node.
struct pm_node_list arguments
ArgumentsNode::arguments.
This is a special out parameter to the parse_arguments_list function that includes opening and closin...
pm_node_t * block
The optional block attached to the call.
bool has_forwarding
The flag indicating whether this arguments list has forwarding argument.
pm_location_t opening_loc
The optional location of the opening parenthesis or bracket.
pm_arguments_node_t * arguments
The lazily-allocated optional arguments node.
pm_location_t closing_loc
The optional location of the closing parenthesis or bracket.
struct pm_node_list elements
ArrayNode::elements.
PM_NODE_ALIGNAS struct pm_node * constant
ArrayPatternNode::constant.
pm_location_t opening_loc
ArrayPatternNode::opening_loc.
pm_location_t closing_loc
ArrayPatternNode::closing_loc.
PM_NODE_ALIGNAS struct pm_node * value
AssocNode::value.
PM_NODE_ALIGNAS struct pm_node * key
AssocNode::key.
PM_NODE_ALIGNAS struct pm_else_node * else_clause
BeginNode::else_clause.
PM_NODE_ALIGNAS struct pm_ensure_node * ensure_clause
BeginNode::ensure_clause.
PM_NODE_ALIGNAS struct pm_statements_node * statements
BeginNode::statements.
PM_NODE_ALIGNAS struct pm_rescue_node * rescue_clause
BeginNode::rescue_clause.
pm_node_t base
The embedded base node.
This struct represents a set of binding powers used for a given token.
bool binary
Whether or not this token can be used as a binary operator.
pm_binding_power_t left
The left binding power.
bool nonassoc
Whether or not this token can be used as non-associative binary operator.
pm_binding_power_t right
The right binding power.
pm_location_t opening_loc
BlockNode::opening_loc.
A pm_buffer_t is a simple memory buffer that stores data in a contiguous block of memory.
size_t length
The length of the buffer in bytes.
char * value
A pointer to the start of the buffer.
pm_location_t opening_loc
CallNode::opening_loc.
pm_location_t closing_loc
CallNode::closing_loc.
pm_constant_id_t name
CallNode::name.
PM_NODE_ALIGNAS struct pm_arguments_node * arguments
CallNode::arguments.
pm_location_t equal_loc
CallNode::equal_loc.
pm_location_t call_operator_loc
CallNode::call_operator_loc.
pm_location_t message_loc
CallNode::message_loc.
PM_NODE_ALIGNAS struct pm_node * block
CallNode::block.
PM_NODE_ALIGNAS struct pm_node * receiver
CallNode::receiver.
struct pm_node_list conditions
CaseMatchNode::conditions.
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseMatchNode::else_clause.
PM_NODE_ALIGNAS struct pm_else_node * else_clause
CaseNode::else_clause.
struct pm_node_list conditions
CaseNode::conditions.
size_t size
The number of constant ids in the list.
A constant in the pool which effectively stores a string.
size_t length
The length of the string.
const uint8_t * start
A pointer to the start of the string.
This is a node in a linked list of contexts.
pm_context_t context
The context that this node represents.
struct pm_context_node * prev
A pointer to the previous context in the linked list.
PM_NODE_ALIGNAS struct pm_statements_node * statements
ElseNode::statements.
This struct defines the functions necessary to implement the encoding interface so we can determine h...
size_t(* alpha_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphab...
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
bool(* isupper_char)(const uint8_t *b, ptrdiff_t n)
Return true if the next character is valid in the encoding and is an uppercase character.
const char * name
The name of the encoding.
size_t(* alnum_char)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding and is alphan...
PM_NODE_ALIGNAS struct pm_statements_node * statements
EnsureNode::statements.
pm_location_t opening_loc
FindPatternNode::opening_loc.
PM_NODE_ALIGNAS struct pm_node * constant
FindPatternNode::constant.
pm_location_t closing_loc
FindPatternNode::closing_loc.
double value
FloatNode::value.
pm_node_t base
The embedded base node.
GlobalVariableTargetNode.
struct pm_node_list elements
HashNode::elements.
PM_NODE_ALIGNAS struct pm_node * constant
HashPatternNode::constant.
pm_location_t opening_loc
HashPatternNode::opening_loc.
pm_location_t closing_loc
HashPatternNode::closing_loc.
All of the information that must be stored in order to lex a heredoc.
size_t ident_length
The length of the heredoc identifier.
pm_heredoc_quote_t quote
The type of quote that the heredoc uses.
pm_heredoc_indent_t indent
The type of indentation that the heredoc uses.
const uint8_t * ident_start
A pointer to the start of the heredoc identifier.
PM_NODE_ALIGNAS struct pm_statements_node * statements
IfNode::statements.
PM_NODE_ALIGNAS struct pm_node * subsequent
IfNode::subsequent.
PM_NODE_ALIGNAS struct pm_statements_node * statements
InNode::statements.
InstanceVariableReadNode.
InstanceVariableTargetNode.
InstanceVariableWriteNode.
pm_integer_t value
IntegerNode::value.
pm_node_t base
The embedded base node.
bool negative
Whether or not the integer is negative.
InterpolatedMatchLastLineNode.
InterpolatedRegularExpressionNode.
pm_node_t base
The embedded base node.
pm_location_t opening_loc
InterpolatedStringNode::opening_loc.
pm_location_t opening_loc
InterpolatedXStringNode::opening_loc.
pm_node_t base
The embedded base node.
struct pm_node_list parts
InterpolatedXStringNode::parts.
void(* callback)(void *data, pm_parser_t *parser, pm_token_t *token)
This is the callback that is called when a token is lexed.
void * data
This opaque pointer is used to provide whatever information the user deemed necessary to the callback...
When lexing Ruby source, the lexer has a small amount of state to tell which kind of token it is curr...
uint8_t terminator
This is the terminator of the list literal.
size_t nesting
This keeps track of the nesting level of the list.
bool interpolation
Whether or not interpolation is allowed in this list.
uint8_t incrementor
When lexing a list, it takes into account balancing the terminator if the terminator is one of (),...
enum pm_lex_mode::@98 mode
The type of this lex mode.
uint8_t breakpoints[11]
This is the character set that should be used to delimit the tokens within the list.
pm_heredoc_lex_mode_t base
All of the data necessary to lex a heredoc.
bool line_continuation
True if the previous token ended with a line continuation.
struct pm_lex_mode * prev
The previous lex state so that it knows how to pop.
bool label_allowed
Whether or not at the end of the string we should allow a :, which would indicate this was a dynamic ...
const uint8_t * next_start
This is the pointer to the character where lexing should resume once the heredoc has been completely ...
union pm_lex_mode::@99 as
The data associated with this type of lex mode.
size_t * common_whitespace
This is used to track the amount of common whitespace on each line so that we know how much to dedent...
int32_t line
The line number.
uint32_t * offsets
The list of offsets.
size_t size
The number of offsets in the list.
This struct represents an abstract linked list that provides common functionality.
struct pm_list_node * next
A pointer to the next node in the list.
This represents the overall linked list.
pm_list_node_t * head
A pointer to the head of the list.
size_t size
The size of the list.
This tracks an individual local variable in a certain lexical context, as well as the number of times...
pm_constant_id_t name
The name of the local variable.
pm_location_t location
The location of the local variable in the source.
uint32_t hash
The hash of the local variable.
uint32_t index
The index of the local variable in the local table.
uint32_t reads
The number of times the local variable is read.
uint32_t depth
LocalVariableReadNode::depth.
pm_constant_id_t name
LocalVariableReadNode::name.
uint32_t depth
LocalVariableWriteNode::depth.
pm_constant_id_t name
LocalVariableWriteNode::name.
This is a set of local variables in a certain lexical context (method, class, module,...
pm_local_t * locals
The nullable allocated memory for the local variables in the set.
uint32_t capacity
The capacity of the local variables set.
uint32_t size
The number of local variables in the set.
This struct represents a slice in the source code, defined by an offset and a length.
uint32_t start
The offset of the location from the start of the source.
uint32_t length
The length of the location.
struct pm_node_list targets
MatchWriteNode::targets.
pm_location_t lparen_loc
MultiTargetNode::lparen_loc.
struct pm_node_list lefts
MultiTargetNode::lefts.
pm_location_t rparen_loc
MultiTargetNode::rparen_loc.
A list of nodes in the source, most often used for lists of children.
size_t size
The number of nodes in the list.
struct pm_node ** nodes
The nodes in the list.
This is the base structure that represents a node in the syntax tree.
pm_node_type_t type
This represents the type of the node.
pm_location_t location
This is the location of the node in the source.
A scope of locals surrounding the code that is being parsed.
size_t locals_count
The number of locals in the scope.
uint8_t forwarding
Flags for the set of forwarding parameters in this scope.
The options that can be passed to the parser.
uint8_t command_line
A bitset of the various options that were set on the command line.
void * shebang_callback_data
Any additional data that should be passed along to the shebang callback if one was set.
bool encoding_locked
Whether or not the encoding magic comments should be respected.
bool main_script
When the file being parsed is the main script, the shebang will be considered for command-line flags ...
pm_string_t encoding
The name of the encoding that the source file is in.
int32_t line
The line within the file that the parse starts on.
pm_options_shebang_callback_t shebang_callback
The callback to call when additional switches are found in a shebang comment.
int8_t frozen_string_literal
Whether or not the frozen string literal option has been set.
bool partial_script
When the file being parsed is considered a "partial" script, jumps will not be marked as errors if th...
size_t scopes_count
The number of scopes surrounding the code that is being parsed.
pm_string_t filepath
The name of the file that is currently being parsed.
pm_options_version_t version
The version of prism that we should be parsing with.
PM_NODE_ALIGNAS struct pm_node * right
OrNode::right.
PM_NODE_ALIGNAS struct pm_node * left
OrNode::left.
PM_NODE_ALIGNAS struct pm_node * block
ParametersNode::block.
PM_NODE_ALIGNAS struct pm_node * rest
ParametersNode::rest.
PM_NODE_ALIGNAS struct pm_node * keyword_rest
ParametersNode::keyword_rest.
PM_NODE_ALIGNAS struct pm_node * body
ParenthesesNode::body.
This struct represents the overall parser.
const pm_encoding_t * explicit_encoding
When a string-like expression is being lexed, any byte or escape sequence that resolves to a value wh...
pm_lex_state_t lex_state
The current state of the lexer.
uint8_t command_line
The command line flags given from the options.
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
bool partial_script
Whether or not we are parsing a "partial" script, which is a script that will be evaluated in the con...
bool pattern_matching_newlines
This flag indicates that we are currently parsing a pattern matching expression and impacts the calc...
const uint8_t * end
The pointer to the end of the source.
bool recovering
Whether or not we're currently recovering from a syntax error.
pm_node_flags_t integer_base
We want to add a flag to integer nodes that indicates their base.
bool warn_mismatched_indentation
By default, Ruby always warns about mismatched indentation.
pm_constant_pool_t constant_pool
This constant pool keeps all of the constants defined throughout the file so that we can reference th...
bool in_keyword_arg
This flag indicates that we are currently parsing a keyword argument.
const uint8_t * next_start
This is a special field set on the parser when we need the parser to jump to a specific location when...
pm_static_literals_t * current_hash_keys
The hash keys for the hash that is currently being parsed.
pm_list_t magic_comment_list
The list of magic comments that have been found while parsing.
int lambda_enclosure_nesting
Used to temporarily track the nesting of enclosures to determine if a { is the beginning of a lambda ...
pm_lex_callback_t * lex_callback
This is an optional callback that can be attached to the parser that will be called whenever a new to...
pm_options_version_t version
The version of prism that we should use to parse.
pm_token_t previous
The previous token we were considering.
pm_string_t current_string
This string is used to pass information from the lexer to the parser.
bool parsing_eval
Whether or not we are parsing an eval string.
bool current_regular_expression_ascii_only
True if the current regular expression being lexed contains only ASCII characters.
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
pm_location_t data_loc
An optional location that represents the location of the END marker and the rest of the content of th...
pm_context_node_t * current_context
The current parsing context.
const uint8_t * start
The pointer to the start of the source.
int enclosure_nesting
Tracks the current nesting of (), [], and {}.
pm_line_offset_list_t line_offsets
This is the list of line offsets in the source file.
pm_list_t error_list
The list of errors that have been found while parsing.
int8_t frozen_string_literal
Whether or not we have found a frozen_string_literal magic comment with a true or false value.
pm_node_list_t * current_block_exits
When parsing block exits (e.g., break, next, redo), we need to validate that they are in correct cont...
const uint8_t * encoding_comment_start
This pointer indicates where a comment must start if it is to be considered an encoding comment.
pm_lex_mode_t stack[PM_LEX_STACK_SIZE]
The stack of lexer modes.
pm_list_t warning_list
The list of warnings that have been found while parsing.
const uint8_t * heredoc_end
This field indicates the end of a heredoc whose identifier was found on the current line.
int brace_nesting
Used to track the nesting of braces to ensure we get the correct value when we are interpolating bloc...
pm_encoding_changed_callback_t encoding_changed_callback
When the encoding that is being used to parse the source is changed by prism, we provide the ability ...
struct pm_parser::@104 lex_modes
A stack of lex modes.
int32_t start_line
The line number at the start of the parse.
bool encoding_locked
This is very specialized behavior for when you want to parse in a context that does not respect encod...
pm_lex_mode_t * current
The current mode of the lexer.
pm_arena_t * arena
The arena used for all AST-lifetime allocations.
pm_list_t comment_list
The list of comments that have been found while parsing.
size_t index
The current index into the lexer mode stack.
pm_string_t filepath
This is the path of the file being parsed.
pm_scope_t * current_scope
The current local scope.
bool command_start
Whether or not we're at the beginning of a command.
bool semantic_token_seen
Whether or not the parser has seen a token that has semantic meaning (i.e., a token that is not a com...
PM_NODE_ALIGNAS struct pm_node * right
RangeNode::right.
PM_NODE_ALIGNAS struct pm_node * left
RangeNode::left.
pm_node_t base
The embedded base node.
pm_integer_t numerator
RationalNode::numerator.
In order to properly set a regular expression's encoding and to validate the byte sequence for the un...
pm_buffer_t regexp_buffer
The buffer holding the regexp source.
pm_token_buffer_t base
The embedded base buffer.
pm_string_t unescaped
RegularExpressionNode::unescaped.
PM_NODE_ALIGNAS struct pm_node * rescue_expression
RescueModifierNode::rescue_expression.
PM_NODE_ALIGNAS struct pm_rescue_node * subsequent
RescueNode::subsequent.
pm_location_t then_keyword_loc
RescueNode::then_keyword_loc.
This struct represents a node in a linked list of scopes.
struct pm_scope * previous
A pointer to the previous scope in the linked list.
pm_node_list_t implicit_parameters
This is a list of the implicit parameters contained within the block.
pm_shareable_constant_value_t shareable_constant
The current state of constant shareability for this scope.
pm_locals_t locals
The IDs of the locals in the given scope.
pm_scope_parameters_t parameters
This is a bitfield that indicates the parameters that are being used in this scope.
bool closed
A boolean indicating whether or not this scope can see into its parent.
PM_NODE_ALIGNAS struct pm_node * expression
SplatNode::expression.
struct pm_node_list body
StatementsNode::body.
pm_node_t base
The embedded base node.
Certain sets of nodes (hash keys and when clauses) check for duplicate nodes to alert the user of pot...
pm_node_t base
The embedded base node.
pm_string_t unescaped
StringNode::unescaped.
pm_location_t content_loc
StringNode::content_loc.
pm_location_t closing_loc
StringNode::closing_loc.
pm_location_t opening_loc
StringNode::opening_loc.
A generic string type that can have various ownership semantics.
const uint8_t * source
A pointer to the start of the string.
size_t length
The length of the string in bytes of memory.
enum pm_string_t::@105 type
The type of the string.
pm_location_t value_loc
SymbolNode::value_loc.
pm_string_t unescaped
SymbolNode::unescaped.
When we're lexing certain types (strings, symbols, lists, etc.) we have string content associated wit...
pm_buffer_t buffer
The buffer that we're using to keep track of the string content.
const uint8_t * cursor
The cursor into the source string that points to how far we have currently copied into the buffer.
This struct represents a token in the Ruby source.
const uint8_t * end
A pointer to the end location of the token in the source.
const uint8_t * start
A pointer to the start location of the token in the source.
pm_token_type_t type
The type of the token.
PM_NODE_ALIGNAS struct pm_statements_node * statements
UnlessNode::statements.
PM_NODE_ALIGNAS struct pm_else_node * else_clause
UnlessNode::else_clause.
PM_NODE_ALIGNAS struct pm_statements_node * statements
WhenNode::statements.