|
Ruby 4.1.0dev (2026-03-20 revision eb04ab9117336f2b3613244cfe8a528c52faf6d6)
|
A regular expression parser. More...
#include "prism/defines.h"#include "prism/parser.h"#include "prism/encoding.h"#include "prism/util/pm_memchr.h"#include "prism/util/pm_string.h"#include <stdbool.h>#include <stddef.h>#include <string.h>

Go to the source code of this file.
Data Structures | |
| struct | pm_regexp_name_data_t |
| Accumulation state for named capture groups found during regexp parsing. More... | |
Typedefs | |
| typedef void(* | pm_regexp_name_callback_t) (pm_parser_t *parser, const pm_string_t *name, bool shared, pm_regexp_name_data_t *data) |
| Callback invoked by pm_regexp_parse() for each named capture group found. | |
Functions | |
| PRISM_EXPORTED_FUNCTION pm_node_flags_t | pm_regexp_parse (pm_parser_t *parser, pm_regular_expression_node_t *node, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data) |
| Parse a regular expression, validate its encoding, and optionally extract named capture groups. | |
| void | pm_regexp_parse_named_captures (pm_parser_t *parser, const uint8_t *source, size_t size, bool shared, bool extended_mode, pm_regexp_name_callback_t name_callback, pm_regexp_name_data_t *name_data) |
| Parse an interpolated regular expression for named capture groups only. | |
A regular expression parser.
Definition in file regexp.h.
| typedef void(* pm_regexp_name_callback_t) (pm_parser_t *parser, const pm_string_t *name, bool shared, pm_regexp_name_data_t *data) |
Callback invoked by pm_regexp_parse() for each named capture group found.
| parser | The main parser. |
| name | The name of the capture group. |
| shared | Whether the source content is shared (impacts constant storage). |
| data | The accumulation state for named captures. |
| PRISM_EXPORTED_FUNCTION pm_node_flags_t pm_regexp_parse | ( | pm_parser_t * | parser, |
| pm_regular_expression_node_t * | node, | ||
| pm_regexp_name_callback_t | name_callback, | ||
| pm_regexp_name_data_t * | name_data | ||
| ) |
Parse a regular expression, validate its encoding, and optionally extract named capture groups.
Returns the encoding flags to set on the node.
| parser | The parser that is currently being used. |
| node | The regular expression node to parse and validate. |
| name_callback | The optional callback to call when a named capture group is found. |
| name_data | The optional accumulation state for named captures. |
Encoding validation walks the raw source (content_loc) to distinguish escape-produced bytes from literal bytes. Named capture extraction walks the unescaped content since escape sequences in group names (e.g., line continuations) have already been processed by the lexer.
| void pm_regexp_parse_named_captures | ( | pm_parser_t * | parser, |
| const uint8_t * | source, | ||
| size_t | size, | ||
| bool | shared, | ||
| bool | extended_mode, | ||
| pm_regexp_name_callback_t | name_callback, | ||
| pm_regexp_name_data_t * | name_data | ||
| ) |
Parse an interpolated regular expression for named capture groups only.
No encoding validation is performed.
| parser | The parser that is currently being used. |
| source | The source content to parse. |
| size | The length of the source content. |
| shared | Whether the source points into the parser's source buffer. |
| extended_mode | Whether or not the regular expression is in extended mode. |
| name_callback | The callback to call when a named capture group is found. |
| name_data | The accumulation state for named captures. |
This is used for the =~ operator with interpolated regexps where we don't have a pm_regular_expression_node_t. No encoding validation is performed.
Note: The encoding-tracking fields (has_unicode_escape, has_hex_escape, etc.) are initialized but not used for the result. They exist because the parsing functions (pm_regexp_parse_backslash_escape, etc.) unconditionally update them as they walk through the content.