3#define PM_REGEXP_PARSE_DEPTH_MAX 4096 
   50pm_regexp_parse_error(
pm_regexp_parser_t *parser, 
const uint8_t *start, 
const uint8_t *end, 
const char *message) {
 
   59pm_regexp_parser_named_capture(
pm_regexp_parser_t *parser, 
const uint8_t *start, 
const uint8_t *end) {
 
   61    pm_string_shared_init(&
string, start, end);
 
   79    if (!pm_regexp_char_is_eof(parser) && *parser->
cursor == value) {
 
   91    if (!pm_regexp_char_is_eof(parser) && *parser->
cursor == value) {
 
  103    if (pm_regexp_char_is_eof(parser)) {
 
  151    const uint8_t *savepoint = parser->
cursor;
 
  154        PM_REGEXP_RANGE_QUANTIFIER_STATE_START,
 
  155        PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM,
 
  156        PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM,
 
  157        PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA
 
  158    } state = PM_REGEXP_RANGE_QUANTIFIER_STATE_START;
 
  162            parser->
cursor = savepoint;
 
  167            case PM_REGEXP_RANGE_QUANTIFIER_STATE_START:
 
  168                switch (*parser->
cursor) {
 
  169                    case '0': 
case '1': 
case '2': 
case '3': 
case '4': 
case '5': 
case '6': 
case '7': 
case '8': 
case '9':
 
  171                        state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM;
 
  175                        state = PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA;
 
  178                        parser->
cursor = savepoint;
 
  182            case PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM:
 
  183                switch (*parser->
cursor) {
 
  184                    case '0': 
case '1': 
case '2': 
case '3': 
case '4': 
case '5': 
case '6': 
case '7': 
case '8': 
case '9':
 
  189                        state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
 
  195                        parser->
cursor = savepoint;
 
  199            case PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA:
 
  200                switch (*parser->
cursor) {
 
  201                    case '0': 
case '1': 
case '2': 
case '3': 
case '4': 
case '5': 
case '6': 
case '7': 
case '8': 
case '9':
 
  203                        state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
 
  206                        parser->
cursor = savepoint;
 
  210            case PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM:
 
  211                switch (*parser->
cursor) {
 
  212                    case '0': 
case '1': 
case '2': 
case '3': 
case '4': 
case '5': 
case '6': 
case '7': 
case '8': 
case '9':
 
  219                        parser->
cursor = savepoint;
 
  239    while (!pm_regexp_char_is_eof(parser)) {
 
  240        switch (*parser->
cursor) {
 
  248                if (!pm_regexp_parse_range_quantifier(parser)) 
return false;
 
  265    if (!pm_regexp_char_expect(parser, 
':')) {
 
  269    pm_regexp_char_accept(parser, 
'^');
 
  272        pm_regexp_char_find(parser, 
':') &&
 
  273        pm_regexp_char_expect(parser, 
']') &&
 
  274        pm_regexp_char_expect(parser, 
']')
 
  288    pm_regexp_char_accept(parser, 
'^');
 
  290    while (!pm_regexp_char_is_eof(parser) && *parser->
cursor != 
']') {
 
  291        switch (*parser->
cursor++) {
 
  293                pm_regexp_parse_lbracket(parser, (uint16_t) (depth + 1));
 
  296                if (!pm_regexp_char_is_eof(parser)) {
 
  306    return pm_regexp_char_expect(parser, 
']');
 
  314    if (depth >= PM_REGEXP_PARSE_DEPTH_MAX) {
 
  315        pm_regexp_parse_error(parser, parser->
start, parser->
end, 
"parse depth limit over");
 
  321        pm_regexp_parse_error(parser, parser->
cursor - 1, parser->
cursor, 
"empty char-class");
 
  325    const uint8_t *reset = parser->
cursor;
 
  329        if (pm_regexp_parse_posix_class(parser)) 
return true;
 
  334    return pm_regexp_parse_character_set(parser, depth);
 
  347    PM_REGEXP_OPTION_STATE_INVALID,
 
  348    PM_REGEXP_OPTION_STATE_TOGGLEABLE,
 
  349    PM_REGEXP_OPTION_STATE_ADDABLE,
 
  350    PM_REGEXP_OPTION_STATE_ADDED,
 
  351    PM_REGEXP_OPTION_STATE_REMOVED
 
  352} pm_regexp_option_state_t;
 
  357#define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a' 
  358#define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x' 
  359#define PRISM_REGEXP_OPTION_STATE_SLOTS (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1) 
  366    uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
 
 
  374    memset(options, PM_REGEXP_OPTION_STATE_INVALID, 
sizeof(uint8_t) * PRISM_REGEXP_OPTION_STATE_SLOTS);
 
  375    options->
values[
'i' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_TOGGLEABLE;
 
  376    options->
values[
'm' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_TOGGLEABLE;
 
  377    options->
values[
'x' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_TOGGLEABLE;
 
  378    options->
values[
'd' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
 
  379    options->
values[
'a' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
 
  380    options->
values[
'u' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
 
  389    if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
 
  390        key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
 
  392        switch (options->
values[key]) {
 
  393            case PM_REGEXP_OPTION_STATE_INVALID:
 
  394            case PM_REGEXP_OPTION_STATE_REMOVED:
 
  396            case PM_REGEXP_OPTION_STATE_TOGGLEABLE:
 
  397            case PM_REGEXP_OPTION_STATE_ADDABLE:
 
  398                options->
values[key] = PM_REGEXP_OPTION_STATE_ADDED;
 
  400            case PM_REGEXP_OPTION_STATE_ADDED:
 
  414    if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
 
  415        key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
 
  417        switch (options->
values[key]) {
 
  418            case PM_REGEXP_OPTION_STATE_INVALID:
 
  419            case PM_REGEXP_OPTION_STATE_ADDABLE:
 
  421            case PM_REGEXP_OPTION_STATE_TOGGLEABLE:
 
  422            case PM_REGEXP_OPTION_STATE_ADDED:
 
  423            case PM_REGEXP_OPTION_STATE_REMOVED:
 
  424                options->
values[key] = PM_REGEXP_OPTION_STATE_REMOVED;
 
  437    if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
 
  438        key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
 
  439        return options->
values[key];
 
  468    const uint8_t *group_start = parser->
cursor;
 
  471    pm_regexp_options_init(&options);
 
  474    if (pm_regexp_char_accept(parser, 
'?')) {
 
  475        if (pm_regexp_char_is_eof(parser)) {
 
  476            pm_regexp_parse_error(parser, group_start, parser->
cursor, 
"end pattern in group");
 
  480        switch (*parser->
cursor) {
 
  483                if (pm_regexp_char_is_eof(parser)) {
 
  484                    pm_regexp_parse_error(parser, group_start, parser->
cursor, 
"end pattern in group");
 
  489                    bool escaped = 
false;
 
  495                        if (!escaped && *parser->
cursor == 
')') {
 
  501                        if (width == 0) 
return false;
 
  503                        escaped = (width == 1) && (*parser->
cursor == 
'\\');
 
  512                    bool found = pm_regexp_char_find(parser, 
')');
 
  514                    while (found && (parser->
start <= parser->
cursor - 2) && (*(parser->
cursor - 2) == 
'\\')) {
 
  515                        found = pm_regexp_char_find(parser, 
')');
 
  530                if (pm_regexp_char_is_eof(parser)) {
 
  531                    pm_regexp_parse_error(parser, group_start, parser->
cursor, 
"end pattern with unmatched parenthesis");
 
  535                switch (*parser->
cursor) {
 
  541                        const uint8_t *start = parser->
cursor;
 
  542                        if (!pm_regexp_char_find(parser, 
'>')) {
 
  546                        if (parser->
cursor - start == 1) {
 
  547                            pm_regexp_parse_error(parser, start, parser->
cursor, 
"group name is empty");
 
  551                            pm_regexp_parser_named_capture(parser, start, parser->
cursor - 1);
 
  559                const uint8_t *start = ++parser->
cursor;
 
  560                if (!pm_regexp_char_find(parser, 
'\'')) {
 
  565                    pm_regexp_parser_named_capture(parser, start, parser->
cursor - 1);
 
  571                if (!pm_regexp_char_find(parser, 
')')) {
 
  575            case 'i': 
case 'm': 
case 'x': 
case 'd': 
case 'a': 
case 'u': 
 
  576                while (!pm_regexp_char_is_eof(parser) && *parser->
cursor != 
'-' && *parser->
cursor != 
':' && *parser->
cursor != 
')') {
 
  577                    if (!pm_regexp_options_add(&options, *parser->
cursor)) {
 
  583                if (pm_regexp_char_is_eof(parser)) {
 
  590                if (*parser->
cursor == 
')') {
 
  591                    if (pm_regexp_options_state(&options, 
'x') == PM_REGEXP_OPTION_STATE_ADDED) {
 
  600                if (*parser->
cursor != 
'-') 
break;
 
  605                while (!pm_regexp_char_is_eof(parser) && *parser->
cursor != 
':' && *parser->
cursor != 
')') {
 
  606                    if (!pm_regexp_options_remove(&options, *parser->
cursor)) {
 
  612                if (pm_regexp_char_is_eof(parser)) {
 
  619                if (*parser->
cursor == 
')') {
 
  620                    switch (pm_regexp_options_state(&options, 
'x')) {
 
  621                        case PM_REGEXP_OPTION_STATE_ADDED:
 
  624                        case PM_REGEXP_OPTION_STATE_REMOVED:
 
  636                pm_regexp_parse_error(parser, parser->
cursor - 1, parser->
cursor, 
"undefined group option");
 
  642    switch (pm_regexp_options_state(&options, 
'x')) {
 
  643        case PM_REGEXP_OPTION_STATE_ADDED:
 
  646        case PM_REGEXP_OPTION_STATE_REMOVED:
 
  652    while (!pm_regexp_char_is_eof(parser) && *parser->
cursor != 
')') {
 
  653        if (!pm_regexp_parse_expression(parser, (uint16_t) (depth + 1))) {
 
  657        pm_regexp_char_accept(parser, 
'|');
 
  662    if (pm_regexp_char_expect(parser, 
')')) 
return true;
 
  664    pm_regexp_parse_error(parser, group_start, parser->
cursor, 
"end pattern with unmatched parenthesis");
 
  682    switch (*parser->
cursor) {
 
  686            return pm_regexp_parse_quantifier(parser);
 
  689            if (!pm_regexp_char_is_eof(parser)) {
 
  692            return pm_regexp_parse_quantifier(parser);
 
  695            return pm_regexp_parse_group(parser, depth) && pm_regexp_parse_quantifier(parser);
 
  698            return pm_regexp_parse_lbracket(parser, depth) && pm_regexp_parse_quantifier(parser);
 
  703            pm_regexp_parse_error(parser, parser->
cursor - 1, parser->
cursor, 
"target of repeat operator is not specified");
 
  707            pm_regexp_parse_error(parser, parser->
cursor - 1, parser->
cursor, 
"unmatched close parenthesis");
 
  711                if (!pm_regexp_char_find(parser, 
'\n')) parser->
cursor = parser->
end;
 
  718                width = pm_encoding_utf_8_char_width(parser->
cursor, (ptrdiff_t) (parser->
end - parser->
cursor));
 
  723            if (width == 0) 
return false; 
 
  726            return pm_regexp_parse_quantifier(parser);
 
  737    if (depth >= PM_REGEXP_PARSE_DEPTH_MAX) {
 
  738        pm_regexp_parse_error(parser, parser->
start, parser->
end, 
"parse depth limit over");
 
  742    if (!pm_regexp_parse_item(parser, depth)) {
 
  746    while (!pm_regexp_char_is_eof(parser) && *parser->
cursor != 
')' && *parser->
cursor != 
'|') {
 
  747        if (!pm_regexp_parse_item(parser, depth)) {
 
  764        if (pm_regexp_char_is_eof(parser)) 
return true;
 
  765        if (!pm_regexp_parse_expression(parser, 0)) 
return false;
 
  766    } 
while (pm_regexp_char_accept(parser, 
'|'));
 
  768    return pm_regexp_char_is_eof(parser);
 
  781        .end = source + size,
 
  782        .extended_mode = extended_mode,
 
  785        .name_callback = name_callback,
 
  786        .name_data = name_data,
 
  787        .error_callback = error_callback,
 
  788        .error_data = error_data
 
 
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
#define PRISM_FALLTHROUGH
We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch.
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
A regular expression parser.
void(* pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data)
This callback is called by pm_regexp_parse() when a parse error is found.
void(* pm_regexp_name_callback_t)(const pm_string_t *name, void *data)
This callback is called by pm_regexp_parse() when a named capture group is found.
This struct defines the functions necessary to implement the encoding interface so we can determine h...
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
bool multibyte
Return true if the encoding is a multibyte encoding.
This struct represents the overall parser.
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
const uint8_t * start
The pointer to the start of the source.
This is the set of options that are configurable on the regular expression.
uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS]
The current state of each option.
This is the parser that is going to handle parsing regular expressions.
const uint8_t * cursor
A pointer to the current position in the source.
pm_regexp_error_callback_t error_callback
The callback to call when a parse error is found.
const uint8_t * start
A pointer to the start of the source that we are parsing.
const uint8_t * end
A pointer to the end of the source that we are parsing.
void * name_data
The data to pass to the name callback.
bool extended_mode
Whether or not the regular expression currently being parsed is in extended mode, wherein whitespace ...
pm_parser_t * parser
The parser that is currently being used.
const pm_encoding_t * encoding
The encoding of the source.
void * error_data
The data to pass to the error callback.
pm_regexp_name_callback_t name_callback
The callback to call when a named capture group is found.
bool encoding_changed
Whether the encoding has changed from the default.
A generic string type that can have various ownership semantics.