Ruby  3.4.0dev (2024-11-05 revision e440268d51fe02b303e3817a7a733a0dac1c5091)
regexp.c
1 #include "prism/regexp.h"
2 
/**
 * The nesting depth at which parsing gives up. The bracket and expression
 * parsers compare their depth argument against this value and report "parse
 * depth limit over" once it is reached.
 */
3 #define PM_REGEXP_PARSE_DEPTH_MAX 4096
4 
8 typedef struct {
11 
13  const uint8_t *start;
14 
16  const uint8_t *cursor;
17 
19  const uint8_t *end;
20 
26 
29 
32 
35 
37  void *name_data;
38 
41 
43  void *error_data;
45 
49 static inline void
50 pm_regexp_parse_error(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, const char *message) {
51  parser->error_callback(start, end, message, parser->error_data);
52 }
53 
58 static void
59 pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
60  pm_string_t string;
61  pm_string_shared_init(&string, start, end);
62  parser->name_callback(&string, parser->name_data);
63  pm_string_free(&string);
64 }
65 
69 static inline bool
70 pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
71  return parser->cursor >= parser->end;
72 }
73 
77 static inline bool
78 pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
79  if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
80  parser->cursor++;
81  return true;
82  }
83  return false;
84 }
85 
89 static inline bool
90 pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
91  if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
92  parser->cursor++;
93  return true;
94  }
95  return false;
96 }
97 
101 static bool
102 pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
103  if (pm_regexp_char_is_eof(parser)) {
104  return false;
105  }
106 
107  const uint8_t *end = (const uint8_t *) pm_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
108  if (end == NULL) {
109  return false;
110  }
111 
112  parser->cursor = end + 1;
113  return true;
114 }
115 
149 static bool
150 pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
151  const uint8_t *savepoint = parser->cursor;
152 
153  enum {
154  PM_REGEXP_RANGE_QUANTIFIER_STATE_START,
155  PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM,
156  PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM,
157  PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA
158  } state = PM_REGEXP_RANGE_QUANTIFIER_STATE_START;
159 
160  while (1) {
161  switch (state) {
162  case PM_REGEXP_RANGE_QUANTIFIER_STATE_START:
163  switch (*parser->cursor) {
164  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
165  parser->cursor++;
166  state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM;
167  break;
168  case ',':
169  parser->cursor++;
170  state = PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA;
171  break;
172  default:
173  parser->cursor = savepoint;
174  return true;
175  }
176  break;
177  case PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM:
178  switch (*parser->cursor) {
179  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
180  parser->cursor++;
181  break;
182  case ',':
183  parser->cursor++;
184  state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
185  break;
186  case '}':
187  parser->cursor++;
188  return true;
189  default:
190  parser->cursor = savepoint;
191  return true;
192  }
193  break;
194  case PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA:
195  switch (*parser->cursor) {
196  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
197  parser->cursor++;
198  state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
199  break;
200  default:
201  parser->cursor = savepoint;
202  return true;
203  }
204  break;
205  case PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM:
206  switch (*parser->cursor) {
207  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
208  parser->cursor++;
209  break;
210  case '}':
211  parser->cursor++;
212  return true;
213  default:
214  parser->cursor = savepoint;
215  return true;
216  }
217  break;
218  }
219  }
220 
221  return true;
222 }
223 
232 static bool
233 pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
234  while (!pm_regexp_char_is_eof(parser)) {
235  switch (*parser->cursor) {
236  case '*':
237  case '+':
238  case '?':
239  parser->cursor++;
240  break;
241  case '{':
242  parser->cursor++;
243  if (!pm_regexp_parse_range_quantifier(parser)) return false;
244  break;
245  default:
246  // In this case there is no quantifier.
247  return true;
248  }
249  }
250 
251  return true;
252 }
253 
258 static bool
259 pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
260  if (!pm_regexp_char_expect(parser, ':')) {
261  return false;
262  }
263 
264  pm_regexp_char_accept(parser, '^');
265 
266  return (
267  pm_regexp_char_find(parser, ':') &&
268  pm_regexp_char_expect(parser, ']') &&
269  pm_regexp_char_expect(parser, ']')
270  );
271 }
272 
273 // Forward declaration because character sets can be nested.
274 static bool
275 pm_regexp_parse_lbracket(pm_regexp_parser_t *parser, uint16_t depth);
276 
281 static bool
282 pm_regexp_parse_character_set(pm_regexp_parser_t *parser, uint16_t depth) {
283  pm_regexp_char_accept(parser, '^');
284 
285  while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ']') {
286  switch (*parser->cursor++) {
287  case '[':
288  pm_regexp_parse_lbracket(parser, (uint16_t) (depth + 1));
289  break;
290  case '\\':
291  if (!pm_regexp_char_is_eof(parser)) {
292  parser->cursor++;
293  }
294  break;
295  default:
296  // do nothing, we've already advanced the cursor
297  break;
298  }
299  }
300 
301  return pm_regexp_char_expect(parser, ']');
302 }
303 
307 static bool
308 pm_regexp_parse_lbracket(pm_regexp_parser_t *parser, uint16_t depth) {
309  if (depth >= PM_REGEXP_PARSE_DEPTH_MAX) {
310  pm_regexp_parse_error(parser, parser->start, parser->end, "parse depth limit over");
311  return false;
312  }
313 
314  if ((parser->cursor < parser->end) && parser->cursor[0] == ']') {
315  parser->cursor++;
316  pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "empty char-class");
317  return true;
318  }
319 
320  const uint8_t *reset = parser->cursor;
321 
322  if ((parser->cursor + 2 < parser->end) && parser->cursor[0] == '[' && parser->cursor[1] == ':') {
323  parser->cursor++;
324  if (pm_regexp_parse_posix_class(parser)) return true;
325 
326  parser->cursor = reset;
327  }
328 
329  return pm_regexp_parse_character_set(parser, depth);
330 }
331 
332 // Forward declaration here since parsing groups needs to go back up the grammar
333 // to parse expressions within them.
334 static bool
335 pm_regexp_parse_expression(pm_regexp_parser_t *parser, uint16_t depth);
336 
/**
 * The state of a single option character while the options of a group are
 * being parsed.
 */
typedef enum {
    /** The character is not an option; adding or removing it fails. */
    PM_REGEXP_OPTION_STATE_INVALID = 0,

    /** The option has not been seen yet and may be added or removed. */
    PM_REGEXP_OPTION_STATE_TOGGLEABLE = 1,

    /** The option has not been seen yet and may be added but not removed. */
    PM_REGEXP_OPTION_STATE_ADDABLE = 2,

    /** The option has been added. */
    PM_REGEXP_OPTION_STATE_ADDED = 3,

    /** The option has been removed; it can no longer be added. */
    PM_REGEXP_OPTION_STATE_REMOVED = 4
} pm_regexp_option_state_t;
348 
// These are the options that are configurable on the regular expression (or
// from within a group). Option states live in a table indexed by the option
// character's offset from the minimum slot, so the table needs one entry for
// every character from the minimum through the maximum, inclusive.

#define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
#define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
#define PRISM_REGEXP_OPTION_STATE_SLOTS \
    (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
355 
359 typedef struct {
361  uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
363 
367 static void
368 pm_regexp_options_init(pm_regexp_options_t *options) {
369  memset(options, PM_REGEXP_OPTION_STATE_INVALID, sizeof(uint8_t) * PRISM_REGEXP_OPTION_STATE_SLOTS);
370  options->values['i' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_TOGGLEABLE;
371  options->values['m' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_TOGGLEABLE;
372  options->values['x' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_TOGGLEABLE;
373  options->values['d' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
374  options->values['a' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
375  options->values['u' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
376 }
377 
382 static bool
383 pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
384  if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
385  key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
386 
387  switch (options->values[key]) {
388  case PM_REGEXP_OPTION_STATE_INVALID:
389  case PM_REGEXP_OPTION_STATE_REMOVED:
390  return false;
391  case PM_REGEXP_OPTION_STATE_TOGGLEABLE:
392  case PM_REGEXP_OPTION_STATE_ADDABLE:
393  options->values[key] = PM_REGEXP_OPTION_STATE_ADDED;
394  return true;
395  case PM_REGEXP_OPTION_STATE_ADDED:
396  return true;
397  }
398  }
399 
400  return false;
401 }
402 
407 static bool
408 pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
409  if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
410  key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
411 
412  switch (options->values[key]) {
413  case PM_REGEXP_OPTION_STATE_INVALID:
414  case PM_REGEXP_OPTION_STATE_ADDABLE:
415  return false;
416  case PM_REGEXP_OPTION_STATE_TOGGLEABLE:
417  case PM_REGEXP_OPTION_STATE_ADDED:
418  case PM_REGEXP_OPTION_STATE_REMOVED:
419  options->values[key] = PM_REGEXP_OPTION_STATE_REMOVED;
420  return true;
421  }
422  }
423 
424  return false;
425 }
426 
430 static uint8_t
431 pm_regexp_options_state(pm_regexp_options_t *options, uint8_t key) {
432  if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
433  key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
434  return options->values[key];
435  }
436 
437  return false;
438 }
439 
461 static bool
462 pm_regexp_parse_group(pm_regexp_parser_t *parser, uint16_t depth) {
463  const uint8_t *group_start = parser->cursor;
464 
465  pm_regexp_options_t options;
466  pm_regexp_options_init(&options);
467 
468  // First, parse any options for the group.
469  if (pm_regexp_char_accept(parser, '?')) {
470  if (pm_regexp_char_is_eof(parser)) {
471  pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern in group");
472  return false;
473  }
474 
475  switch (*parser->cursor) {
476  case '#': { // inline comments
477  parser->cursor++;
478  if (pm_regexp_char_is_eof(parser)) {
479  pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern in group");
480  return false;
481  }
482 
483  if (parser->encoding_changed && parser->encoding->multibyte) {
484  bool escaped = false;
485 
486  // Here we're going to take a slow path and iterate through
487  // each multibyte character to find the close paren. We do
488  // this because \ can be a trailing byte in some encodings.
489  while (parser->cursor < parser->end) {
490  if (!escaped && *parser->cursor == ')') {
491  parser->cursor++;
492  return true;
493  }
494 
495  size_t width = parser->encoding->char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
496  if (width == 0) return false;
497 
498  escaped = (width == 1) && (*parser->cursor == '\\');
499  parser->cursor += width;
500  }
501 
502  return false;
503  } else {
504  // Here we can take the fast path and use memchr to find the
505  // next ) because we are safe checking backward for \ since
506  // it cannot be a trailing character.
507  bool found = pm_regexp_char_find(parser, ')');
508 
509  while (found && (parser->start <= parser->cursor - 2) && (*(parser->cursor - 2) == '\\')) {
510  found = pm_regexp_char_find(parser, ')');
511  }
512 
513  return found;
514  }
515  }
516  case ':': // non-capturing group
517  case '=': // positive lookahead
518  case '!': // negative lookahead
519  case '>': // atomic group
520  case '~': // absence operator
521  parser->cursor++;
522  break;
523  case '<':
524  parser->cursor++;
525  if (pm_regexp_char_is_eof(parser)) {
526  pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
527  return false;
528  }
529 
530  switch (*parser->cursor) {
531  case '=': // positive lookbehind
532  case '!': // negative lookbehind
533  parser->cursor++;
534  break;
535  default: { // named capture group
536  const uint8_t *start = parser->cursor;
537  if (!pm_regexp_char_find(parser, '>')) {
538  return false;
539  }
540 
541  if (parser->cursor - start == 1) {
542  pm_regexp_parse_error(parser, start, parser->cursor, "group name is empty");
543  }
544 
545  if (parser->name_callback != NULL) {
546  pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
547  }
548 
549  break;
550  }
551  }
552  break;
553  case '\'': { // named capture group
554  const uint8_t *start = ++parser->cursor;
555  if (!pm_regexp_char_find(parser, '\'')) {
556  return false;
557  }
558 
559  if (parser->name_callback != NULL) {
560  pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
561  }
562 
563  break;
564  }
565  case '(': // conditional expression
566  if (!pm_regexp_char_find(parser, ')')) {
567  return false;
568  }
569  break;
570  case 'i': case 'm': case 'x': case 'd': case 'a': case 'u': // options
571  while (!pm_regexp_char_is_eof(parser) && *parser->cursor != '-' && *parser->cursor != ':' && *parser->cursor != ')') {
572  if (!pm_regexp_options_add(&options, *parser->cursor)) {
573  return false;
574  }
575  parser->cursor++;
576  }
577 
578  if (pm_regexp_char_is_eof(parser)) {
579  return false;
580  }
581 
582  // If we are at the end of the group of options and there is no
583  // subexpression, then we are going to be setting the options
584  // for the parent group. In this case we are safe to return now.
585  if (*parser->cursor == ')') {
586  if (pm_regexp_options_state(&options, 'x') == PM_REGEXP_OPTION_STATE_ADDED) {
587  parser->extended_mode = true;
588  }
589 
590  parser->cursor++;
591  return true;
592  }
593 
594  // If we hit a -, then we're done parsing options.
595  if (*parser->cursor != '-') break;
596 
597  // Otherwise, fallthrough to the - case.
598  /* fallthrough */
599  case '-':
600  parser->cursor++;
601  while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ':' && *parser->cursor != ')') {
602  if (!pm_regexp_options_remove(&options, *parser->cursor)) {
603  return false;
604  }
605  parser->cursor++;
606  }
607 
608  if (pm_regexp_char_is_eof(parser)) {
609  return false;
610  }
611 
612  // If we are at the end of the group of options and there is no
613  // subexpression, then we are going to be setting the options
614  // for the parent group. In this case we are safe to return now.
615  if (*parser->cursor == ')') {
616  switch (pm_regexp_options_state(&options, 'x')) {
617  case PM_REGEXP_OPTION_STATE_ADDED:
618  parser->extended_mode = true;
619  break;
620  case PM_REGEXP_OPTION_STATE_REMOVED:
621  parser->extended_mode = false;
622  break;
623  }
624 
625  parser->cursor++;
626  return true;
627  }
628 
629  break;
630  default:
631  parser->cursor++;
632  pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "undefined group option");
633  break;
634  }
635  }
636 
637  bool extended_mode = parser->extended_mode;
638  switch (pm_regexp_options_state(&options, 'x')) {
639  case PM_REGEXP_OPTION_STATE_ADDED:
640  parser->extended_mode = true;
641  break;
642  case PM_REGEXP_OPTION_STATE_REMOVED:
643  parser->extended_mode = false;
644  break;
645  }
646 
647  // Now, parse the expressions within this group.
648  while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')') {
649  if (!pm_regexp_parse_expression(parser, (uint16_t) (depth + 1))) {
650  parser->extended_mode = extended_mode;
651  return false;
652  }
653  pm_regexp_char_accept(parser, '|');
654  }
655 
656  // Finally, make sure we have a closing parenthesis.
657  parser->extended_mode = extended_mode;
658  if (pm_regexp_char_expect(parser, ')')) return true;
659 
660  pm_regexp_parse_error(parser, group_start, parser->cursor, "end pattern with unmatched parenthesis");
661  return false;
662 }
663 
676 static bool
677 pm_regexp_parse_item(pm_regexp_parser_t *parser, uint16_t depth) {
678  switch (*parser->cursor) {
679  case '^':
680  case '$':
681  parser->cursor++;
682  return pm_regexp_parse_quantifier(parser);
683  case '\\':
684  parser->cursor++;
685  if (!pm_regexp_char_is_eof(parser)) {
686  parser->cursor++;
687  }
688  return pm_regexp_parse_quantifier(parser);
689  case '(':
690  parser->cursor++;
691  return pm_regexp_parse_group(parser, depth) && pm_regexp_parse_quantifier(parser);
692  case '[':
693  parser->cursor++;
694  return pm_regexp_parse_lbracket(parser, depth) && pm_regexp_parse_quantifier(parser);
695  case '*':
696  case '?':
697  case '+':
698  parser->cursor++;
699  pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "target of repeat operator is not specified");
700  return true;
701  case ')':
702  parser->cursor++;
703  pm_regexp_parse_error(parser, parser->cursor - 1, parser->cursor, "unmatched close parenthesis");
704  return true;
705  case '#':
706  if (parser->extended_mode) {
707  if (!pm_regexp_char_find(parser, '\n')) parser->cursor = parser->end;
708  return true;
709  }
710  /* fallthrough */
711  default: {
712  size_t width;
713  if (!parser->encoding_changed) {
714  width = pm_encoding_utf_8_char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
715  } else {
716  width = parser->encoding->char_width(parser->cursor, (ptrdiff_t) (parser->end - parser->cursor));
717  }
718 
719  if (width == 0) return false; // TODO: add appropriate error
720  parser->cursor += width;
721 
722  return pm_regexp_parse_quantifier(parser);
723  }
724  }
725 }
726 
731 static bool
732 pm_regexp_parse_expression(pm_regexp_parser_t *parser, uint16_t depth) {
733  if (depth >= PM_REGEXP_PARSE_DEPTH_MAX) {
734  pm_regexp_parse_error(parser, parser->start, parser->end, "parse depth limit over");
735  return false;
736  }
737 
738  if (!pm_regexp_parse_item(parser, depth)) {
739  return false;
740  }
741 
742  while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')' && *parser->cursor != '|') {
743  if (!pm_regexp_parse_item(parser, depth)) {
744  return false;
745  }
746  }
747 
748  return true;
749 }
750 
757 static bool
758 pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
759  do {
760  if (pm_regexp_char_is_eof(parser)) return true;
761  if (!pm_regexp_parse_expression(parser, 0)) return false;
762  } while (pm_regexp_char_accept(parser, '|'));
763 
764  return pm_regexp_char_is_eof(parser);
765 }
766 
772 pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data) {
773  pm_regexp_parse_pattern(&(pm_regexp_parser_t) {
774  .parser = parser,
775  .start = source,
776  .cursor = source,
777  .end = source + size,
778  .extended_mode = extended_mode,
779  .encoding_changed = parser->encoding_changed,
780  .encoding = parser->encoding,
781  .name_callback = name_callback,
782  .name_data = name_data,
783  .error_callback = error_callback,
784  .error_data = error_data
785  });
786 }
void * pm_memchr(const void *source, int character, size_t number, bool encoding_changed, const pm_encoding_t *encoding)
We need to roll our own memchr to handle cases where the encoding changes and we need to search for a...
Definition: pm_memchr.c:11
void pm_string_shared_init(pm_string_t *string, const uint8_t *start, const uint8_t *end)
Initialize a shared string that is based on initial input.
Definition: pm_string.c:16
PRISM_EXPORTED_FUNCTION void pm_string_free(pm_string_t *string)
Free the associated memory of the given string.
Definition: pm_string.c:368
#define PRISM_EXPORTED_FUNCTION
By default, we compile with -fvisibility=hidden.
Definition: defines.h:50
size_t pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n)
Return the size of the next character in the UTF-8 encoding.
Definition: encoding.c:2287
A regular expression parser.
void(* pm_regexp_error_callback_t)(const uint8_t *start, const uint8_t *end, const char *message, void *data)
This callback is called when a parse error is found.
Definition: regexp.h:27
PRISM_EXPORTED_FUNCTION void pm_regexp_parse(pm_parser_t *parser, const uint8_t *source, size_t size, bool extended_mode, pm_regexp_name_callback_t name_callback, void *name_data, pm_regexp_error_callback_t error_callback, void *error_data)
Parse a regular expression.
Definition: regexp.c:772
void(* pm_regexp_name_callback_t)(const pm_string_t *name, void *data)
This callback is called when a named capture group is found.
Definition: regexp.h:22
This struct defines the functions necessary to implement the encoding interface so we can determine h...
Definition: encoding.h:23
size_t(* char_width)(const uint8_t *b, ptrdiff_t n)
Return the number of bytes that the next character takes if it is valid in the encoding.
Definition: encoding.h:29
bool multibyte
Return true if the encoding is a multibyte encoding.
Definition: encoding.h:61
This struct represents the overall parser.
Definition: parser.h:640
const pm_encoding_t * encoding
The encoding functions for the current file are attached to the parser as it's parsing so that it can ...
Definition: parser.h:755
bool encoding_changed
Whether or not the encoding has been changed by a magic comment.
Definition: parser.h:903
const uint8_t * start
The pointer to the start of the source.
Definition: parser.h:691
This is the set of options that are configurable on the regular expression.
Definition: regexp.c:359
uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS]
The current state of each option.
Definition: regexp.c:361
This is the parser that is going to handle parsing regular expressions.
Definition: regexp.c:8
const uint8_t * cursor
A pointer to the current position in the source.
Definition: regexp.c:16
pm_regexp_error_callback_t error_callback
The callback to call when a parse error is found.
Definition: regexp.c:40
const uint8_t * start
A pointer to the start of the source that we are parsing.
Definition: regexp.c:13
const uint8_t * end
A pointer to the end of the source that we are parsing.
Definition: regexp.c:19
void * name_data
The data to pass to the name callback.
Definition: regexp.c:37
bool extended_mode
Whether or not the regular expression currently being parsed is in extended mode, wherein whitespace ...
Definition: regexp.c:25
pm_parser_t * parser
The parser that is currently being used.
Definition: regexp.c:10
const pm_encoding_t * encoding
The encoding of the source.
Definition: regexp.c:31
void * error_data
The data to pass to the error callback.
Definition: regexp.c:43
pm_regexp_name_callback_t name_callback
The callback to call when a named capture group is found.
Definition: regexp.c:34
bool encoding_changed
Whether the encoding has changed from the default.
Definition: regexp.c:28
A generic string type that can have various ownership semantics.
Definition: pm_string.h:33