1 /* Extended regular expression matching and search library.
2 Copyright (C) 1985, 1989 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* To test, compile with -Dtest.
19 This Dtestable feature turns this into a self-contained program
20 which reads a pattern, describes how it compiles,
21 then reads a string and searches for it. */
25 /* The `emacs' switch turns on certain special matching commands
26 that make sense only in emacs. */
36 #include "gdb_string.h"
38 #define malloc xmalloc
41 * Define the syntax stuff, so we can do the \<...\> things.
44 #ifndef Sword /* must be non-zero in some of the tests below... */
48 #define SYNTAX(c) re_syntax_table[c]
52 char *re_syntax_table;
56 static char re_syntax_table[256];
67 memset (re_syntax_table, '\0', sizeof re_syntax_table);
69 for (c = 'a'; c <= 'z'; c++)
70 re_syntax_table[c] = Sword;
72 for (c = 'A'; c <= 'Z'; c++)
73 re_syntax_table[c] = Sword;
75 for (c = '0'; c <= '9'; c++)
76 re_syntax_table[c] = Sword;
81 #endif /* SYNTAX_TABLE */
82 #endif /* not emacs */
84 #include "gnu-regex.h"
86 /* Number of failure points to allocate space for initially,
87 when matching. If this number is exceeded, more space is allocated,
88 so it is not a hard limit. */
92 #endif /* NFAILURES */
94 /* width of a byte in bits */
98 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
99 since ours (we hope) works properly with all combinations of
100 machines, compilers, `char' and `unsigned char' argument types.
101 (Per Bothner suggested the basic approach.) */
102 #undef SIGN_EXTEND_CHAR
104 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
105 #else /* not __STDC__ */
106 /* As in Harbison and Steele. */
107 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
110 static int obscure_syntax = 0;
112 /* Specify the precise syntax of regexp for compilation.
113 This provides for compatibility for various utilities
114 which historically have different, incompatible syntaxes.
116 The argument SYNTAX is a bit-mask containing the two bits
117 RE_NO_BK_PARENS and RE_NO_BK_VBAR. */
120 re_set_syntax (syntax)
125 ret = obscure_syntax;
126 obscure_syntax = syntax;
130 /* re_compile_pattern takes a regular-expression string
131 and converts it into a buffer full of byte commands for matching.
133 PATTERN is the address of the pattern string
134 SIZE is the length of it.
135 BUFP is a struct re_pattern_buffer * which points to the info
136 on where to store the byte commands.
137 This structure contains a char * which points to the
138 actual space, which should have been obtained with malloc.
139 re_compile_pattern may use realloc to grow the buffer space.
141 The number of bytes of commands can be found out by looking in
142 the struct re_pattern_buffer that bufp pointed to,
143 after re_compile_pattern returns.
146 #define PATPUSH(ch) (*b++ = (char) (ch))
148 #define PATFETCH(c) \
149 {if (p == pend) goto end_of_pattern; \
150 c = * (unsigned char *) p++; \
151 if (translate) c = translate[c]; }
153 #define PATFETCH_RAW(c) \
154 {if (p == pend) goto end_of_pattern; \
155 c = * (unsigned char *) p++; }
157 #define PATUNFETCH p--
159 /* This is not an arbitrary limit: the arguments which represent offsets
160 into the pattern are two bytes long. So if 2^16 bytes turns out to
161 be too small, many things would have to change. */
162 #define MAX_BUF_SIZE (1 << 16)
165 /* Extend the buffer by twice its current size via realloc and
166 reset the pointers that pointed into the old block to point to the
167 correct places in the new one. If extending the buffer results in it
168 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
169 #define EXTEND_BUFFER \
171 char *old_buffer = bufp->buffer; \
172 if (bufp->allocated == MAX_BUF_SIZE) \
174 bufp->allocated <<= 1; \
175 if (bufp->allocated > MAX_BUF_SIZE) \
176 bufp->allocated = MAX_BUF_SIZE; \
177 bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated);\
178 if (bufp->buffer == NULL) \
179 goto memory_exhausted; \
180 /* If the buffer moved, move all the pointers into it. */ \
181 if (old_buffer != bufp->buffer) \
183 b = (b - old_buffer) + bufp->buffer; \
184 begalt = (begalt - old_buffer) + bufp->buffer; \
186 fixup_jump = (fixup_jump - old_buffer) + bufp->buffer;\
188 laststart = (laststart - old_buffer) + bufp->buffer; \
190 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
194 static void store_jump (), insert_jump ();
197 re_compile_pattern (pattern, size, bufp)
200 struct re_pattern_buffer *bufp;
202 register char *b = bufp->buffer;
203 register char *p = pattern;
204 char *pend = pattern + size;
205 register unsigned c, c1;
207 unsigned char *translate = (unsigned char *) bufp->translate;
209 /* address of the count-byte of the most recently inserted "exactn" command.
210 This makes it possible to tell whether a new exact-match character
211 can be added to that command or requires a new "exactn" command. */
213 char *pending_exact = 0;
215 /* address of the place where a forward-jump should go
216 to the end of the containing expression.
217 Each alternative of an "or", except the last, ends with a forward-jump
220 char *fixup_jump = 0;
222 /* address of start of the most recently finished expression.
223 This tells postfix * where to find the start of its operand. */
227 /* In processing a repeat, 1 means zero matches is allowed */
231 /* In processing a repeat, 1 means many matches is allowed */
235 /* address of beginning of regexp, or inside of last \( */
239 /* Stack of information saved by \( and restored by \).
240 Four stack elements are pushed by each \(:
241 First, the value of b.
242 Second, the value of fixup_jump.
243 Third, the value of regnum.
244 Fourth, the value of begalt. */
247 int *stackp = stackb;
248 int *stacke = stackb + 40;
251 /* Counts \('s as they are encountered. Remembered for the matching \),
252 where it becomes the "register number" to put in the stop_memory command */
256 bufp->fastmap_accurate = 0;
261 * Initialize the syntax table.
267 if (bufp->allocated == 0)
269 bufp->allocated = 28;
271 /* EXTEND_BUFFER loses when bufp->allocated is 0 */
272 bufp->buffer = (char *) realloc (bufp->buffer, 28);
274 /* Caller did not allocate a buffer. Do it for him */
275 bufp->buffer = (char *) malloc (28);
276 if (!bufp->buffer) goto memory_exhausted;
277 begalt = b = bufp->buffer;
282 if (b - bufp->buffer > bufp->allocated - 10)
283 /* Note that EXTEND_BUFFER clobbers c */
291 if (obscure_syntax & RE_TIGHT_VBAR)
293 if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
295 /* Make operand of last vbar end before this `$'. */
297 store_jump (fixup_jump, jump, b);
303 /* $ means succeed if at end of line, but only in special contexts.
304 If randomly in the middle of a pattern, it is a normal character. */
305 if (p == pend || *p == '\n'
306 || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
307 || (obscure_syntax & RE_NO_BK_PARENS
309 : *p == '\\' && p[1] == ')')
310 || (obscure_syntax & RE_NO_BK_VBAR
312 : *p == '\\' && p[1] == '|'))
320 /* ^ means succeed if at beg of line, but only if no preceding pattern. */
322 if (laststart && p[-2] != '\n'
323 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
325 if (obscure_syntax & RE_TIGHT_VBAR)
328 && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
339 if (obscure_syntax & RE_BK_PLUS_QM)
343 /* If there is no previous pattern, char not special. */
344 if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
346 /* If there is a sequence of repetition chars,
347 collapse it down to equivalent to just one. */
352 zero_times_ok |= c != '+';
353 many_times_ok |= c != '?';
359 else if (!(obscure_syntax & RE_BK_PLUS_QM)
360 && (c == '+' || c == '?'))
362 else if ((obscure_syntax & RE_BK_PLUS_QM)
367 if (!(c1 == '+' || c1 == '?'))
382 /* Star, etc. applied to an empty pattern is equivalent
383 to an empty pattern. */
387 /* Now we know whether 0 matches is allowed,
388 and whether 2 or more matches is allowed. */
391 /* If more than one repetition is allowed,
392 put in a backward jump at the end. */
393 store_jump (b, maybe_finalize_jump, laststart - 3);
396 insert_jump (on_failure_jump, laststart, b + 3, b);
401 /* At least one repetition required: insert before the loop
402 a skip over the initial on-failure-jump instruction */
403 insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
414 while (b - bufp->buffer
415 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
416 /* Note that EXTEND_BUFFER clobbers c */
421 PATPUSH (charset_not), p++;
426 PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
427 /* Clear the whole map */
428 memset (b, '\0', (1 << BYTEWIDTH) / BYTEWIDTH);
429 /* Read in characters and ranges, setting map bits */
433 if (c == ']' && p != p1 + 1) break;
434 if (*p == '-' && p[1] != ']')
439 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
443 b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
446 /* Discard any bitmap bytes that are all 0 at the end of the map.
447 Decrement the map-length byte too. */
448 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
454 if (! (obscure_syntax & RE_NO_BK_PARENS))
460 if (! (obscure_syntax & RE_NO_BK_PARENS))
466 if (! (obscure_syntax & RE_NEWLINE_OR))
472 if (! (obscure_syntax & RE_NO_BK_VBAR))
478 if (p == pend) goto invalid_pattern;
483 if (obscure_syntax & RE_NO_BK_PARENS)
486 if (stackp == stacke) goto nesting_too_deep;
487 if (regnum < RE_NREGS)
489 PATPUSH (start_memory);
492 *stackp++ = b - bufp->buffer;
493 *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
494 *stackp++ = regnum++;
495 *stackp++ = begalt - bufp->buffer;
502 if (obscure_syntax & RE_NO_BK_PARENS)
505 if (stackp == stackb) goto unmatched_close;
506 begalt = *--stackp + bufp->buffer;
508 store_jump (fixup_jump, jump, b);
509 if (stackp[-1] < RE_NREGS)
511 PATPUSH (stop_memory);
512 PATPUSH (stackp[-1]);
517 fixup_jump = *stackp + bufp->buffer - 1;
518 laststart = *--stackp + bufp->buffer;
522 if (obscure_syntax & RE_NO_BK_VBAR)
525 insert_jump (on_failure_jump, begalt, b + 6, b);
529 store_jump (fixup_jump, jump, b);
543 PATPUSH (syntaxspec);
545 PATPUSH (syntax_spec_code[c]);
550 PATPUSH (notsyntaxspec);
552 PATPUSH (syntax_spec_code[c]);
563 PATPUSH (notwordchar);
579 PATPUSH (notwordbound);
602 for (stackt = stackp - 2; stackt > stackb; stackt -= 4)
612 if (obscure_syntax & RE_BK_PLUS_QM)
617 /* You might think it would be useful for \ to mean
618 not to translate; but if we don't translate it
619 it will never match anything. */
620 if (translate) c = translate[c];
627 if (!pending_exact || pending_exact + *pending_exact + 1 != b
628 || *pending_exact == 0177 || *p == '*' || *p == '^'
629 || ((obscure_syntax & RE_BK_PLUS_QM)
630 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
631 : (*p == '+' || *p == '?')))
644 store_jump (fixup_jump, jump, b);
646 if (stackp != stackb) goto unmatched_open;
648 bufp->used = b - bufp->buffer;
652 return "Invalid regular expression";
655 return "Unmatched \\(";
658 return "Unmatched \\)";
661 return "Premature end of regular expression";
664 return "Nesting too deep";
667 return "Regular expression too big";
670 return "Memory exhausted";
673 /* Store where `from' points a jump operation to jump to where `to' points.
674 `opcode' is the opcode to store. */
677 store_jump (from, opcode, to)
682 from[1] = (to - (from + 3)) & 0377;
683 from[2] = (to - (from + 3)) >> 8;
686 /* Open up space at char FROM, and insert there a jump to TO.
687 CURRENT_END gives te end of the storage no in use,
688 so we know how much data to copy up.
689 OP is the opcode of the jump to insert.
691 If you call this function, you must zero out pending_exact. */
694 insert_jump (op, from, to, current_end)
696 char *from, *to, *current_end;
698 register char *pto = current_end + 3;
699 register char *pfrom = current_end;
700 while (pfrom != from)
702 store_jump (from, op, to);
705 /* Given a pattern, compute a fastmap from it.
706 The fastmap records which of the (1 << BYTEWIDTH) possible characters
707 can start a string that matches the pattern.
708 This fastmap is used by re_search to skip quickly over totally implausible text.
710 The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
712 The other components of bufp describe the pattern to be used. */
715 re_compile_fastmap (bufp)
716 struct re_pattern_buffer *bufp;
718 unsigned char *pattern = (unsigned char *) bufp->buffer;
719 int size = bufp->used;
720 register char *fastmap = bufp->fastmap;
721 register unsigned char *p = pattern;
722 register unsigned char *pend = pattern + size;
724 unsigned char *translate = (unsigned char *) bufp->translate;
726 unsigned char *stackb[NFAILURES];
727 unsigned char **stackp = stackb;
729 memset (fastmap, '\0', (1 << BYTEWIDTH));
730 bufp->fastmap_accurate = 1;
731 bufp->can_be_null = 0;
737 bufp->can_be_null = 1;
740 #ifdef SWITCH_ENUM_BUG
741 switch ((int) ((enum regexpcode) *p++))
743 switch ((enum regexpcode) *p++)
748 fastmap[translate[p[1]]] = 1;
767 fastmap[translate['\n']] = 1;
770 if (bufp->can_be_null != 1)
771 bufp->can_be_null = 2;
775 case maybe_finalize_jump:
777 case dummy_failure_jump:
778 bufp->can_be_null = 1;
780 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
781 p += j + 1; /* The 1 compensates for missing ++ above */
784 /* Jump backward reached implies we just went through
785 the body of a loop and matched nothing.
786 Opcode jumped to should be an on_failure_jump.
787 Just treat it like an ordinary jump.
788 For a * loop, it has pushed its failure point already;
789 if so, discard that as redundant. */
790 if ((enum regexpcode) *p != on_failure_jump)
794 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
795 p += j + 1; /* The 1 compensates for missing ++ above */
796 if (stackp != stackb && *stackp == p)
800 case on_failure_jump:
802 j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
813 bufp->can_be_null = 1;
816 for (j = 0; j < (1 << BYTEWIDTH); j++)
819 if (bufp->can_be_null)
821 /* Don't return; check the alternative paths
822 so we can set can_be_null if appropriate. */
826 for (j = 0; j < (1 << BYTEWIDTH); j++)
827 if (SYNTAX (j) == Sword)
832 for (j = 0; j < (1 << BYTEWIDTH); j++)
833 if (SYNTAX (j) != Sword)
840 for (j = 0; j < (1 << BYTEWIDTH); j++)
841 if (SYNTAX (j) == (enum syntaxcode) k)
847 for (j = 0; j < (1 << BYTEWIDTH); j++)
848 if (SYNTAX (j) != (enum syntaxcode) k)
854 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
855 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
858 fastmap[translate[j]] = 1;
865 /* Chars beyond end of map must be allowed */
866 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
868 fastmap[translate[j]] = 1;
872 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
873 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
876 fastmap[translate[j]] = 1;
888 /* Get here means we have successfully found the possible starting characters
889 of one path of the pattern. We need not follow this path any farther.
890 Instead, look at the next alternative remembered in the stack. */
891 if (stackp != stackb)
898 /* Like re_search_2, below, but only one string is specified. */
901 re_search (pbufp, string, size, startpos, range, regs)
902 struct re_pattern_buffer *pbufp;
904 int size, startpos, range;
905 struct re_registers *regs;
907 return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
910 /* Like re_match_2 but tries first a match starting at index STARTPOS,
911 then at STARTPOS + 1, and so on.
912 RANGE is the number of places to try before giving up.
913 If RANGE is negative, the starting positions tried are
914 STARTPOS, STARTPOS - 1, etc.
915 It is up to the caller to make sure that range is not so large
916 as to take the starting position outside of the input strings.
918 The value returned is the position at which the match was found,
919 or -1 if no match was found,
920 or -2 if error (such as failure stack overflow). */
923 re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
924 struct re_pattern_buffer *pbufp;
925 char *string1, *string2;
929 struct re_registers *regs;
932 register char *fastmap = pbufp->fastmap;
933 register unsigned char *translate = (unsigned char *) pbufp->translate;
934 int total = size1 + size2;
937 /* Update the fastmap now if not correct already */
938 if (fastmap && !pbufp->fastmap_accurate)
939 re_compile_fastmap (pbufp);
941 /* Don't waste time in a long search for a pattern
942 that says it is anchored. */
943 if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
954 /* If a fastmap is supplied, skip quickly over characters
955 that cannot possibly be the start of a match.
956 Note, however, that if the pattern can possibly match
957 the null string, we must test it at each starting point
958 so that we take the first null string we get. */
960 if (fastmap && startpos < total && pbufp->can_be_null != 1)
964 register int lim = 0;
965 register unsigned char *p;
967 if (startpos < size1 && startpos + range >= size1)
968 lim = range - (size1 - startpos);
970 p = ((unsigned char *)
971 &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
975 while (range > lim && !fastmap[translate[*p++]])
980 while (range > lim && !fastmap[*p++])
983 startpos += irange - range;
987 register unsigned char c;
988 if (startpos >= size1)
989 c = string2[startpos - size1];
991 c = string1[startpos];
993 if (translate ? !fastmap[translate[c]] : !fastmap[c])
998 if (range >= 0 && startpos == total
999 && fastmap && pbufp->can_be_null == 0)
1002 val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
1012 #endif /* C_ALLOCA */
1016 if (range > 0) range--, startpos++; else range++, startpos--;
1021 #ifndef emacs /* emacs never uses this */
1023 re_match (pbufp, string, size, pos, regs)
1024 struct re_pattern_buffer *pbufp;
1027 struct re_registers *regs;
1029 return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
1033 /* Maximum size of failure stack. Beyond this, overflow is an error. */
1035 int re_max_failures = 2000;
1037 static int memcmp_translate();
1038 /* Match the pattern described by PBUFP
1039 against data which is the virtual concatenation of STRING1 and STRING2.
1040 SIZE1 and SIZE2 are the sizes of the two data strings.
1041 Start the match at position POS.
1042 Do not consider matching past the position MSTOP.
1044 If pbufp->fastmap is nonzero, then it had better be up to date.
1046 The reason that the data to match are specified as two components
1047 which are to be regarded as concatenated
1048 is so this function can be used directly on the contents of an Emacs buffer.
1050 -1 is returned if there is no match. -2 is returned if there is
1051 an error (such as match stack overflow). Otherwise the value is the length
1052 of the substring which was matched. */
1055 re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
1056 struct re_pattern_buffer *pbufp;
1057 unsigned char *string1, *string2;
1060 struct re_registers *regs;
1063 register unsigned char *p = (unsigned char *) pbufp->buffer;
1064 register unsigned char *pend = p + pbufp->used;
1065 /* End of first string */
1066 unsigned char *end1;
1067 /* End of second string */
1068 unsigned char *end2;
1069 /* Pointer just past last char to consider matching */
1070 unsigned char *end_match_1, *end_match_2;
1071 register unsigned char *d, *dend;
1073 unsigned char *translate = (unsigned char *) pbufp->translate;
1075 /* Failure point stack. Each place that can handle a failure further down the line
1076 pushes a failure point on this stack. It consists of two char *'s.
1077 The first one pushed is where to resume scanning the pattern;
1078 the second pushed is where to resume scanning the strings.
1079 If the latter is zero, the failure point is a "dummy".
1080 If a failure happens and the innermost failure point is dormant,
1081 it discards that failure point and tries the next one. */
1083 unsigned char *initial_stack[2 * NFAILURES];
1084 unsigned char **stackb = initial_stack;
1085 unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
1087 /* Information on the "contents" of registers.
1088 These are pointers into the input strings; they record
1089 just what was matched (on this attempt) by some part of the pattern.
1090 The start_memory command stores the start of a register's contents
1091 and the stop_memory command stores the end.
1093 At that point, regstart[regnum] points to the first character in the register,
1094 regend[regnum] points to the first character beyond the end of the register,
1095 regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
1096 and regend_seg1[regnum] is true iff regend[regnum] points into string1. */
1098 unsigned char *regstart[RE_NREGS];
1099 unsigned char *regend[RE_NREGS];
1100 unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
1102 /* Set up pointers to ends of strings.
1103 Don't allow the second string to be empty unless both are empty. */
1111 end1 = string1 + size1;
1112 end2 = string2 + size2;
1114 /* Compute where to stop matching, within the two strings */
1117 end_match_1 = string1 + mstop;
1118 end_match_2 = string2;
1123 end_match_2 = string2 + mstop - size1;
1126 /* Initialize \) text positions to -1
1127 to mark ones that no \( or \) has been seen for. */
1129 for (mcnt = 0; mcnt < (int) (sizeof (regend) / sizeof (*regend)); mcnt++)
1130 regend[mcnt] = (unsigned char *) -1;
1132 /* `p' scans through the pattern as `d' scans through the data.
1133 `dend' is the end of the input string that `d' points within.
1134 `d' is advanced into the following input string whenever necessary,
1135 but this happens before fetching;
1136 therefore, at the beginning of the loop,
1137 `d' can be pointing at the end of a string,
1138 but it cannot equal string2. */
1141 d = string1 + pos, dend = end_match_1;
1143 d = string2 + pos - size1, dend = end_match_2;
1145 /* Write PREFETCH; just before fetching a character with *d. */
1148 { if (dend == end_match_2) goto fail; /* end of string2 => failure */ \
1149 d = string2; /* end of string1 => advance to string2. */ \
1150 dend = end_match_2; }
1152 /* This loop loops over pattern commands.
1153 It exits by returning from the function if match is complete,
1154 or it drops through if match fails at this starting point in the input data. */
1159 /* End of pattern means we have succeeded! */
1161 /* If caller wants register contents data back, convert it to indices */
1164 regs->start[0] = pos;
1165 if (dend == end_match_1)
1166 regs->end[0] = d - string1;
1168 regs->end[0] = d - string2 + size1;
1169 for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
1171 if (regend[mcnt] == (unsigned char *) -1)
1173 regs->start[mcnt] = -1;
1174 regs->end[mcnt] = -1;
1177 if (regstart_seg1[mcnt])
1178 regs->start[mcnt] = regstart[mcnt] - string1;
1180 regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1181 if (regend_seg1[mcnt])
1182 regs->end[mcnt] = regend[mcnt] - string1;
1184 regs->end[mcnt] = regend[mcnt] - string2 + size1;
1187 if (dend == end_match_1)
1188 return (d - string1 - pos);
1190 return d - string2 + size1 - pos;
1193 /* Otherwise match next pattern command */
1194 #ifdef SWITCH_ENUM_BUG
1195 switch ((int) ((enum regexpcode) *p++))
1197 switch ((enum regexpcode) *p++)
1201 /* \( is represented by a start_memory, \) by a stop_memory.
1202 Both of those commands contain a "register number" argument.
1203 The text matched within the \( and \) is recorded under that number.
1204 Then, \<digit> turns into a `duplicate' command which
1205 is followed by the numeric value of <digit> as the register number. */
1209 regstart_seg1[*p++] = (dend == end_match_1);
1214 regend_seg1[*p++] = (dend == end_match_1);
1219 int regno = *p++; /* Get which register to match against */
1220 register unsigned char *d2, *dend2;
1222 d2 = regstart[regno];
1223 dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
1224 ? regend[regno] : end_match_1);
1227 /* Advance to next segment in register contents, if necessary */
1230 if (dend2 == end_match_2) break;
1231 if (dend2 == regend[regno]) break;
1232 d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */
1234 /* At end of register contents => success */
1235 if (d2 == dend2) break;
1237 /* Advance to next segment in data being matched, if necessary */
1240 /* mcnt gets # consecutive chars to compare */
1242 if (mcnt > dend2 - d2)
1244 /* Compare that many; failure if mismatch, else skip them. */
1245 if (translate ? memcmp_translate (d, d2, mcnt, translate) : memcmp (d, d2, mcnt))
1247 d += mcnt, d2 += mcnt;
1253 /* fetch a data character */
1255 /* Match anything but a newline. */
1256 if ((translate ? translate[*d++] : *d++) == '\n')
1263 /* Nonzero for charset_not */
1266 if (*(p - 1) == (unsigned char) charset_not)
1269 /* fetch a data character */
1277 if (c < *p * BYTEWIDTH
1278 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1283 if (!not) goto fail;
1289 if (d == string1 || d[-1] == '\n')
1295 || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1299 /* "or" constructs ("|") are handled by starting each alternative
1300 with an on_failure_jump that points to the start of the next alternative.
1301 Each alternative except the last ends with a jump to the joining point.
1302 (Actually, each jump except for the last one really jumps
1303 to the following jump, because tensioning the jumps is a hassle.) */
1305 /* The start of a stupid repeat has an on_failure_jump that points
1306 past the end of the repeat text.
1307 This makes a failure point so that, on failure to match a repetition,
1308 matching restarts past as many repetitions have been found
1309 with no way to fail and look for another one. */
1311 /* A smart repeat is similar but loops back to the on_failure_jump
1312 so that each repetition makes another failure point. */
1314 case on_failure_jump:
1315 if (stackp == stacke)
1317 unsigned char **stackx;
1318 if (stacke - stackb > re_max_failures * 2)
1320 stackx = (unsigned char **) alloca (2 * (stacke - stackb)
1322 memcpy (stackx, stackb, (stacke - stackb) * sizeof (char *));
1323 stackp = stackx + (stackp - stackb);
1324 stacke = stackx + 2 * (stacke - stackb);
1328 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1330 *stackp++ = mcnt + p;
1334 /* The end of a smart repeat has an maybe_finalize_jump back.
1335 Change it either to a finalize_jump or an ordinary jump. */
1337 case maybe_finalize_jump:
1339 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1342 register unsigned char *p2 = p;
1343 /* Compare what follows with the begining of the repeat.
1344 If we can establish that there is nothing that they would
1345 both match, we can change to finalize_jump */
1347 && (*p2 == (unsigned char) stop_memory
1348 || *p2 == (unsigned char) start_memory))
1351 p[-3] = (unsigned char) finalize_jump;
1352 else if (*p2 == (unsigned char) exactn
1353 || *p2 == (unsigned char) endline)
1355 register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
1356 register unsigned char *p1 = p + mcnt;
1357 /* p1[0] ... p1[2] are an on_failure_jump.
1358 Examine what follows that */
1359 if (p1[3] == (unsigned char) exactn && p1[5] != c)
1360 p[-3] = (unsigned char) finalize_jump;
1361 else if (p1[3] == (unsigned char) charset
1362 || p1[3] == (unsigned char) charset_not)
1364 int not = p1[3] == (unsigned char) charset_not;
1365 if (c < p1[4] * BYTEWIDTH
1366 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1368 /* not is 1 if c would match */
1369 /* That means it is not safe to finalize */
1371 p[-3] = (unsigned char) finalize_jump;
1376 if (p[-1] != (unsigned char) finalize_jump)
1378 p[-1] = (unsigned char) jump;
1382 /* The end of a stupid repeat has a finalize-jump
1383 back to the start, where another failure point will be made
1384 which will point after all the repetitions found so far. */
1392 mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1393 p += mcnt + 1; /* The 1 compensates for missing ++ above */
1396 case dummy_failure_jump:
1397 if (stackp == stacke)
1399 unsigned char **stackx
1400 = (unsigned char **) alloca (2 * (stacke - stackb)
1402 memcpy (stackx, stackb, (stacke - stackb) * sizeof (char *));
1403 stackp = stackx + (stackp - stackb);
1404 stacke = stackx + 2 * (stacke - stackb);
1412 if (d == string1 /* Points to first char */
1413 || d == end2 /* Points to end */
1414 || (d == end1 && size2 == 0)) /* Points to end */
1416 if ((SYNTAX (d[-1]) == Sword)
1417 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1422 if (d == string1 /* Points to first char */
1423 || d == end2 /* Points to end */
1424 || (d == end1 && size2 == 0)) /* Points to end */
1426 if ((SYNTAX (d[-1]) == Sword)
1427 != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1432 if (d == end2 /* Points to end */
1433 || (d == end1 && size2 == 0) /* Points to end */
1434 || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1436 if (d == string1 /* Points to first char */
1437 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1442 if (d == string1 /* Points to first char */
1443 || SYNTAX (d[-1]) != Sword) /* prev char not letter */
1445 if (d == end2 /* Points to end */
1446 || (d == end1 && size2 == 0) /* Points to end */
1447 || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
1453 if (((d - string2 <= (unsigned) size2)
1454 ? d - bf_p2 : d - bf_p1)
1460 if (((d - string2 <= (unsigned) size2)
1461 ? d - bf_p2 : d - bf_p1)
1467 if (((d - string2 <= (unsigned) size2)
1468 ? d - bf_p2 : d - bf_p1)
1481 if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
1486 goto matchnotsyntax;
1492 if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
1497 if (SYNTAX (*d++) == 0) goto fail;
1502 if (SYNTAX (*d++) != 0) goto fail;
1504 #endif /* not emacs */
1507 if (d == string1) /* Note, d cannot equal string2 */
1508 break; /* unless string1 == string2. */
1512 if (d == end2 || (d == end1 && size2 == 0))
1517 /* Match the next few pattern characters exactly.
1518 mcnt is how many characters to match. */
1525 if (translate[*d++] != *p++) goto fail;
1534 if (*d++ != *p++) goto fail;
1548 continue; /* Successfully matched one pattern command; keep matching */
1550 /* Jump here if any matching operation fails. */
1552 if (stackp != stackb)
1553 /* A restart point is known. Restart there and pop it. */
1556 { /* If innermost failure point is dormant, flush it and keep looking */
1562 if (d >= string1 && d <= end1)
1565 else break; /* Matching at this starting point really fails! */
1567 return -1; /* Failure to match */
1571 memcmp_translate (s1, s2, len, translate)
1572 unsigned char *s1, *s2;
1574 unsigned char *translate;
1576 register unsigned char *p1 = s1, *p2 = s2;
1579 if (translate [*p1++] != translate [*p2++]) return 1;
1585 /* Entry points compatible with bsd4.2 regex library */
1589 static struct re_pattern_buffer re_comp_buf;
1597 if (!re_comp_buf.buffer)
1598 return "No previous regular expression";
1602 if (!re_comp_buf.buffer)
1604 if (!(re_comp_buf.buffer = (char *) malloc (200)))
1605 return "Memory exhausted";
1606 re_comp_buf.allocated = 200;
1607 if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1608 return "Memory exhausted";
1610 return re_compile_pattern (s, strlen (s), &re_comp_buf);
1617 int len = strlen (s);
1618 return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1627 /* Indexed by a character, gives the upper case equivalent of the character */
1629 static char upcase[0400] =
1630 { 000, 001, 002, 003, 004, 005, 006, 007,
1631 010, 011, 012, 013, 014, 015, 016, 017,
1632 020, 021, 022, 023, 024, 025, 026, 027,
1633 030, 031, 032, 033, 034, 035, 036, 037,
1634 040, 041, 042, 043, 044, 045, 046, 047,
1635 050, 051, 052, 053, 054, 055, 056, 057,
1636 060, 061, 062, 063, 064, 065, 066, 067,
1637 070, 071, 072, 073, 074, 075, 076, 077,
1638 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1639 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1640 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1641 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1642 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1643 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1644 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1645 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1646 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1647 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1648 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1649 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1650 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1651 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1652 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1653 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1654 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1655 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1656 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1657 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1658 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1659 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1660 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1661 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1669 struct re_pattern_buffer buf;
1672 char fastmap[(1 << BYTEWIDTH)];
1674 /* Allow a command argument to specify the style of syntax. */
1676 obscure_syntax = atoi (argv[1]);
1679 buf.buffer = (char *) malloc (buf.allocated);
1680 buf.fastmap = fastmap;
1681 buf.translate = upcase;
1689 re_compile_pattern (pat, strlen(pat), &buf);
1691 for (i = 0; i < buf.used; i++)
1692 printchar (buf.buffer[i]);
1694 putchar_unfiltered ('\n');
1696 printf_unfiltered ("%d allocated, %d used.\n", buf.allocated, buf.used);
1698 re_compile_fastmap (&buf);
1699 printf_unfiltered ("Allowed by fastmap: ");
1700 for (i = 0; i < (1 << BYTEWIDTH); i++)
1701 if (fastmap[i]) printchar (i);
1702 putchar_unfiltered ('\n');
1705 gets (pat); /* Now read the string to match against */
1707 i = re_match (&buf, pat, strlen (pat), 0, 0);
1708 printf_unfiltered ("Match value %d.\n", i);
1714 struct re_pattern_buffer *bufp;
1718 printf_unfiltered ("buf is :\n----------------\n");
1719 for (i = 0; i < bufp->used; i++)
1720 printchar (bufp->buffer[i]);
1722 printf_unfiltered ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
1724 printf_unfiltered ("Allowed by fastmap: ");
1725 for (i = 0; i < (1 << BYTEWIDTH); i++)
1726 if (bufp->fastmap[i])
1728 printf_unfiltered ("\nAllowed by translate: ");
1729 if (bufp->translate)
1730 for (i = 0; i < (1 << BYTEWIDTH); i++)
1731 if (bufp->translate[i])
1733 printf_unfiltered ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1734 printf_unfiltered ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
1741 if (c < 041 || c >= 0177)
1743 putchar_unfiltered ('\\');
1744 putchar_unfiltered (((c >> 6) & 3) + '0');
1745 putchar_unfiltered (((c >> 3) & 7) + '0');
1746 putchar_unfiltered ((c & 7) + '0');
1749 putchar_unfiltered (c);
1755 puts_unfiltered (string);