/*
 * Helper macros for walking encoded nodes and for the parser.
 */

/*
 * NEXT(p): the 16-bit value held in the two bytes after p[0], high byte
 * first.  Each byte is masked with 0377 so that a signed char cannot
 * sign-extend into the result.
 */
#define NEXT(p)		(((*((p)+1)&0377)<<8) + (*((p)+2)&0377))

/* OPERAND(p): address three bytes past p, i.e. just after the two NEXT bytes. */
#define OPERAND(p)	((p) + 3)

/*
 * UCHARAT(p): the character at p converted to int without sign extension.
 * Exactly one definition may be active: when CHARBITS is defined the value
 * is masked with it, otherwise the byte is read through an unsigned char
 * pointer.
 */
#ifndef CHARBITS
#define UCHARAT(p)	((int)*(unsigned char *)(p))
#else
#define UCHARAT(p)	((int)*(p)&CHARBITS)
#endif

/*
 * FAIL(m): report m through hs_regerror() and return NULL from the
 * enclosing function.  Wrapped in do { } while (0) so that "FAIL(m);" is a
 * single statement and stays correct as the body of an if/else.
 */
#define FAIL(m)		do { hs_regerror(m); return(NULL); } while (0)

/*
 * ISMULT(c): nonzero when c is one of the repetition operators '*', '+'
 * or '?'.  The argument may be evaluated up to three times.
 */
#define ISMULT(c)	((c) == '*' || (c) == '+' || (c) == '?')
/*
 * Parser cursor.  The parsing fragments in this listing read the current
 * pattern character through *regparse and step past it with regparse++.
 */
161static const char *regparse;
/*
 * Forward declarations for the compiler half.  The notes below describe
 * only what the call sites visible in this listing show; the full bodies
 * are not in view.
 */
/* Called in a loop that continues while *regparse is '|' or '\n'; the char * result is kept in a local and flags come back through flagp. */
174STATIC
char *regbranch(
int *
flagp);
/* Called repeatedly until *regparse is '\0', ')', '\n' or '|'. */
175STATIC
char *regpiece(
int *
flagp);
/* Called first by the code that then handles a following '*', '+' or '?'. */
176STATIC
char *regatom(
int *
flagp);
/* Takes an opcode such as BRANCH, BACK or NOTHING; the char * result is passed on as a node pointer. */
177STATIC
char *regnode(
char op);
/* Result is used as a node pointer (e.g. OP(regnext(scan))) and as the loop successor in the matcher. */
178STATIC
char *regnext(
register char *p);
/* NOTE(review): no call site is visible here; presumably emits one byte of program code -- confirm. */
179STATIC
void regc(
char b);
/* Called as reginsert(STAR, ret), reginsert(PLUS, ret) and reginsert(BRANCH, ret). */
180STATIC
void reginsert(
char op,
char *
opnd);
/* Stores a 16-bit offset, high byte first, in bytes 1 and 2 of a node. */
181STATIC
void regtail(
char *p,
char *val);
/* Forwards to regtail(OPERAND(p), val); the visible guard tests p for NULL, the dummy and non-BRANCH. */
182STATIC
void regoptail(
char *p,
char *val);
203hs_regcomp(
const char *
exp)
211 FAIL(
"NULL argument");
215 if (
exp[0] ==
'.' &&
exp[1] ==
'*')
exp += 2;
222 if (reg(0, &flags) == NULL)
226 if (regsize >= 32767L)
227 FAIL(
"regexp too big");
232 FAIL(
"out of space");
237 regcode = r->program;
239 if (reg(0, &flags) == NULL)
249 if (OP(regnext(
scan)) == END) {
253 if (OP(
scan) == EXACTLY)
254 r->regstart = *OPERAND(
scan);
255 else if (OP(
scan) == BOL)
270 if ((OP(
scan) == EXACTLY) &&
307 if (regnpar >= NSUBEXP)
316 br = regbranch(&flags);
323 if (!(flags&HASWIDTH))
325 *
flagp |= flags&SPSTART;
326 while (*regparse ==
'|' || *regparse ==
'\n') {
328 br = regbranch(&flags);
332 if (!(flags&HASWIDTH))
334 *
flagp |= flags&SPSTART;
346 if (
paren && *regparse++ !=
')') {
347 FAIL(
"unmatched ()");
348 }
else if (!
paren && *regparse !=
'\0') {
349 if (*regparse ==
')') {
350 FAIL(
"unmatched ()");
374 ret = regnode(BRANCH);
376 while (*regparse !=
'\0' && *regparse !=
')' &&
377 *regparse !=
'\n' && *regparse !=
'|') {
378 latest = regpiece(&flags);
381 *
flagp |= flags&HASWIDTH;
383 *
flagp |= flags&SPSTART;
389 (
void) regnode(NOTHING);
411 ret = regatom(&flags);
421 if (!(flags&HASWIDTH) && op !=
'?')
422 FAIL(
"*+ operand could be empty");
423 *
flagp = (op !=
'+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
425 if (op ==
'*' && (flags&SIMPLE))
426 reginsert(STAR,
ret);
427 else if (op ==
'*') {
429 reginsert(BRANCH,
ret);
430 regoptail(
ret, regnode(BACK));
432 regtail(
ret, regnode(BRANCH));
433 regtail(
ret, regnode(NOTHING));
434 }
else if (op ==
'+' && (flags&SIMPLE))
435 reginsert(PLUS,
ret);
436 else if (op ==
'+') {
438 next = regnode(BRANCH);
440 regtail(regnode(BACK),
ret);
441 regtail(next, regnode(BRANCH));
442 regtail(
ret, regnode(NOTHING));
443 }
else if (op ==
'?') {
445 reginsert(BRANCH,
ret);
446 regtail(
ret, regnode(BRANCH));
447 next = regnode(NOTHING);
449 regoptail(
ret, next);
452 if (ISMULT(*regparse))
474 switch (*regparse++) {
484 *
flagp |= HASWIDTH|SIMPLE;
490 if (*regparse ==
'^') {
491 ret = regnode(ANYBUT);
494 ret = regnode(ANYOF);
495 if (*regparse ==
']' || *regparse ==
'-')
497 while (*regparse !=
'\0' && *regparse !=
']') {
498 if (*regparse ==
'-') {
500 if (*regparse ==
']' || *regparse ==
'\0')
503 class1 = UCHARAT(regparse-2)+1;
506 FAIL(
"invalid [] range");
515 if (*regparse !=
']')
516 FAIL(
"unmatched []");
518 *
flagp |= HASWIDTH|SIMPLE;
522 ret = reg(1, &flags);
525 *
flagp |= flags&(HASWIDTH|SPSTART);
531 FAIL(
"internal urp");
536 FAIL(
"?+* follows nothing");
539 switch (*regparse++) {
544 ret = regnode(WORDA);
547 ret = regnode(WORDZ);
586 ret = regnode(EXACTLY);
595 case '.':
case '[':
case '(':
596 case ')':
case '|':
case '\n':
604 case '?':
case '+':
case '*':
613 switch (regparse[1]){
650 if (
ret == &regdummy) {
670 if (regcode != &regdummy)
682reginsert(
char op,
char *
opnd)
688 if (regcode == &regdummy) {
709regtail(
char *p,
char *val)
727 if (OP(
scan) == BACK)
731 *(
scan+1) = (offset>>8)&0377;
732 *(
scan+2) = offset&0377;
739regoptail(
char *p,
char *val)
742 if (p == NULL || p == ®dummy || OP(p) != BRANCH)
744 regtail(OPERAND(p), val);
/*
 * Matcher state shared by the functions declared below.
 */
/* Current position in the string being matched; on success regtry() stores it in prog->endp[0]. */
754static const char *reginput;
/* Set from the string argument in hs_regexec(); compared against reginput by the matcher. */
755static const char *regbol;
/* Set to prog->startp in regtry(); indexed by OP(scan) - OPEN in the matcher. */
756static const char **regstartp;
/* Set to prog->endp in regtry(); indexed by OP(scan) - CLOSE in the matcher. */
757static const char **regendp;
/*
 * Forward declarations for the matcher half.  The notes describe only what
 * the fragments visible in this listing show.
 */
/* Runs regmatch(prog->program + 1); on success records string in prog->startp[0]. */
762STATIC
int regtry(
hs_regexp *prog,
const char *
string);
/* Result is tested for truth; called on the program start and recursively on following nodes. */
763STATIC
int regmatch(
char *prog);
/* Called with OPERAND(scan); the result is kept as a count and added to the saved input position. */
764STATIC
int regrepeat(
char *p);
776hs_regexec(
const hs_regexp *prog,
const char *
string)
781 if (prog == NULL ||
string == NULL) {
782 hs_regerror(
"NULL parameter");
787 if (UCHARAT(prog->program) != MAGIC) {
788 hs_regerror(
"corrupted program");
793 if (prog->regmust != NULL) {
795 while ((s =
strchr(s, prog->regmust[0])) != NULL) {
796 if (
strncmp(s, prog->regmust, prog->regmlen) == 0)
805 regbol = (
char *)
string;
809 return(regtry((
hs_regexp *)prog, string));
813 if (prog->regstart !=
'\0')
815 while ((s =
strchr(s, prog->regstart)) != NULL) {
825 }
while (*s++ !=
'\0');
835regtry(
hs_regexp *prog,
const char *
string)
842 regstartp = (
const char **)prog->startp;
843 regendp = (
const char **)prog->endp;
847 for (i = NSUBEXP; i > 0; i--) {
851 if (regmatch(prog->program + 1)) {
852 prog->startp[0] = (
char *)
string;
853 prog->endp[0] = (
char *)reginput;
880 while (
scan != NULL) {
885 next = regnext(
scan);
889 if (reginput != regbol)
893 if (*reginput !=
'\0')
898 if ((!
isalnum((
int)*reginput)) && *reginput !=
'_')
901 if (reginput > regbol &&
902 (
isalnum((
int)reginput[-1]) || reginput[-1] ==
'_'))
907 if (
isalnum((
int)*reginput) || *reginput ==
'_')
912 if (*reginput ==
'\0')
922 if (*
opnd != *reginput)
931 if (*reginput ==
'\0' ||
strchr(OPERAND(
scan), *reginput) == NULL)
936 if (*reginput ==
'\0' ||
strchr(OPERAND(
scan), *reginput) != NULL)
956 no = OP(
scan) - OPEN;
959 if (regmatch(next)) {
965 if (regstartp[no] == NULL)
966 regstartp[no] = save;
984 no = OP(
scan) - CLOSE;
987 if (regmatch(next)) {
993 if (regendp[no] == NULL)
1003 if (OP(next) != BRANCH)
1004 next = OPERAND(
scan);
1008 if (regmatch(OPERAND(
scan)))
1012 }
while (
scan != NULL && OP(
scan) == BRANCH);
1030 if (OP(next) == EXACTLY)
1032 min = (OP(
scan) == STAR) ? 0 : 1;
1035 no = regrepeat(OPERAND(
scan));
1045 reginput = save + no;
1054 hs_regerror(
"memory corruption");
1066 hs_regerror(
"corrupted pointers");
1106 hs_regerror(
"internal foulup");
1158 printf(
"(%d)", (s-r->program)+(next-s));
1160 if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
1162 while (*s !=
'\0') {
1172 if (r->regstart !=
'\0')
1173 printf(
"start `%c' ", r->regstart);
1176 if (r->regmust != NULL)
1177 printf(
"must have \"%s\"", r->regmust);
1188 static char buf[50];
1260 hs_regerror(
"corrupted opcode");