Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456
/*
* Regular expression implementation.
* Supports traditional egrep syntax, plus non-greedy operators.
* Tracks submatches a la POSIX.
*
* Seems to work (by running backward!) but very subtle.
* Assumes repetitions are all individually parenthesized:
* must say '(a?)b(c*)' not 'a?bc*'.
*
* Let m = length of regexp (number of states), and
* let p = number of capturing parentheses, and
* let t = length of the text. POSIX via running backward,
* implemented here, requires O(m*p) storage during execution.
* Can implement via running forward instead, but would
* require O(m*p+m*m) storage and is not nearly so simple.
*
* yacc -v nfa-posix.y && gcc y.tab.c
These should be equivalent:
re='ab|cd|ef|a|bc|def|bcde|f'
a.out "(?:$re)(?:$re)($re)" abcdef
a.out "($re)*" abcdef
(0,6)(3,6) => longest last guy (wrong)
(0,6)(5,6) => shortest last guy (wrong)
(0,6)(4,6) => posix last guy (right)
* Copyright (c) 2007 Russ Cox.
* Can be distributed under the MIT license, see bottom of file.
*/
%{
#include
#include
#include
#include
enum
{
NSUB = 20,
MPAREN = 9,
};
typedef struct Sub Sub;
struct Sub
{
char *sp;
char *ep;
};
enum
{
Char = 1,
Any = 2,
Split = 3,
LParen = 4,
RParen = 5,
Match = 6,
};
typedef struct State State;
typedef struct Thread Thread;
struct State
{
int op;
int data;
State *out;
State *out1;
int id;
int lastlist;
Thread *lastthread;
};
struct Thread
{
State *state;
Sub match[NSUB];
};
typedef struct List List;
struct List
{
Thread *t;
int n;
};
int debug;
State matchstate = { Match };
int nstate;
int listid;
List l1, l2;
/* Allocate and initialize State */
State*
state(int op, int data, State *out, State *out1)
{
State *s;
nstate++;
s = malloc(sizeof *s);
s->lastlist = 0;
s->op = op;
s->data = data;
s->out = out;
s->out1 = out1;
s->id = nstate;
return s;
}
typedef struct Frag Frag;
typedef union Ptrlist Ptrlist;
struct Frag
{
State *start;
Ptrlist *out;
};
/* Initialize Frag struct. */
Frag
frag(State *start, Ptrlist *out)
{
Frag n = { start, out };
return n;
}
/*
* Since the out pointers in the list are always
* uninitialized, we use the pointers themselves
* as storage for the Ptrlists.
*/
union Ptrlist
{
Ptrlist *next;
State *s;
};
/* Create singleton list containing just outp. */
Ptrlist*
list1(State **outp)
{
Ptrlist *l;
l = (Ptrlist*)outp;
l->next = NULL;
return l;
}
/* Patch the list of states at out to point to start. */
void
patch(Ptrlist *l, State *s)
{
Ptrlist *next;
for(; l; l=next){
next = l->next;
l->s = s;
}
}
/* Join the two lists l1 and l2, returning the combination. */
Ptrlist*
append(Ptrlist *l1, Ptrlist *l2)
{
Ptrlist *oldl1;
oldl1 = l1;
while(l1->next)
l1 = l1->next;
l1->next = l2;
return oldl1;
}
int nparen;
void yyerror(char*);
int yylex(void);
State *start;
Frag
paren(Frag f, int n)
{
State *s1, *s2;
if(n > MPAREN)
return f;
s1 = state(RParen, n, f.start, NULL);
s2 = state(LParen, n, NULL, NULL);
patch(f.out, s2);
return frag(s1, list1(&s2->out));
}
%}
%union {
Frag frag;
int c;
int nparen;
}
%token CHAR
%token EOL
%type alt concat repeat single line
%type count
%%
line: alt EOL
{
State *s;
$1 = paren($1, 0);
s = state(Match, 0, NULL, NULL);
patch($1.out, s);
start = $1.start;
return 0;
}
alt:
concat
| alt '|' concat
{
State *s = state(Split, 0, $1.start, $3.start);
$$ = frag(s, append($1.out, $3.out));
}
;
concat:
repeat
| concat repeat
{
patch($2.out, $1.start);
$$ = frag($2.start, $1.out);
}
;
repeat:
single
| single '*'
{
State *s = state(Split, 0, $1.start, NULL);
patch($1.out, s);
$$ = frag(s, list1(&s->out1));
}
| single '+'
{
State *s = state(Split, 0, $1.start, NULL);
patch($1.out, s);
$$ = frag($1.start, list1(&s->out1));
}
| single '?'
{
State *s = state(Split, 0, $1.start, NULL);
$$ = frag(s, append($1.out, list1(&s->out1)));
}
;
count: { $$ = ++nparen; }
single:
'(' count alt ')'
{
$$ = paren($3, $2);
}
| '(' '?' ':' alt ')'
{
$$ = $4;
}
| CHAR
{
State *s = state(Char, $1, NULL, NULL);
$$ = frag(s, list1(&s->out));
}
| '.'
{
State *s = state(Any, 0, NULL, NULL);
$$ = frag(s, list1(&s->out));
}
;
%%
char *input;
char *text;
void dumplist(List*);
int
yylex(void)
{
int c;
if(input == NULL || *input == 0)
return EOL;
c = *input++;
if(strchr("|*?():.", c))
return c;
yylval.c = c;
return CHAR;
}
void
yyerror(char *s)
{
fprintf(stderr, "parse error: %s\n", s);
exit(1);
}
void
printmatch(Sub *m, int jump)
{
int i;
for(i=jump-1; i<2*nparen+2; i+=jump){
if(m[i].sp && m[i].ep)
printf("(%d,%d)", m[i].sp - text, m[i].ep - text);
else if(m[i].sp)
printf("(%d,?)", m[i].sp - text);
else
printf("(?,?)");
}
}
void
dumplist(List *l)
{
int i;
Thread *t;
for(i=0; in; i++){
t = &l->t[i];
if(t->state->op != Char && t->state->op != Any && t->state->op != Match)
continue;
printf(" ");
printf("%d ", t->state->id);
printmatch(t->match, 1);
printf("\n");
}
}
/*
* Is match a better than match b?
* If so, return 1; if not, 0.
*/
int
_better(Sub *a, Sub *b)
{
int i;
/* Leftmost longest */
for(i=0; i<2*nparen+2; i++){
if(a[i].sp != b[i].sp)
return b[i].sp == NULL || a[i].sp < b[i].sp;
if(a[i].ep != b[i].ep)
return a[i].ep > b[i].ep;
}
return 0;
}
int
better(Sub *a, Sub *b)
{
int r;
r = _better(a, b);
if(debug > 1){
printf("better? ");
printmatch(a, 1);
printf(" vs ");
printmatch(b, 1);
printf(": %s\n", r ? "yes" : "no");
}
return r;
}
/*
* Add s to l, following unlabeled arrows.
* Next character to read is p.
*/
void
addstate(List *l, State *s, Sub *m, char *p)
{
Sub save0, save1;
if(s == NULL)
return;
if(s->lastlist == listid){
if(!better(m, s->lastthread->match))
return;
}else{
s->lastlist = listid;
s->lastthread = &l->t[l->n++];
}
s->lastthread->state = s;
memmove(s->lastthread->match, m, NSUB*sizeof m[0]);
switch(s->op){
case Split:
/* follow unlabeled arrows */
addstate(l, s->out, m, p);
addstate(l, s->out1, m, p);
break;
case LParen:
save0 = m[2*s->data];
save1 = m[2*s->data+1];
/* record left paren location and keep going */
m[2*s->data].sp = p;
if(save1.sp == NULL)
m[2*s->data+1].sp = p;
addstate(l, s->out, m, p);
/* restore old information before returning. */
m[2*s->data] = save0;
m[2*s->data+1] = save1;
break;
case RParen:
save0 = m[2*s->data];
save1 = m[2*s->data+1];
/* record right paren location and keep going */
m[2*s->data].ep = p;
m[2*s->data].sp = NULL;
if(save1.ep == NULL)
m[2*s->data+1].ep = p;
addstate(l, s->out, m, p);
/* restore old information before returning. */
m[2*s->data] = save0;
m[2*s->data+1] = save1;
break;
}
}
/*
* Step the NFA from the states in clist
* past the character c,
* to create next NFA state set nlist.
* Record best match so far in match.
*/
void
step(List *clist, int c, char *p, List *nlist, Sub *match)
{
int i;
Thread *t;
static Sub m[NSUB];
if(debug){
dumplist(clist);
printf("%c (%d)\n", c, c);
}
listid++;
nlist->n = 0;
for(i=0; in; i++){
t = &clist->t[i];
switch(t->state->op){
case Char:
if(c == t->state->data)
addstate(nlist, t->state->out, t->match, p);
break;
case Any:
addstate(nlist, t->state->out, t->match, p);
break;
case Match:
if(better(t->match, match))
memmove(match, t->match, NSUB*sizeof match[0]);
break;
}
}
/* start a new thread */
if(match == NULL) // || match[0].sp == NULL)
addstate(nlist, start, m, p);
}
/* Compute initial thread list */
List*
startlist(State *start, char *p, List *l)
{
List empty = {NULL, 0};
step(&empty, 0, p, l, NULL);
return l;
}
int
match(State *start, char *p, Sub *m)
{
int c;
List *clist, *nlist, *t;
char *q;
q = p+strlen(p);
clist = startlist(start, q, &l1);
nlist = &l2;
memset(m, 0, NSUB*sizeof m[0]);
while(--q>=p){
c = *q & 0xFF;
step(clist, c, q, nlist, m);
t = clist; clist = nlist; nlist = t;
}
step(clist, 0, p, nlist, m);
return m[0].sp != NULL;
}
void
dump(State *s)
{
if(s == NULL || s->lastlist == listid)
return;
s->lastlist = listid;
printf("%d| ", s->id);
switch(s->op){
case Char:
printf("'%c' -> %d\n", s->data, s->out->id);
break;
case Any:
printf(". -> %d\n", s->out->id);
break;
case Split:
printf("| -> %d, %d\n", s->out->id, s->out1->id);
break;
case LParen:
printf("( %d -> %d\n", s->data, s->out->id);
break;
case RParen:
printf(") %d -> %d\n", s->data, s->out->id);
break;
case Match:
printf("match\n");
break;
default:
printf("??? %d\n", s->op);
break;
}
dump(s->out);
dump(s->out1);
}
int
main(int argc, char **argv)
{
int i;
Sub m[NSUB];
for(;;){
if(argc > 1 && strcmp(argv[1], "-d") == 0){
debug++;
argv[1] = argv[0]; argc--; argv++;
}
else
break;
}
if(argc < 3){
fprintf(stderr, "usage: %s regexp string...\n", argv[0]);
return 1;
}
input = argv[1];
yyparse();
if(nparen >= MPAREN)
nparen = MPAREN;
if(debug){
++listid;
dump(start);
}
l1.t = malloc(nstate*sizeof l1.t[0]);
l2.t = malloc(nstate*sizeof l2.t[0]);
for(i=2; i