00001 #include <stdio.h>
00002 #include <stdlib.h>
00003 #include <string.h>
00004 #include "cpp.h"
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
/*
 * Lexer table constants.
 *
 * A table entry is either a plain state number (below MAXSTATE) or an
 * action word built with ACT(): the token type sits in the bits above
 * bit 6 and the action code (an S_* value) in the low 7 bits.
 *
 * Both function-like macros parenthesize their arguments and their whole
 * expansion so they can be used inside larger expressions
 * (e.g. GETACT(x) == NAME) without precedence surprises.
 */
#define MAXSTATE	32			/* second dimension of bigfsm; S_SELF starts here */
#define ACT(tok,act)	(((tok)<<7)+(act))	/* pack token type + action code */
#define QBSBIT		0100			/* flag in the action field: entry is for '?' or '\\' */
#define GETACT(st)	(((st)>>7)&0x1ff)	/* recover the token type packed by ACT() */

/* Character classes that may appear in a struct fsm ch[] list */
#define C_WS	1
#define C_ALPH	2	/* 'a'-'z', 'A'-'Z' and '_' (see expandlex) */
#define C_NUM	3	/* '0'-'9' (see expandlex) */
#define C_EOF	4
#define C_XX	5	/* every byte value: the default transition of a state */
00036
00037 enum state {
00038 START=0, NUM1, NUM2, NUM3, ID1, ST1, ST2, ST3, COM1, COM2, COM3, COM4,
00039 CC1, CC2, WS1, PLUS1, MINUS1, STAR1, SLASH1, PCT1, SHARP1,
00040 CIRC1, GT1, GT2, LT1, LT2, OR1, AND1, ASG1, NOT1, DOTS1,
00041 S_SELF=MAXSTATE, S_SELFB, S_EOF, S_NL, S_EOFSTR,
00042 S_STNL, S_COMNL, S_EOFCOM, S_COMMENT, S_EOB, S_WS, S_NAME
00043 };
00044
/* NOTE(review): tottok and tokkind are not referenced in the visible part of this file. */
int tottok;
int tokkind[256];

/*
 * One row of the compact lexer description: when the automaton is in
 * `state' and the next input byte matches an entry of `ch', move to
 * `nextstate'.
 */
struct fsm {
	int state;	/* current state; a negative value ends the fsm[] table */
	uchar ch[4];	/* zero-terminated list of characters or C_* classes */
	int nextstate;	/* a state, or an S_* action code optionally packed with ACT() */
};
00052
00053 struct fsm fsm[] = {
00054
00055 START, { C_XX }, ACT(UNCLASS,S_SELF),
00056 START, { ' ', '\t', '\v' }, WS1,
00057 START, { C_NUM }, NUM1,
00058 START, { '.' }, NUM3,
00059 START, { C_ALPH }, ID1,
00060 START, { 'L' }, ST1,
00061 START, { '"' }, ST2,
00062 START, { '\'' }, CC1,
00063 START, { '/' }, COM1,
00064 START, { EOFC }, S_EOF,
00065 START, { '\n' }, S_NL,
00066 START, { '-' }, MINUS1,
00067 START, { '+' }, PLUS1,
00068 START, { '<' }, LT1,
00069 START, { '>' }, GT1,
00070 START, { '=' }, ASG1,
00071 START, { '!' }, NOT1,
00072 START, { '&' }, AND1,
00073 START, { '|' }, OR1,
00074 START, { '#' }, SHARP1,
00075 START, { '%' }, PCT1,
00076 START, { '[' }, ACT(SBRA,S_SELF),
00077 START, { ']' }, ACT(SKET,S_SELF),
00078 START, { '(' }, ACT(LP,S_SELF),
00079 START, { ')' }, ACT(RP,S_SELF),
00080 START, { '*' }, STAR1,
00081 START, { ',' }, ACT(COMMA,S_SELF),
00082 START, { '?' }, ACT(QUEST,S_SELF),
00083 START, { ':' }, ACT(COLON,S_SELF),
00084 START, { ';' }, ACT(SEMIC,S_SELF),
00085 START, { '{' }, ACT(CBRA,S_SELF),
00086 START, { '}' }, ACT(CKET,S_SELF),
00087 START, { '~' }, ACT(TILDE,S_SELF),
00088 START, { '^' }, CIRC1,
00089
00090
00091 NUM1, { C_XX }, ACT(NUMBER,S_SELFB),
00092 NUM1, { C_NUM, C_ALPH, '.' }, NUM1,
00093 NUM1, { 'E', 'e' }, NUM2,
00094 NUM1, { '_' }, ACT(NUMBER,S_SELFB),
00095
00096
00097 NUM2, { C_XX }, ACT(NUMBER,S_SELFB),
00098 NUM2, { '+', '-' }, NUM1,
00099 NUM2, { C_NUM, C_ALPH }, NUM1,
00100 NUM2, { '_' }, ACT(NUMBER,S_SELFB),
00101
00102
00103 NUM3, { C_XX }, ACT(DOT,S_SELFB),
00104 NUM3, { '.' }, DOTS1,
00105 NUM3, { C_NUM }, NUM1,
00106
00107 DOTS1, { C_XX }, ACT(UNCLASS, S_SELFB),
00108 DOTS1, { C_NUM }, NUM1,
00109 DOTS1, { '.' }, ACT(ELLIPS, S_SELF),
00110
00111
00112 ID1, { C_XX }, ACT(NAME,S_NAME),
00113 ID1, { C_ALPH, C_NUM }, ID1,
00114
00115
00116 ST1, { C_XX }, ACT(NAME,S_NAME),
00117 ST1, { C_ALPH, C_NUM }, ID1,
00118 ST1, { '"' }, ST2,
00119 ST1, { '\'' }, CC1,
00120
00121
00122 ST2, { C_XX }, ST2,
00123 ST2, { '"' }, ACT(STRING, S_SELF),
00124 ST2, { '\\' }, ST3,
00125 ST2, { '\n' }, S_STNL,
00126 ST2, { EOFC }, S_EOFSTR,
00127
00128
00129 ST3, { C_XX }, ST2,
00130 ST3, { '\n' }, S_STNL,
00131 ST3, { EOFC }, S_EOFSTR,
00132
00133
00134 CC1, { C_XX }, CC1,
00135 CC1, { '\'' }, ACT(CCON, S_SELF),
00136 CC1, { '\\' }, CC2,
00137 CC1, { '\n' }, S_STNL,
00138 CC1, { EOFC }, S_EOFSTR,
00139
00140
00141 CC2, { C_XX }, CC1,
00142 CC2, { '\n' }, S_STNL,
00143 CC2, { EOFC }, S_EOFSTR,
00144
00145
00146 COM1, { C_XX }, ACT(SLASH, S_SELFB),
00147 COM1, { '=' }, ACT(ASSLASH, S_SELF),
00148 COM1, { '*' }, COM2,
00149 COM1, { '/' }, COM4,
00150
00151
00152 COM2, { C_XX }, COM2,
00153 COM2, { '\n' }, S_COMNL,
00154 COM2, { '*' }, COM3,
00155 COM2, { EOFC }, S_EOFCOM,
00156
00157
00158 COM3, { C_XX }, COM2,
00159 COM3, { '\n' }, S_COMNL,
00160 COM3, { '*' }, COM3,
00161 COM3, { '/' }, S_COMMENT,
00162
00163
00164 COM4, { C_XX }, COM4,
00165 COM4, { '\n' }, S_NL,
00166 COM4, { EOFC }, S_EOFCOM,
00167
00168
00169 WS1, { C_XX }, S_WS,
00170 WS1, { ' ', '\t', '\v' }, WS1,
00171
00172
00173 MINUS1, { C_XX }, ACT(MINUS, S_SELFB),
00174 MINUS1, { '-' }, ACT(MMINUS, S_SELF),
00175 MINUS1, { '=' }, ACT(ASMINUS,S_SELF),
00176 MINUS1, { '>' }, ACT(ARROW,S_SELF),
00177
00178
00179 PLUS1, { C_XX }, ACT(PLUS, S_SELFB),
00180 PLUS1, { '+' }, ACT(PPLUS, S_SELF),
00181 PLUS1, { '=' }, ACT(ASPLUS, S_SELF),
00182
00183
00184 LT1, { C_XX }, ACT(LT, S_SELFB),
00185 LT1, { '<' }, LT2,
00186 LT1, { '=' }, ACT(LEQ, S_SELF),
00187 LT2, { C_XX }, ACT(LSH, S_SELFB),
00188 LT2, { '=' }, ACT(ASLSH, S_SELF),
00189
00190
00191 GT1, { C_XX }, ACT(GT, S_SELFB),
00192 GT1, { '>' }, GT2,
00193 GT1, { '=' }, ACT(GEQ, S_SELF),
00194 GT2, { C_XX }, ACT(RSH, S_SELFB),
00195 GT2, { '=' }, ACT(ASRSH, S_SELF),
00196
00197
00198 ASG1, { C_XX }, ACT(ASGN, S_SELFB),
00199 ASG1, { '=' }, ACT(EQ, S_SELF),
00200
00201
00202 NOT1, { C_XX }, ACT(NOT, S_SELFB),
00203 NOT1, { '=' }, ACT(NEQ, S_SELF),
00204
00205
00206 AND1, { C_XX }, ACT(AND, S_SELFB),
00207 AND1, { '&' }, ACT(LAND, S_SELF),
00208 AND1, { '=' }, ACT(ASAND, S_SELF),
00209
00210
00211 OR1, { C_XX }, ACT(OR, S_SELFB),
00212 OR1, { '|' }, ACT(LOR, S_SELF),
00213 OR1, { '=' }, ACT(ASOR, S_SELF),
00214
00215
00216 SHARP1, { C_XX }, ACT(SHARP, S_SELFB),
00217 SHARP1, { '#' }, ACT(DSHARP, S_SELF),
00218
00219
00220 PCT1, { C_XX }, ACT(PCT, S_SELFB),
00221 PCT1, { '=' }, ACT(ASPCT, S_SELF),
00222
00223
00224 STAR1, { C_XX }, ACT(STAR, S_SELFB),
00225 STAR1, { '=' }, ACT(ASSTAR, S_SELF),
00226
00227
00228 CIRC1, { C_XX }, ACT(CIRC, S_SELFB),
00229 CIRC1, { '=' }, ACT(ASCIRC, S_SELF),
00230
00231 -1
00232 };
00233
00234
00235
00236 short bigfsm[256][MAXSTATE];
00237
00238 void
00239 expandlex(void)
00240 {
00241 struct fsm *fp;
00242 int i, j, nstate;
00243
00244 for (fp = fsm; fp->state>=0; fp++) {
00245 for (i=0; fp->ch[i]; i++) {
00246 nstate = fp->nextstate;
00247 if (nstate >= S_SELF)
00248 nstate = ~nstate;
00249 switch (fp->ch[i]) {
00250
00251 case C_XX:
00252 for (j=0; j<256; j++)
00253 bigfsm[j][fp->state] = nstate;
00254 continue;
00255 case C_ALPH:
00256 for (j=0; j<=256; j++)
00257 if ('a'<=j&&j<='z' || 'A'<=j&&j<='Z'
00258 || j=='_')
00259 bigfsm[j][fp->state] = nstate;
00260 continue;
00261 case C_NUM:
00262 for (j='0'; j<='9'; j++)
00263 bigfsm[j][fp->state] = nstate;
00264 continue;
00265 default:
00266 bigfsm[fp->ch[i]][fp->state] = nstate;
00267 }
00268 }
00269 }
00270
00271 for (i=0; i<MAXSTATE; i++) {
00272 for (j=0; j<0xFF; j++)
00273 if (j=='?' || j=='\\') {
00274 if (bigfsm[j][i]>0)
00275 bigfsm[j][i] = ~bigfsm[j][i];
00276 bigfsm[j][i] &= ~QBSBIT;
00277 }
00278 bigfsm[EOB][i] = ~S_EOB;
00279 if (bigfsm[EOFC][i]>=0)
00280 bigfsm[EOFC][i] = ~S_EOF;
00281 }
00282 }
00283
00284 void
00285 fixlex(void)
00286 {
00287
00288 if (Cplusplus==0)
00289 bigfsm['/'][COM1] = bigfsm['x'][COM1];
00290 }
00291
00292
00293
00294
00295
00296
00297
00298
00299 int
00300 gettokens(Tokenrow *trp, int reset)
00301 {
00302 register int c, state, oldstate;
00303 register uchar *ip;
00304 register Token *tp, *maxp;
00305 int runelen;
00306 Source *s = cursource;
00307 int nmac = 0;
00308 extern char outbuf[];
00309
00310 tp = trp->lp;
00311 ip = s->inp;
00312 if (reset) {
00313 s->lineinc = 0;
00314 if (ip>=s->inl) {
00315 s->inl = s->inb;
00316 fillbuf(s);
00317 ip = s->inp = s->inb;
00318 } else if (ip >= s->inb+(3*INS/4)) {
00319 memmove(s->inb, ip, 4+s->inl-ip);
00320 s->inl = s->inb+(s->inl-ip);
00321 ip = s->inp = s->inb;
00322 }
00323 }
00324 maxp = &trp->bp[trp->max];
00325 runelen = 1;
00326 for (;;) {
00327 continue2:
00328 if (tp>=maxp) {
00329 trp->lp = tp;
00330 tp = growtokenrow(trp);
00331 maxp = &trp->bp[trp->max];
00332 }
00333 tp->type = UNCLASS;
00334 tp->hideset = 0;
00335 tp->t = ip;
00336 tp->wslen = 0;
00337 tp->flag = 0;
00338 state = START;
00339 for (;;) {
00340 oldstate = state;
00341 c = *ip;
00342 if ((state = bigfsm[c][state]) >= 0) {
00343 ip += runelen;
00344 runelen = 1;
00345 continue;
00346 }
00347 state = ~state;
00348 reswitch:
00349 switch (state&0177) {
00350 case S_SELF:
00351 ip += runelen;
00352 runelen = 1;
00353 case S_SELFB:
00354 tp->type = GETACT(state);
00355 tp->len = ip - tp->t;
00356 tp++;
00357 goto continue2;
00358
00359 case S_NAME:
00360 tp->type = NAME;
00361 tp->len = ip - tp->t;
00362 nmac |= quicklook(tp->t[0], tp->len>1?tp->t[1]:0);
00363 tp++;
00364 goto continue2;
00365
00366 case S_WS:
00367 tp->wslen = ip - tp->t;
00368 tp->t = ip;
00369 state = START;
00370 continue;
00371
00372 default:
00373 if ((state&QBSBIT)==0) {
00374 ip += runelen;
00375 runelen = 1;
00376 continue;
00377 }
00378 state &= ~QBSBIT;
00379 s->inp = ip;
00380 if (c=='?') {
00381 if (trigraph(s)) {
00382 state = oldstate;
00383 continue;
00384 }
00385 goto reswitch;
00386 }
00387 if (c=='\\') {
00388 if (foldline(s)) {
00389 s->lineinc++;
00390 state = oldstate;
00391 continue;
00392 }
00393 goto reswitch;
00394 }
00395 error(WARNING, "Lexical botch in cpp");
00396 ip += runelen;
00397 runelen = 1;
00398 continue;
00399
00400 case S_EOB:
00401 s->inp = ip;
00402 fillbuf(cursource);
00403 state = oldstate;
00404 continue;
00405
00406 case S_EOF:
00407 tp->type = END;
00408 tp->len = 0;
00409 s->inp = ip;
00410 if (tp!=trp->bp && (tp-1)->type!=NL && cursource->fd!=-1)
00411 error(WARNING,"No newline at end of file");
00412 trp->lp = tp+1;
00413 return nmac;
00414
00415 case S_STNL:
00416 error(ERROR, "Unterminated string or char const");
00417 case S_NL:
00418 tp->t = ip;
00419 tp->type = NL;
00420 tp->len = 1;
00421 tp->wslen = 0;
00422 s->lineinc++;
00423 s->inp = ip+1;
00424 trp->lp = tp+1;
00425 return nmac;
00426
00427 case S_EOFSTR:
00428 error(FATAL, "EOF in string or char constant");
00429 break;
00430
00431 case S_COMNL:
00432 s->lineinc++;
00433 state = COM2;
00434 ip += runelen;
00435 runelen = 1;
00436 if (ip >= s->inb+(7*INS/8)) {
00437 memmove(tp->t, ip, 4+s->inl-ip);
00438 s->inl -= ip-tp->t;
00439 ip = tp->t+1;
00440 }
00441 continue;
00442
00443 case S_EOFCOM:
00444 error(WARNING, "EOF inside comment");
00445 --ip;
00446 case S_COMMENT:
00447 ++ip;
00448 tp->t = ip;
00449 tp->t[-1] = ' ';
00450 tp->wslen = 1;
00451 state = START;
00452 continue;
00453 }
00454 break;
00455 }
00456 ip += runelen;
00457 runelen = 1;
00458 tp->len = ip - tp->t;
00459 tp++;
00460 }
00461 }
00462
00463
00464 int
00465 trigraph(Source *s)
00466 {
00467 int c;
00468
00469 while (s->inp+2 >= s->inl && fillbuf(s)!=EOF)
00470 ;
00471 if (s->inp[1]!='?')
00472 return 0;
00473 c = 0;
00474 switch(s->inp[2]) {
00475 case '=':
00476 c = '#'; break;
00477 case '(':
00478 c = '['; break;
00479 case '/':
00480 c = '\\'; break;
00481 case ')':
00482 c = ']'; break;
00483 case '\'':
00484 c = '^'; break;
00485 case '<':
00486 c = '{'; break;
00487 case '!':
00488 c = '|'; break;
00489 case '>':
00490 c = '}'; break;
00491 case '-':
00492 c = '~'; break;
00493 }
00494 if (c) {
00495 *s->inp = c;
00496 memmove(s->inp+1, s->inp+3, s->inl-s->inp+2);
00497 s->inl -= 2;
00498 }
00499 return c;
00500 }
00501
00502 int
00503 foldline(Source *s)
00504 {
00505 while (s->inp+1 >= s->inl && fillbuf(s)!=EOF)
00506 ;
00507 if (s->inp[1] == '\n') {
00508 memmove(s->inp, s->inp+2, s->inl-s->inp+3);
00509 s->inl -= 2;
00510 return 1;
00511 }
00512 return 0;
00513 }
00514
00515 int
00516 fillbuf(Source *s)
00517 {
00518 int n, nr;
00519
00520 nr = INS/8;
00521 if ((char *)s->inl+nr > (char *)s->inb+INS)
00522 error(FATAL, "Input buffer overflow");
00523 if (s->fd<0 || (n=read(s->fd, (char *)s->inl, INS/8)) <= 0)
00524 n = 0;
00525 if ((*s->inp&0xff) == EOB)
00526 *s->inp = EOFC;
00527 s->inl += n;
00528 s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOB;
00529 if (n==0) {
00530 s->inl[0] = s->inl[1]= s->inl[2]= s->inl[3] = EOFC;
00531 return EOF;
00532 }
00533 return 0;
00534 }
00535
00536
00537
00538
00539
00540
00541 Source *
00542 setsource(char *name, int fd, char *str)
00543 {
00544 Source *s = new(Source);
00545 int len;
00546
00547 s->line = 1;
00548 s->lineinc = 0;
00549 s->fd = fd;
00550 s->filename = name;
00551 s->next = cursource;
00552 s->ifdepth = 0;
00553 cursource = s;
00554
00555 if (str) {
00556 len = strlen(str);
00557 s->inb = domalloc(len+4);
00558 s->inp = s->inb;
00559 strncpy((char *)s->inp, str, len);
00560 } else {
00561 s->inb = domalloc(INS+4);
00562 s->inp = s->inb;
00563 len = 0;
00564 }
00565 s->inl = s->inp+len;
00566 s->inl[0] = s->inl[1] = EOB;
00567 return s;
00568 }
00569
00570 void
00571 unsetsource(void)
00572 {
00573 Source *s = cursource;
00574
00575 if (s->fd>=0) {
00576 close(s->fd);
00577 dofree(s->inb);
00578 }
00579 cursource = s->next;
00580 dofree(s);
00581 }