You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1222 lines
24 KiB
1222 lines
24 KiB
#include <stdio.h> |
|
#include <stdlib.h> |
|
|
|
#include <sys/mman.h> |
|
#include <sys/stat.h> |
|
|
|
#include <unistd.h> |
|
#include <time.h> |
|
#include <fcntl.h> |
|
|
|
/*
 * Token categories produced by the lexer.
 * TOKEN_COUNT is declared last, so its value equals the number of kinds.
 */
enum token_kind {
    TOKEN_KEYWORD,
    TOKEN_IDENTIFIER,

    TOKEN_INTEGER_CONSTANT,
    TOKEN_FLOATING_CONSTANT,
    /* Original misspelling, kept as an alias with the same value. */
    TOKEN_FLOATING_CONTANT = TOKEN_FLOATING_CONSTANT,
    TOKEN_ENUMERATION_CONSTANT,
    TOKEN_CHARACTER_CONSTANT,

    TOKEN_STRING_LITERAL,
    TOKEN_PUNCTUATOR,
    TOKEN_HEADER_NAME,
    TOKEN_PP_NUMBER,

    TOKEN_COUNT,
};
|
|
|
/*
 * Counted character span. The matchers in this file look at `size` to decide
 * how many characters starting at `text` they may read.
 */
struct str {
    char *text; /* first character of the span */
    int size;   /* number of characters remaining from text */
};
|
|
|
struct token { |
|
enum token_kind kind; |
|
char *start; |
|
char *end; // one past end |
|
}; |
|
|
|
static int |
|
nondigit(struct str s) |
|
{ |
|
if (s.size) { |
|
char c = s.text[0]; |
|
return((c == '_') || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')); |
|
} |
|
return(0); |
|
} |
|
|
|
static int |
|
digit(struct str s) |
|
{ |
|
if (s.size) { |
|
char c = s.text[0]; |
|
return('0' <= c && c <= '9'); |
|
} |
|
return(0); |
|
} |
|
|
|
static int |
|
nonzero_digit(struct str s) |
|
{ |
|
if (s.size) { |
|
char c = s.text[0]; |
|
return('1' <= c && c <= '9'); |
|
} |
|
return(0); |
|
} |
|
|
|
static int |
|
octal_digit(struct str s) |
|
{ |
|
if (s.size) { |
|
char c = s.text[0]; |
|
return('0' <= c && c <= '7'); |
|
} |
|
return(0); |
|
} |
|
|
|
static int |
|
hexadecimal_prefix(struct str s) |
|
{ |
|
if (s.size >= 2) { |
|
if (s.text[0] == '0') { |
|
if (s.text[1] == 'x' || s.text[1] == 'X') { |
|
return(2); |
|
} |
|
} |
|
} |
|
return(0); |
|
} |
|
|
|
static int |
|
hexadecimal_digit(struct str s) |
|
{ |
|
if (s.size) { |
|
char c = s.text[0]; |
|
return(digit(s) || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f')); |
|
} |
|
return(0); |
|
} |
|
|
|
static int |
|
unsigned_suffix(struct str s) |
|
{ |
|
if (s.size) { |
|
char c = s.text[0]; |
|
return(c == 'u' || c == 'U'); |
|
} |
|
return(0); |
|
} |
|
|
|
static int |
|
long_suffix(struct str s) |
|
{ |
|
if (s.size) { |
|
char c = s.text[0]; |
|
return(c == 'l' || c == 'L'); |
|
} |
|
return(0); |
|
} |
|
|
|
static int |
|
long_long_suffix(struct str s) |
|
{ |
|
if (s.size >= 2) { |
|
char c1 = s.text[0]; |
|
char c2 = s.text[1]; |
|
if ((c1 == 'l' && c2 == 'l') || (c1 == 'L' && c2 == 'L')) { |
|
return(2); |
|
} |
|
} |
|
return(0); |
|
} |
|
|
|
static int |
|
sign(struct str s) |
|
{ |
|
if (s.size) { |
|
char c = s.text[0]; |
|
return(c == '+' || c == '-'); |
|
} |
|
return(0); |
|
} |
|
|
|
static int |
|
floating_suffix(struct str s) |
|
{ |
|
if (s.size) { |
|
char c = s.text[0]; |
|
return(c == 'f' || c == 'l' || c == 'F' || c == 'L'); |
|
} |
|
return(0); |
|
} |
|
|
|
static int |
|
integer_suffix(struct str s) |
|
{ |
|
int sym = 0; |
|
|
|
if ((sym = unsigned_suffix(s))) { |
|
s.text += sym; |
|
s.size -= sym; |
|
|
|
int ll = long_long_suffix(s); |
|
|
|
if (ll) { |
|
return(sym + ll); |
|
} |
|
|
|
return(sym + long_suffix(s)); |
|
} else if ((sym = long_long_suffix(s))) { |
|
s.text += sym; |
|
s.size -= sym; |
|
return(sym + unsigned_suffix(s)); |
|
} else if ((sym = long_suffix(s))) { |
|
s.text += sym; |
|
s.size -= sym; |
|
return(sym + unsigned_suffix(s)); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
simple_escape_sequence(struct str s) |
|
{ |
|
if (s.size >= 2) { |
|
char c1 = s.text[0]; |
|
if (c1 == '\\') { |
|
char c2 = s.text[1]; |
|
if (c2 == '\'' || c2 == '\"' || c2 == '?' || |
|
c2 == '\\' || c2 == 'a' || c2 == 'b' || |
|
c2 == 'f' || c2 == 'n' || c2 == 'r' || |
|
c2 == 't' || c2 == 'v') { |
|
return(2); |
|
} |
|
} |
|
} |
|
return(0); |
|
} |
|
|
|
static int |
|
octal_escape_sequence(struct str s) |
|
{ |
|
int start = s.size; |
|
|
|
if (s.size && s.text[0] == '\\') { |
|
s.text++; |
|
s.size--; |
|
|
|
if (octal_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
|
|
if (octal_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
|
|
if (octal_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
} |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
hexadecimal_escape_sequence(struct str s) |
|
{ |
|
int start = s.size; |
|
|
|
if (s.size >= 2) { |
|
if (s.text[0] == '\\' && s.text[1] == 'x') { |
|
s.text += 2; |
|
s.size -= 2; |
|
|
|
if (hexadecimal_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
|
|
for (;;) { |
|
if (hexadecimal_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
} else { |
|
break; |
|
} |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
hex_quad(struct str s) |
|
{ |
|
if (s.size >= 4) { |
|
if (hexadecimal_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
if (hexadecimal_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
if (hexadecimal_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
if (hexadecimal_digit(s)) { |
|
return(4); |
|
} |
|
} |
|
} |
|
} |
|
} |
|
return(0); |
|
} |
|
|
|
static int |
|
universal_character_name(struct str s) |
|
{ |
|
if (s.size < 2) { |
|
return(0); |
|
} |
|
|
|
if (s.text[0] == '\\') { |
|
if (s.text[1] == 'u') { |
|
s.text += 2; |
|
s.size -= 2; |
|
int hq = hex_quad(s); |
|
if (hq) { |
|
return(2 + hq); |
|
} |
|
} else if (s.text[1] == 'U') { |
|
s.text += 2; |
|
s.size -= 2; |
|
int hq1 = hex_quad(s); |
|
if (hq1) { |
|
s.text += hq1; |
|
s.size -= hq1; |
|
int hq2 = hex_quad(s); |
|
if (hq2) { |
|
return(2 + hq1 + hq2); |
|
} |
|
} |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
identifier_nondigit(struct str s) |
|
{ |
|
int sym = 0; |
|
|
|
if ((sym = nondigit(s)) || (sym = universal_character_name(s))) { |
|
return(sym); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
identifier(struct str s) |
|
{ |
|
int start = s.size; |
|
int in = identifier_nondigit(s); |
|
|
|
if (in) { |
|
s.text += in; |
|
s.size -= in; |
|
|
|
int sym = 0; |
|
|
|
for (;;) { |
|
if ((sym = identifier_nondigit(s)) || (sym = digit(s))) { |
|
s.text += sym; |
|
s.size -= sym; |
|
} else { |
|
break; |
|
} |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
decimal_constant(struct str s) |
|
{ |
|
int start = s.size; |
|
|
|
if (nonzero_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
|
|
for (;;) { |
|
if (digit(s)) { |
|
s.text++; |
|
s.size--; |
|
} else { |
|
break; |
|
} |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
octal_constant(struct str s) |
|
{ |
|
int start = s.size; |
|
|
|
if (s.size) { |
|
if (s.text[0] == '0') { |
|
s.text++; |
|
s.size--; |
|
|
|
for (;;) { |
|
if (octal_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
} else { |
|
break; |
|
} |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
hexadecimal_constant(struct str s) |
|
{ |
|
int start = s.size; |
|
int hp = hexadecimal_prefix(s); |
|
|
|
if (hp) { |
|
s.text += hp; |
|
s.size -= hp; |
|
|
|
if (hexadecimal_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
|
|
for (;;) { |
|
if (hexadecimal_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
} else { |
|
break; |
|
} |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
integer_constant(struct str s) |
|
{ |
|
int sym = 0; |
|
|
|
if ((sym = hexadecimal_constant(s)) || (sym = octal_constant(s)) || (sym = decimal_constant(s))) { |
|
s.text += sym; |
|
s.size -= sym; |
|
return(sym + integer_suffix(s)); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
digit_sequence(struct str s) |
|
{ |
|
int start = s.size; |
|
|
|
if (digit(s)) { |
|
s.text++; |
|
s.size--; |
|
|
|
for (;;) { |
|
if (digit(s)) { |
|
s.text++; |
|
s.size--; |
|
} else { |
|
break; |
|
} |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
fractional_constant(struct str s) |
|
{ |
|
int ds1 = digit_sequence(s); |
|
|
|
s.text += ds1; |
|
s.size -= ds1; |
|
|
|
if (s.size) { |
|
if (s.text[0] == '.') { |
|
s.text++; |
|
s.size--; |
|
|
|
int ds2 = digit_sequence(s); |
|
|
|
s.text += ds2; |
|
s.size -= ds2; |
|
|
|
if (ds1 > 0 || ds2 > 0) { |
|
return(ds1 + ds2 + 1); |
|
} |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
exponent_part(struct str s) |
|
{ |
|
int start = s.size; |
|
|
|
if (s.size) { |
|
if (s.text[0] == 'e' || s.text[0] == 'E') { |
|
s.text++; |
|
s.size--; |
|
|
|
int sgn = sign(s); |
|
s.text += sgn; |
|
s.size -= sgn; |
|
|
|
int ds = digit_sequence(s); |
|
if (ds) { |
|
s.text += ds; |
|
s.size -= ds; |
|
return(start - s.size); |
|
} |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
decimal_floating_constant(struct str s) |
|
{ |
|
int sym = 0; |
|
if ((sym = fractional_constant(s))) { |
|
s.text += sym; |
|
s.size -= sym; |
|
|
|
int ep = exponent_part(s); |
|
s.text += ep; |
|
s.size -= ep; |
|
|
|
return(sym + ep + floating_suffix(s)); |
|
} else if ((sym = digit_sequence(s))) { |
|
s.text += sym; |
|
s.size -= sym; |
|
|
|
int ep = 0; |
|
|
|
if ((ep = exponent_part(s))) { |
|
s.text += ep; |
|
s.size -= ep; |
|
return(sym + ep + floating_suffix(s)); |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
hexadecimal_digit_sequence(struct str s) |
|
{ |
|
int start = s.size; |
|
|
|
if (hexadecimal_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
|
|
for (;;) { |
|
if (hexadecimal_digit(s)) { |
|
s.text++; |
|
s.size--; |
|
} else { |
|
break; |
|
} |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
hexadecimal_fractional_constant(struct str s) |
|
{ |
|
int hds1 = hexadecimal_digit_sequence(s); |
|
|
|
s.text += hds1; |
|
s.size -= hds1; |
|
|
|
if (s.size && s.text[0] == '.') { |
|
s.text++; |
|
s.size--; |
|
|
|
int hds2 = hexadecimal_digit_sequence(s); |
|
|
|
if (hds1 > 0 || hds2 > 0) { |
|
return(hds1 + hds2 + 1); |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
binary_exponent_part(struct str s) |
|
{ |
|
int start = s.size; |
|
|
|
if (s.size && (s.text[0] == 'p' || s.text[0] == 'P')) { |
|
s.text++; |
|
s.size--; |
|
|
|
int sgn = sign(s); |
|
s.text += sgn; |
|
s.size -= sgn; |
|
|
|
int ds = digit_sequence(s); |
|
if (ds) { |
|
s.size -= ds; |
|
return(start - s.size); |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
hexadecimal_floating_constant(struct str s) |
|
{ |
|
int hp = 0; |
|
int start = s.size; |
|
|
|
if ((hp = hexadecimal_prefix(s))) { |
|
s.text += hp; |
|
s.size -= hp; |
|
|
|
int hfc = 0; |
|
int hds = 0; |
|
|
|
if ((hfc = hexadecimal_fractional_constant(s))) { |
|
s.text += hfc; |
|
s.size -= hfc; |
|
int bep = binary_exponent_part(s); |
|
if (bep) { |
|
s.text += bep; |
|
s.size -= bep; |
|
return(start - s.size + floating_suffix(s)); |
|
} |
|
} else if ((hds = hexadecimal_digit_sequence(s))) { |
|
s.text += hds; |
|
s.size -= hds; |
|
int bep = binary_exponent_part(s); |
|
if (bep) { |
|
s.text += bep; |
|
s.size -= bep; |
|
return(start - s.size + floating_suffix(s)); |
|
} |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
floating_constant(struct str s) |
|
{ |
|
int sym = 0; |
|
|
|
if ((sym = decimal_floating_constant(s)) || (sym = hexadecimal_floating_constant(s))) { |
|
return(sym); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
#if 0
/*
 * enumeration-constant: lexically just an identifier. Compiled out; the
 * constant() matcher does not call it.
 */
static int
enumeration_constant(struct str s)
{
    return identifier(s);
}
#endif
|
|
|
static int |
|
escape_sequence(struct str s) |
|
{ |
|
int sym = 0; |
|
|
|
if ((sym = simple_escape_sequence(s)) || (sym = octal_escape_sequence(s)) || |
|
(sym = hexadecimal_escape_sequence(s)) || (sym = universal_character_name(s))) { |
|
return(sym); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
c_char(struct str s) |
|
{ |
|
int sym = 0; |
|
|
|
if ((sym = escape_sequence(s))) { |
|
return(sym); |
|
} |
|
|
|
if (s.size) { |
|
char c = s.text[0]; |
|
return(c != '\'' && c != '\\' && c != '\n'); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
h_char(struct str s) |
|
{ |
|
if (s.size) { |
|
char c = s.text[0]; |
|
return(c != '\n' && c != '>'); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
q_char(struct str s) |
|
{ |
|
if (s.size) { |
|
char c = s.text[0]; |
|
return(c != '\n' && c != '\"'); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
c_char_sequence(struct str s) |
|
{ |
|
int start = s.size; |
|
int sc = 0; |
|
|
|
if ((sc = c_char(s))) { |
|
s.text += sc; |
|
s.size -= sc; |
|
|
|
for (;;) { |
|
if ((sc = c_char(s))) { |
|
s.text += sc; |
|
s.size -= sc; |
|
} else { |
|
break; |
|
} |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
character_constant(struct str s) |
|
{ |
|
int start = s.size; |
|
int ok = 0; |
|
|
|
if (s.size && s.text[0] == '\'') { |
|
s.text++; |
|
s.size--; |
|
ok = 1; |
|
} else if (s.size >= 2 && s.text[0] == 'L' && s.text[1] == '\'') { |
|
s.text += 2; |
|
s.size -= 2; |
|
ok = 1; |
|
} else if (s.size >= 2 && s.text[0] == 'u' && s.text[1] == '\'') { |
|
s.text += 2; |
|
s.size -= 2; |
|
ok = 1; |
|
} else if (s.size >= 2 && s.text[0] == 'U' && s.text[1] == '\'') { |
|
s.text += 2; |
|
s.size -= 2; |
|
ok = 1; |
|
} |
|
|
|
if (ok) { |
|
int ccs = c_char_sequence(s); |
|
if (ccs) { |
|
s.text += ccs; |
|
s.size -= ccs; |
|
if (s.size && s.text[0] == '\'') { |
|
s.size--; |
|
return(start - s.size); |
|
} |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
constant(struct str s) |
|
{ |
|
int sym = 0; |
|
|
|
if ((sym = floating_constant(s)) || (sym = integer_constant(s)) || (sym = character_constant(s))) { |
|
/* || (sym = enumeration_constant(s)) */ |
|
return(sym); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
whitespace(struct str s) |
|
{ |
|
int start = s.size; |
|
|
|
while (s.size && (s.text[0] == ' ' || s.text[0] == '\t' || s.text[0] == '\n' || s.text[0] == '\r')) { |
|
s.text++; |
|
s.size--; |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
|
|
static int |
|
s_char(struct str s) |
|
{ |
|
int sym = 0; |
|
|
|
if ((sym = escape_sequence(s))) { |
|
return(sym); |
|
} |
|
|
|
if (s.size) { |
|
char c = s.text[0]; |
|
return(c != '\"' && c != '\\' && c != '\n'); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
s_char_sequence(struct str s) |
|
{ |
|
int start = s.size; |
|
int sc = 0; |
|
|
|
if ((sc = s_char(s))) { |
|
s.text += sc; |
|
s.size -= sc; |
|
|
|
for (;;) { |
|
if ((sc = s_char(s))) { |
|
s.text += sc; |
|
s.size -= sc; |
|
} else { |
|
break; |
|
} |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
encoding_prefix(struct str s) |
|
{ |
|
if (s.size >= 2 && s.text[0] == 'u' && s.text[1] == '8') { |
|
return(2); |
|
} |
|
|
|
if (s.size) { |
|
char c = s.text[0]; |
|
if (c == 'u' || c == 'U' || c == 'L') { |
|
return(1); |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
string_literal(struct str s) |
|
{ |
|
int start = s.size; |
|
|
|
int ep = encoding_prefix(s); |
|
s.text += ep; |
|
s.size -= ep; |
|
|
|
if (s.size && s.text[0] == '\"') { |
|
s.text++; |
|
s.size--; |
|
|
|
int scs = s_char_sequence(s); |
|
s.text += scs; |
|
s.size -= scs; |
|
|
|
if (s.size && s.text[0] == '\"') { |
|
s.size--; |
|
return(start - s.size); |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
punctuator(struct str s) |
|
{ |
|
if (s.size >= 4) { |
|
if (s.text[0] == '%' && s.text[1] == ':' && s.text[2] == '%' && s.text[3] == ':') |
|
{ |
|
return(4); |
|
} |
|
} |
|
|
|
if (s.size >= 3) { |
|
char c1 = s.text[0]; |
|
char c2 = s.text[1]; |
|
char c3 = s.text[2]; |
|
if ((c1 == '.' && c2 == '.' && c3 == '.') || |
|
(c1 == '<' && c2 == '<' && c3 == '=') || |
|
(c1 == '>' && c2 == '>' && c3 == '=')) |
|
{ |
|
return(3); |
|
} |
|
} |
|
|
|
if (s.size >= 2) { |
|
char c1 = s.text[0]; |
|
char c2 = s.text[1]; |
|
if ((c1 == '-' && c2 == '>') || (c1 == '+' && c2 == '+') || |
|
(c1 == '-' && c2 == '-') || (c1 == '<' && c2 == '<') || |
|
(c1 == '>' && c2 == '>') || (c1 == '<' && c2 == '=') || |
|
(c1 == '>' && c2 == '=') || (c1 == '=' && c2 == '=') || |
|
(c1 == '!' && c2 == '=') || (c1 == '&' && c2 == '&') || |
|
(c1 == '|' && c2 == '|') || (c1 == '*' && c2 == '=') || |
|
(c1 == '/' && c2 == '=') || (c1 == '%' && c2 == '=') || |
|
(c1 == '+' && c2 == '=') || (c1 == '-' && c2 == '=') || |
|
(c1 == '&' && c2 == '=') || (c1 == '^' && c2 == '=') || |
|
(c1 == '|' && c2 == '=') || (c1 == '#' && c2 == '#') || |
|
(c1 == '<' && c2 == ':') || (c1 == ':' && c2 == '>') || |
|
(c1 == '<' && c2 == '%') || (c1 == '>' && c2 == '%') || |
|
(c1 == '%' && c2 == ':')) |
|
{ |
|
return(2); |
|
} |
|
} |
|
|
|
if (s.size) { |
|
char c = s.text[0]; |
|
if (c == '[' || c == ']' || c == '(' || c == ')' || |
|
c == '{' || c == '}' || c == '.' || c == '&' || |
|
c == '*' || c == '+' || c == '-' || c == '~' || |
|
c == '!' || c == '/' || c == '%' || c == '<' || |
|
c == '>' || c == '^' || c == '|' || c == '?' || |
|
c == ':' || c == ';' || c == '=' || c == ',' || |
|
c == '#') |
|
{ |
|
return(1); |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
h_char_sequence(struct str s) |
|
{ |
|
int start = s.size; |
|
int sc = 0; |
|
|
|
if ((sc = h_char(s))) { |
|
s.text += sc; |
|
s.size -= sc; |
|
|
|
for (;;) { |
|
if ((sc = h_char(s))) { |
|
s.text += sc; |
|
s.size -= sc; |
|
} else { |
|
break; |
|
} |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
q_char_sequence(struct str s) |
|
{ |
|
int start = s.size; |
|
int sc = 0; |
|
|
|
if ((sc = q_char(s))) { |
|
s.text += sc; |
|
s.size -= sc; |
|
|
|
for (;;) { |
|
if ((sc = q_char(s))) { |
|
s.text += sc; |
|
s.size -= sc; |
|
} else { |
|
break; |
|
} |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
header_name(struct str s) |
|
{ |
|
if (s.size && s.text[0] == '<') { |
|
s.text++; |
|
s.size--; |
|
|
|
int hcs = h_char_sequence(s); |
|
if (hcs) { |
|
s.text += hcs; |
|
s.size -= hcs; |
|
|
|
if (s.size && s.text[0] == '>') { |
|
return(hcs + 2); |
|
} |
|
} |
|
} |
|
|
|
if (s.size && s.text[0] == '\"') { |
|
s.text++; |
|
s.size--; |
|
|
|
int qcs = q_char_sequence(s); |
|
if (qcs) { |
|
s.text += qcs; |
|
s.size -= qcs; |
|
|
|
if (s.size && s.text[0] == '\"') { |
|
return(qcs + 2); |
|
} |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static int |
|
comment(struct str s) |
|
{ |
|
int start = s.size; |
|
|
|
if (s.size >= 2) { |
|
if (s.text[0] == '/' && s.text[1] == '/') { |
|
/* single-line comment */ |
|
s.text += 2; |
|
s.size -= 2; |
|
|
|
while (s.size) { |
|
if (s.text[0] == '\n') { |
|
s.text++; |
|
s.size--; |
|
break; |
|
} |
|
|
|
s.text++; |
|
s.size--; |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
|
|
if (s.text[0] == '/' && s.text[1] == '*') { |
|
/* multi-line comment */ |
|
|
|
while (s.size) { |
|
if (s.size >= 2 && s.text[0] == '*' && s.text[1] == '/') { |
|
s.text += 2; |
|
s.size -= 2; |
|
break; |
|
} |
|
|
|
s.text++; |
|
s.size--; |
|
} |
|
|
|
return(start - s.size); |
|
} |
|
} |
|
|
|
return(0); |
|
} |
|
|
|
static struct token * |
|
lex(char *text, int size) |
|
{ |
|
struct str s = { text, size }; |
|
|
|
while (s.size) { |
|
int sym = whitespace(s); |
|
s.text += sym; |
|
s.size -= sym; |
|
|
|
if ((sym = comment(s))) { |
|
//printf("Comment: "); |
|
} else if ((sym = constant(s))) { |
|
//printf("Constant: "); |
|
|
|
|
|
#if 0 |
|
} else if ((sym == pp_number(s))) { |
|
printf("PP number: "); |
|
#endif |
|
|
|
} else if ((sym = punctuator(s))) { |
|
//printf("Punctuator: "); |
|
} else if ((sym = string_literal(s))) { |
|
//printf("String literal: "); |
|
} else if ((sym = header_name(s))) { |
|
//printf("Header name: "); |
|
} else if ((sym = identifier(s))) { |
|
//printf("Identifier: "); |
|
} |
|
|
|
//printf("%.*s\n", sym, s.text); |
|
|
|
if (sym) { |
|
s.text += sym; |
|
s.size -= sym; |
|
} else if (s.size) { |
|
fprintf(stderr, "Error!\n"); |
|
break; |
|
} |
|
} |
|
|
|
|
|
return(NULL); |
|
} |
|
|
|
/*
 * Removes line continuations in place.
 *
 * For every backslash that is followed by a newline, with only spaces, tabs
 * or carriage returns in between, both the backslash and that newline are
 * overwritten with a space, so the buffer length does not change.
 *
 * Only the first newline after the backslash is spliced: the scan stops as
 * soon as it has been replaced, so blank lines following a continuation keep
 * their newlines. A carriage return before the newline (CRLF line endings)
 * is tolerated and left in place; whitespace() skips it later.
 *
 * data: buffer to modify; size: number of characters in data.
 */
static void
preprocess(char *data, int size)
{
    for (int i = 0; i < size; ++i) {
        if (data[i] != '\\') {
            continue;
        }

        for (int j = i + 1; j < size; ++j) {
            char c = data[j];

            if (c == '\n') {
                data[j] = ' ';
                data[i] = ' ';
                break;
            }

            if (c != ' ' && c != '\t' && c != '\r') {
                /* The backslash is not a line continuation. */
                break;
            }
        }
    }
}
|
|
|
static void |
|
run(char *data, int size) |
|
{ |
|
struct timespec tp = { 0 }; |
|
|
|
clock_gettime(CLOCK_MONOTONIC_RAW, &tp); |
|
unsigned long long before = tp.tv_sec * 1000000ULL + tp.tv_nsec / 1000ULL; |
|
|
|
preprocess(data, size); |
|
struct token *tokens = lex(data, size); |
|
(void) tokens; |
|
|
|
clock_gettime(CLOCK_MONOTONIC_RAW, &tp); |
|
|
|
unsigned long long after = tp.tv_sec * 1000000ULL + tp.tv_nsec / 1000ULL; |
|
|
|
unsigned long long dt = after - before; |
|
|
|
fprintf(stderr, "%.2fms, %.2fMB/s\n", (float) dt / 1000, (float) size / dt); |
|
} |
|
|
|
/*
 * Entry point: maps the file named by argv[1] into memory (private,
 * writable, so preprocess() can edit it without touching the file) and runs
 * the lexer over it.
 *
 * Returns 0 on success and 1 on a usage or I/O error. An empty file is
 * treated as success with nothing to lex, because mmap() rejects
 * zero-length mappings. Files whose size does not fit in an int are
 * rejected, since the lexer tracks sizes as int.
 */
int
main(int argc, char **argv)
{
    if (argc != 2) {
        fprintf(stderr, "Usage: %s input_file.c\n", argv[0]);
        return(1);
    }

    char *file = argv[1];
    int fd = open(file, O_RDONLY);

    if (fd == -1) {
        perror("open");
        return(1);
    }

    struct stat sb = { 0 };
    if (fstat(fd, &sb) == -1) {
        perror("fstat");
        close(fd);
        return(1);
    }

    int size = (int) sb.st_size;

    /* Reject sizes that were truncated by the conversion to int. */
    if (size < 0 || (off_t) size != sb.st_size) {
        fprintf(stderr, "%s: file too large\n", file);
        close(fd);
        return(1);
    }

    if (size == 0) {
        close(fd);
        return(0);
    }

    char *data = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    if (data == MAP_FAILED) {
        perror("mmap");
        close(fd);
        return(1);
    }

    /* The mapping stays valid after the descriptor is closed. */
    close(fd);

    run(data, size);

    munmap(data, size);

    return(0);
}