|
|
|
@ -10,8 +10,9 @@
@@ -10,8 +10,9 @@
|
|
|
|
|
|
|
|
|
|
#include <immintrin.h> |
|
|
|
|
|
|
|
|
|
enum token_kind { |
|
|
|
|
#define MAX(a, b) ((a) > (b) ? (a) : (b)) |
|
|
|
|
|
|
|
|
|
enum token_kind { |
|
|
|
|
TOKEN_KEYWORD, |
|
|
|
|
TOKEN_IDENTIFIER, |
|
|
|
|
|
|
|
|
@ -58,42 +59,30 @@ advance(struct str *s, int by)
@@ -58,42 +59,30 @@ advance(struct str *s, int by)
|
|
|
|
|
static int |
|
|
|
|
nondigit(struct str s) |
|
|
|
|
{ |
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
return(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (c == '_')); |
|
|
|
|
} |
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
digit(struct str s) |
|
|
|
|
{ |
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
return('0' <= c && c <= '9'); |
|
|
|
|
} |
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
nonzero_digit(struct str s) |
|
|
|
|
{ |
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
return('1' <= c && c <= '9'); |
|
|
|
|
} |
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
octal_digit(struct str s) |
|
|
|
|
{ |
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
return('0' <= c && c <= '7'); |
|
|
|
|
} |
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
hexadecimal_prefix(struct str s) |
|
|
|
@ -111,32 +100,23 @@ hexadecimal_prefix(struct str s)
@@ -111,32 +100,23 @@ hexadecimal_prefix(struct str s)
|
|
|
|
|
static int |
|
|
|
|
hexadecimal_digit(struct str s) |
|
|
|
|
{ |
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
return(digit(s) || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f')); |
|
|
|
|
} |
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
unsigned_suffix(struct str s) |
|
|
|
|
{ |
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
return(c == 'u' || c == 'U'); |
|
|
|
|
} |
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
long_suffix(struct str s) |
|
|
|
|
{ |
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
return(c == 'l' || c == 'L'); |
|
|
|
|
} |
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
long_long_suffix(struct str s) |
|
|
|
@ -154,22 +134,16 @@ long_long_suffix(struct str s)
@@ -154,22 +134,16 @@ long_long_suffix(struct str s)
|
|
|
|
|
static int |
|
|
|
|
sign(struct str s) |
|
|
|
|
{ |
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
return(c == '+' || c == '-'); |
|
|
|
|
} |
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
floating_suffix(struct str s) |
|
|
|
|
{ |
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
return(c == 'f' || c == 'l' || c == 'F' || c == 'L'); |
|
|
|
|
} |
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
integer_suffix(struct str s) |
|
|
|
@ -291,11 +265,7 @@ hex_quad(struct str s)
@@ -291,11 +265,7 @@ hex_quad(struct str s)
|
|
|
|
|
static int |
|
|
|
|
universal_character_name(struct str s) |
|
|
|
|
{ |
|
|
|
|
if (s.size < 2) { |
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (s.text[0] == '\\') { |
|
|
|
|
if (s.size >= 2 && s.text[0] == '\\') { |
|
|
|
|
if (s.text[1] == 'u') { |
|
|
|
|
advance(&s, 2); |
|
|
|
|
int hq = hex_quad(s); |
|
|
|
@ -382,7 +352,6 @@ octal_constant(struct str s)
@@ -382,7 +352,6 @@ octal_constant(struct str s)
|
|
|
|
|
{ |
|
|
|
|
int start = s.size; |
|
|
|
|
|
|
|
|
|
if (s.size) { |
|
|
|
|
if (s.text[0] == '0') { |
|
|
|
|
advance(&s, 1); |
|
|
|
|
|
|
|
|
@ -396,7 +365,6 @@ octal_constant(struct str s)
@@ -396,7 +365,6 @@ octal_constant(struct str s)
|
|
|
|
|
|
|
|
|
|
return(start - s.size); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
@ -471,7 +439,6 @@ fractional_constant(struct str s)
@@ -471,7 +439,6 @@ fractional_constant(struct str s)
|
|
|
|
|
s.text += ds1; |
|
|
|
|
s.size -= ds1; |
|
|
|
|
|
|
|
|
|
if (s.size) { |
|
|
|
|
if (s.text[0] == '.') { |
|
|
|
|
advance(&s, 1); |
|
|
|
|
|
|
|
|
@ -482,7 +449,6 @@ fractional_constant(struct str s)
@@ -482,7 +449,6 @@ fractional_constant(struct str s)
|
|
|
|
|
return(ds1 + ds2 + 1); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
@ -492,7 +458,6 @@ exponent_part(struct str s)
@@ -492,7 +458,6 @@ exponent_part(struct str s)
|
|
|
|
|
{ |
|
|
|
|
int start = s.size; |
|
|
|
|
|
|
|
|
|
if (s.size) { |
|
|
|
|
if (s.text[0] == 'e' || s.text[0] == 'E') { |
|
|
|
|
advance(&s, 1); |
|
|
|
|
|
|
|
|
@ -505,7 +470,6 @@ exponent_part(struct str s)
@@ -505,7 +470,6 @@ exponent_part(struct str s)
|
|
|
|
|
return(start - s.size); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
@ -564,7 +528,7 @@ hexadecimal_fractional_constant(struct str s)
@@ -564,7 +528,7 @@ hexadecimal_fractional_constant(struct str s)
|
|
|
|
|
|
|
|
|
|
advance(&s, hds1); |
|
|
|
|
|
|
|
|
|
if (s.size && s.text[0] == '.') { |
|
|
|
|
if (s.text[0] == '.') { |
|
|
|
|
advance(&s, 1); |
|
|
|
|
|
|
|
|
|
int hds2 = hexadecimal_digit_sequence(s); |
|
|
|
@ -582,7 +546,7 @@ binary_exponent_part(struct str s)
@@ -582,7 +546,7 @@ binary_exponent_part(struct str s)
|
|
|
|
|
{ |
|
|
|
|
int start = s.size; |
|
|
|
|
|
|
|
|
|
if (s.size && (s.text[0] == 'p' || s.text[0] == 'P')) { |
|
|
|
|
if (s.text[0] == 'p' || s.text[0] == 'P') { |
|
|
|
|
advance(&s, 1); |
|
|
|
|
|
|
|
|
|
int sgn = sign(s); |
|
|
|
@ -673,36 +637,25 @@ c_char(struct str s)
@@ -673,36 +637,25 @@ c_char(struct str s)
|
|
|
|
|
return(sym); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
return(c != '\'' && c != '\\' && c != '\n'); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return(0); |
|
|
|
|
return(c != '\'' && c != '\\' && c != '\n'); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
h_char(struct str s) |
|
|
|
|
{ |
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
return(c != '\n' && c != '>'); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
q_char(struct str s) |
|
|
|
|
{ |
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
return(c != '\n' && c != '\"'); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
c_char_sequence(struct str s) |
|
|
|
|
{ |
|
|
|
@ -732,7 +685,7 @@ character_constant(struct str s)
@@ -732,7 +685,7 @@ character_constant(struct str s)
|
|
|
|
|
int start = s.size; |
|
|
|
|
int ok = 0; |
|
|
|
|
|
|
|
|
|
if (s.size && s.text[0] == '\'') { |
|
|
|
|
if (s.text[0] == '\'') { |
|
|
|
|
advance(&s, 1); |
|
|
|
|
ok = 1; |
|
|
|
|
} else if (s.size >= 2 && s.text[0] == 'L' && s.text[1] == '\'') { |
|
|
|
@ -750,7 +703,7 @@ character_constant(struct str s)
@@ -750,7 +703,7 @@ character_constant(struct str s)
|
|
|
|
|
int ccs = c_char_sequence(s); |
|
|
|
|
if (ccs) { |
|
|
|
|
advance(&s, ccs); |
|
|
|
|
if (s.size && s.text[0] == '\'') { |
|
|
|
|
if (s.text[0] == '\'') { |
|
|
|
|
advance(&s, 1); |
|
|
|
|
return(start - s.size); |
|
|
|
|
} |
|
|
|
@ -777,8 +730,9 @@ static int
@@ -777,8 +730,9 @@ static int
|
|
|
|
|
whitespace(struct str s) |
|
|
|
|
{ |
|
|
|
|
int start = s.size; |
|
|
|
|
//int spaces = 0x20090a0d;
|
|
|
|
|
|
|
|
|
|
while (s.size && (s.text[0] == ' ' || s.text[0] == '\t' || s.text[0] == '\n' || s.text[0] == '\r')) { |
|
|
|
|
while (s.text[0] == ' ' || s.text[0] == '\t' || s.text[0] == '\n' || s.text[0] == '\r') { |
|
|
|
|
advance(&s, 1); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -794,14 +748,10 @@ s_char(struct str s)
@@ -794,14 +748,10 @@ s_char(struct str s)
|
|
|
|
|
return(sym); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
return(c != '\"' && c != '\\' && c != '\n'); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
s_char_sequence(struct str s) |
|
|
|
|
{ |
|
|
|
@ -832,14 +782,8 @@ encoding_prefix(struct str s)
@@ -832,14 +782,8 @@ encoding_prefix(struct str s)
|
|
|
|
|
return(2); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
if (c == 'u' || c == 'U' || c == 'L') { |
|
|
|
|
return(1); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return(0); |
|
|
|
|
return(c == 'u' || c == 'U' || c == 'L'); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
@ -850,13 +794,13 @@ string_literal(struct str s)
@@ -850,13 +794,13 @@ string_literal(struct str s)
|
|
|
|
|
int ep = encoding_prefix(s); |
|
|
|
|
advance(&s, ep); |
|
|
|
|
|
|
|
|
|
if (s.size && s.text[0] == '\"') { |
|
|
|
|
if (s.text[0] == '\"') { |
|
|
|
|
advance(&s, 1); |
|
|
|
|
|
|
|
|
|
int scs = s_char_sequence(s); |
|
|
|
|
advance(&s, scs); |
|
|
|
|
|
|
|
|
|
if (s.size && s.text[0] == '\"') { |
|
|
|
|
if (s.text[0] == '\"') { |
|
|
|
|
advance(&s, 1); |
|
|
|
|
return(start - s.size); |
|
|
|
|
} |
|
|
|
@ -908,7 +852,6 @@ punctuator(struct str s)
@@ -908,7 +852,6 @@ punctuator(struct str s)
|
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (s.size) { |
|
|
|
|
char c = s.text[0]; |
|
|
|
|
if (c == '[' || c == ']' || c == '(' || c == ')' || |
|
|
|
|
c == '{' || c == '}' || c == '.' || c == '&' || |
|
|
|
@ -920,7 +863,6 @@ punctuator(struct str s)
@@ -920,7 +863,6 @@ punctuator(struct str s)
|
|
|
|
|
{ |
|
|
|
|
return(1); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
return(0); |
|
|
|
|
} |
|
|
|
@ -974,27 +916,27 @@ q_char_sequence(struct str s)
@@ -974,27 +916,27 @@ q_char_sequence(struct str s)
|
|
|
|
|
static int |
|
|
|
|
header_name(struct str s) |
|
|
|
|
{ |
|
|
|
|
if (s.size && s.text[0] == '<') { |
|
|
|
|
if (s.text[0] == '<') { |
|
|
|
|
advance(&s, 1); |
|
|
|
|
|
|
|
|
|
int hcs = h_char_sequence(s); |
|
|
|
|
if (hcs) { |
|
|
|
|
advance(&s, hcs); |
|
|
|
|
|
|
|
|
|
if (s.size && s.text[0] == '>') { |
|
|
|
|
if (s.text[0] == '>') { |
|
|
|
|
return(hcs + 2); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (s.size && s.text[0] == '\"') { |
|
|
|
|
if (s.text[0] == '\"') { |
|
|
|
|
advance(&s, 1); |
|
|
|
|
|
|
|
|
|
int qcs = q_char_sequence(s); |
|
|
|
|
if (qcs) { |
|
|
|
|
advance(&s, qcs); |
|
|
|
|
|
|
|
|
|
if (s.size && s.text[0] == '\"') { |
|
|
|
|
if (s.text[0] == '\"') { |
|
|
|
|
return(qcs + 2); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -1028,6 +970,28 @@ comment(struct str s)
@@ -1028,6 +970,28 @@ comment(struct str s)
|
|
|
|
|
if (s.text[0] == '/' && s.text[1] == '*') { |
|
|
|
|
/* multi-line comment */ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
advance(&s, 2); |
|
|
|
|
|
|
|
|
|
__m128i mask = _mm_setr_epi8('*', '/', '*', '/', '*', '/', '*', '/', '*', '/', '*', '/', '*', '/', '*', '/'); |
|
|
|
|
|
|
|
|
|
__m128i mask_sus = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '*'); |
|
|
|
|
|
|
|
|
|
while (s.size > 16) { |
|
|
|
|
__m128i chunk = _mm_loadu_si128((__m128i *)(s.text)); |
|
|
|
|
__m128i v1 = _mm_cmpeq_epi16(chunk, mask); |
|
|
|
|
__m128i v2 = _mm_cmpeq_epi16(_mm_bslli_si128(chunk, 1), mask); |
|
|
|
|
__m128i v3 = _mm_cmpeq_epi8(chunk, mask_sus); |
|
|
|
|
__m128i v12 = _mm_or_si128(v1, v2); |
|
|
|
|
__m128i v123 = _mm_or_si128(v12, v3); |
|
|
|
|
|
|
|
|
|
if (!_mm_testz_si128(v123, v123)) { |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
advance(&s, 16); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
while (s.size) { |
|
|
|
|
if (s.size >= 2 && s.text[0] == '*' && s.text[1] == '/') { |
|
|
|
|
advance(&s, 2); |
|
|
|
@ -1057,36 +1021,42 @@ lex(char *text, int size)
@@ -1057,36 +1021,42 @@ lex(char *text, int size)
|
|
|
|
|
|
|
|
|
|
if ((sym = comment(s))) { |
|
|
|
|
//printf("Comment: ");
|
|
|
|
|
} else if ((sym = constant(s))) { |
|
|
|
|
//printf("Constant: ");
|
|
|
|
|
//printf("%.*s\n", sym, s.text);
|
|
|
|
|
} else { |
|
|
|
|
int sym_constant = constant(s); |
|
|
|
|
int sym_punctuator = punctuator(s); |
|
|
|
|
int sym_string = string_literal(s); |
|
|
|
|
int sym_header = header_name(s); |
|
|
|
|
int sym_identifier = identifier(s); |
|
|
|
|
|
|
|
|
|
sym = MAX(sym_constant, MAX(sym_punctuator, MAX(sym_string, MAX(sym_header, sym_identifier)))); |
|
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
|
} else if ((sym == pp_number(s))) { |
|
|
|
|
printf("PP number: "); |
|
|
|
|
if (sym == sym_constant) { |
|
|
|
|
printf("Constant: "); |
|
|
|
|
} else if (sym == sym_punctuator) { |
|
|
|
|
printf("Punctuator: "); |
|
|
|
|
} else if (sym == sym_string) { |
|
|
|
|
printf("String: "); |
|
|
|
|
} else if (sym == sym_header) { |
|
|
|
|
printf("Header: "); |
|
|
|
|
} else if (sym == sym_identifier) { |
|
|
|
|
printf("Identifier: "); |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
} else if ((sym = punctuator(s))) { |
|
|
|
|
//printf("Punctuator: ");
|
|
|
|
|
} else if ((sym = string_literal(s))) { |
|
|
|
|
//printf("String literal: ");
|
|
|
|
|
} else if ((sym = header_name(s))) { |
|
|
|
|
//printf("Header name: ");
|
|
|
|
|
} else if ((sym = identifier(s))) { |
|
|
|
|
//printf("Identifier: ");
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
//printf("%.*s\n", sym, s.text);
|
|
|
|
|
|
|
|
|
|
if (sym) { |
|
|
|
|
advance(&s, sym); |
|
|
|
|
} else if (s.size) { |
|
|
|
|
} else if (s.size == 1 && s.text[0] == '\0') { |
|
|
|
|
break; |
|
|
|
|
} else { |
|
|
|
|
fprintf(stderr, "Error!\n"); |
|
|
|
|
break; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return(NULL); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -1169,6 +1139,13 @@ main(int argc, char **argv)
@@ -1169,6 +1139,13 @@ main(int argc, char **argv)
|
|
|
|
|
int size = (int) sb.st_size; |
|
|
|
|
|
|
|
|
|
char *data = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); |
|
|
|
|
if (size && data[size - 1] != '\n') { |
|
|
|
|
fprintf(stderr, "No terminating new line. Fuck you!\n"); |
|
|
|
|
return(1); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
data[size - 1] = '\0'; |
|
|
|
|
|
|
|
|
|
if (data == MAP_FAILED) { |
|
|
|
|
perror("mmap"); |
|
|
|
|
return(1); |
|
|
|
|