/*
 * Patch to main.c, summarised from the hunks below:
 *   - adds one #include (the header name is not present in this copy);
 *   - nondigit(): reorders the three character tests in the return expression;
 *   - comment(): the return of start - s.size now happens only when s.size
 *     is non-zero (what runs otherwise is outside the visible hunk);
 *   - the former preprocess() loop becomes preprocess_scalar(), which takes
 *     a scan bound (chunk_size) and a separate lookahead bound (full_size),
 *     and leaves its inner loop once a newline has been blanked;
 *   - a new preprocess() tests 16-byte chunks for a backslash with x86 SIMD
 *     intrinsics, calls preprocess_scalar() only for chunks that contain
 *     one, and then calls it once more for the remaining tail bytes.
 *
 * NOTE(review): this copy of the patch had lost its line breaks; the line
 * layout, blank context lines and indentation below are a reconstruction
 * chosen to fit the @@ line counts -- confirm against the original commit.
 */
diff --git a/main.c b/main.c
index 383485d..f816df0 100644
--- a/main.c
+++ b/main.c
@@ -8,6 +8,8 @@
 #include
 #include
 
+#include
+
 
 enum token_kind {
 	TOKEN_KEYWORD,
@@ -58,7 +60,7 @@ nondigit(struct str s)
 {
 	if (s.size) {
 		char c = s.text[0];
-		return((c == '_') || ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'));
+		return(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || (c == '_'));
 	}
 	return(0);
 }
@@ -1035,7 +1037,9 @@ comment(struct str s)
 			advance(&s, 1);
 		}
 
-		return(start - s.size);
+		if (s.size) {
+			return(start - s.size);
+		}
 	}
 }
 
@@ -1087,14 +1091,15 @@ lex(char *text, int size)
 }
 
 static void
-preprocess(char *data, int size)
+preprocess_scalar(char *data, int chunk_size, int full_size)
 {
-	for (int i = 0; i < size; ++i) {
+	for (int i = 0; i < chunk_size; ++i) {
 		if (data[i] == '\\') {
-			for (int j = i + 1; j < size; ++j) {
+			for (int j = i + 1; j < full_size; ++j) {
 				if (data[j] == '\n') {
 					data[j] = ' ';
 					data[i] = ' ';
+					break; /* backslash and newline are both blanked; stop the lookahead */
 				} else if (data[j] != ' ' && data[j] != '\t') {
 					break;
 				}
@@ -1103,6 +1108,26 @@ preprocess(char *data, int size)
 	}
 }
 
+static void
+preprocess(char *data, int size)
+{
+	int chunk_size = 16; /* bytes covered by one __m128i load */
+	int whole = size & (~(chunk_size - 1)); /* non-negative size rounded down to a multiple of chunk_size */
+
+	__m128i mask = _mm_set1_epi8('\\'); /* a backslash in every byte lane */
+
+	for (int i = 0; i < whole; i += chunk_size) {
+		__m128i chunk = _mm_loadu_si128((__m128i *) (data + i));
+		__m128i match = _mm_cmpeq_epi8(chunk, mask);
+
+		if (!_mm_testz_si128(match, match)) { /* taken when at least one byte compared equal */
+			preprocess_scalar(data + i, chunk_size, size - i); /* scan this chunk only; lookahead may run to the end of data */
+		}
+	}
+
+	preprocess_scalar(data + whole, size - whole, size - whole); /* bytes after the last full chunk */
+}
+
 static void
 run(char *data, int size)
 {