You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
298 lines
8.7 KiB
298 lines
8.7 KiB
// clang -Oz --target=wasm32 -nostdlib -Wl,--export-all,--no-entry -msimd128 lod.c -o lod.wasm
|
|
|
#include <wasm_simd128.h> |
|
|
|
extern char __heap_base; |
|
|
|
static int allocated_static; |
|
static int allocated_dynamic; |
|
|
|
void |
|
free_static(void) |
|
{ |
|
allocated_static = 0; |
|
} |
|
|
|
void |
|
free_dynamic(void) |
|
{ |
|
allocated_dynamic = 0; |
|
} |
|
|
|
void * |
|
alloc_static(int size) |
|
{ |
|
void *result = &__heap_base + allocated_static; |
|
allocated_static += size; |
|
return(result); |
|
} |
|
|
|
void * |
|
alloc_dynamic(int size) |
|
{ |
|
void *result = &__heap_base + allocated_static + allocated_dynamic; |
|
allocated_dynamic += size; |
|
return(result); |
|
} |
|
|
|
static int |
|
rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coords_from, |
|
int segment_start, int segment_end) |
|
{ |
|
int result = -1; |
|
|
|
if (segment_start == segment_end) { |
|
return(result); |
|
} |
|
|
|
float EPS = 0.125f / zoom * 255.0f; |
|
float max_dist = 0.0f; |
|
|
|
float ax = xs[coords_from + segment_start]; |
|
float ay = ys[coords_from + segment_start]; |
|
float bx = xs[coords_from + segment_end]; |
|
float by = ys[coords_from + segment_end]; |
|
|
|
unsigned char ap = pressures[coords_from / 2 + segment_start]; |
|
unsigned char bp = pressures[coords_from / 2 + segment_end]; |
|
|
|
float dx = bx - ax; |
|
float dy = by - ay; |
|
|
|
float dist_ab = __builtin_sqrtf(dx * dx + dy * dy); |
|
float dir_nx = dy / dist_ab * 255.0f; |
|
float dir_ny = -dx / dist_ab * 255.0f; |
|
#if 1 |
|
for (int i = segment_start + 1; i < segment_end; ++i) { |
|
float px = xs[coords_from + i]; |
|
float py = ys[coords_from + i]; |
|
|
|
unsigned char pp = pressures[coords_from + i]; |
|
|
|
float apx = px - ax; |
|
float apy = py - ay; |
|
|
|
float dist = __builtin_fabsf(apx * dir_nx + apy * dir_ny) |
|
+ __builtin_abs(pp - ap) + __builtin_abs(pp - bp); |
|
|
|
if (dist > EPS && dist > max_dist) { |
|
result = i; |
|
max_dist = dist; |
|
} |
|
} |
|
#else |
|
v128_t eps_x4 = wasm_f32x4_splat(EPS); |
|
v128_t ax_x4 = wasm_f32x4_splat(ax); |
|
v128_t ay_x4 = wasm_f32x4_splat(ay); |
|
v128_t ap_x4 = wasm_f32x4_splat(ap); |
|
v128_t bp_x4 = wasm_f32x4_splat(bp); |
|
v128_t dir_nx_x4 = wasm_f32x4_splat(dir_nx); |
|
v128_t dir_ny_x4 = wasm_f32x4_splat(dir_ny); |
|
|
|
v128_t index_x4 = wasm_u32x4_make(segment_start + 1, segment_start + 2, segment_start + 3, segment_start + 4); |
|
v128_t four_x4 = wasm_u32x4_const_splat(4); |
|
v128_t max_vals_x4 = wasm_f32x4_const_splat(0.0f); |
|
v128_t max_index_x4 = wasm_u32x4_const_splat(-1); |
|
|
|
for (int i = segment_start + 1; i < segment_end - 3; i += 4) { |
|
v128_t px_x4 = wasm_v128_load(xs + coords_from + i); |
|
v128_t py_x4 = wasm_v128_load(ys + coords_from + i); |
|
v128_t pp_x16 = wasm_v128_load(pressures + coords_from / 2 + i); |
|
|
|
// Take 4 highest bytes and convert to float |
|
v128_t pp_x8 = wasm_u16x8_extend_high_u8x16(pp_x16); |
|
v128_t pp_x4i = wasm_u32x4_extend_high_u16x8(pp_x8); |
|
v128_t pp_x4 = wasm_f32x4_convert_i32x4(pp_x4i); // i version is 8 times faster on x64? |
|
|
|
v128_t apx_x4 = wasm_f32x4_sub(px_x4, ax_x4); |
|
v128_t apy_x4 = wasm_f32x4_sub(py_x4, ay_x4); |
|
|
|
v128_t dist_x4 = wasm_f32x4_add( |
|
wasm_f32x4_add( |
|
wasm_f32x4_abs(wasm_f32x4_sub(pp_x4, ap_x4)), |
|
wasm_f32x4_abs(wasm_f32x4_sub(pp_x4, bp_x4)) |
|
), |
|
wasm_f32x4_abs( |
|
wasm_f32x4_add( |
|
wasm_f32x4_mul(apx_x4, dir_nx_x4), |
|
wasm_f32x4_mul(apy_x4, dir_ny_x4) |
|
) |
|
) |
|
); |
|
|
|
v128_t dist_mask = wasm_f32x4_gt(dist_x4, eps_x4); |
|
v128_t max_mask = wasm_f32x4_gt(dist_x4, max_vals_x4); |
|
v128_t mask = wasm_v128_and(dist_mask, max_mask); |
|
|
|
max_index_x4 = wasm_v128_or( |
|
wasm_v128_and(index_x4, mask), |
|
wasm_v128_andnot(max_index_x4, mask) |
|
); |
|
|
|
max_vals_x4 = wasm_v128_or( |
|
wasm_v128_and(dist_x4, mask), |
|
wasm_v128_andnot(max_vals_x4, mask) |
|
); |
|
|
|
index_x4 = wasm_i32x4_add(index_x4, four_x4); |
|
} |
|
|
|
int indices[4]; |
|
float values[4]; |
|
|
|
wasm_v128_store(indices, max_index_x4); |
|
wasm_v128_store(values, max_vals_x4); |
|
|
|
for (int i = 0; i < 4; ++i) { |
|
if (indices[i] != -1) { |
|
if (values[i] > max_dist) { |
|
result = indices[i]; |
|
max_dist = values[i]; |
|
} |
|
} |
|
} |
|
|
|
int remainder = (segment_end - segment_start - 1) % 4; |
|
|
|
for (int i = segment_end - remainder; i < segment_end; ++i) { |
|
float px = xs[coords_from + i]; |
|
float py = ys[coords_from + i]; |
|
|
|
unsigned char pp = pressures[coords_from + i]; |
|
|
|
float apx = px - ax; |
|
float apy = py - ay; |
|
|
|
float dist = __builtin_fabsf(apx * dir_nx + apy * dir_ny) |
|
+ __builtin_abs(pp - ap) + __builtin_abs(pp - bp); |
|
|
|
if (dist > EPS && dist > max_dist) { |
|
result = i; |
|
max_dist = dist; |
|
} |
|
} |
|
|
|
#endif |
|
|
|
return(result); |
|
} |
|
|
|
int |
|
do_lod(int *clipped_indices, int clipped_count, float zoom, |
|
int *stroke_coords_from, |
|
float *line_threshold, |
|
float *xs, |
|
float *ys, |
|
unsigned char *pressures, |
|
int coordinates_count) |
|
{ |
|
if (clipped_count == 0) { |
|
return(0); |
|
} |
|
|
|
int *segments_from = alloc_dynamic((clipped_count + 1) * 4); |
|
int *segments = alloc_dynamic(coordinates_count * 4); |
|
|
|
int segments_head = 0; |
|
int stack[4096]; // TODO: what's a reasonable max size for this? |
|
|
|
for (int i = 0; i < clipped_count; ++i) { |
|
int stroke_index = clipped_indices[i]; |
|
|
|
// TODO: convert to a proper CSR, save half the memory |
|
int coords_from = stroke_coords_from[stroke_index]; |
|
int coords_to = stroke_coords_from[stroke_index + 1]; |
|
|
|
int point_count = coords_to - coords_from; |
|
|
|
// Basic CSR crap |
|
segments_from[i] = segments_head; |
|
|
|
if (zoom < line_threshold[stroke_index]) { |
|
// Fast paths for collapsing to a single line segment |
|
segments[segments_head++] = 0; |
|
segments[segments_head++] = point_count - 1; |
|
continue; |
|
} |
|
|
|
int segment_count = 2; |
|
int stack_head = 0; |
|
|
|
segments[segments_head++] = 0; |
|
|
|
stack[stack_head++] = 0; |
|
stack[stack_head++] = 0; |
|
stack[stack_head++] = point_count - 1; |
|
|
|
while (stack_head > 0) { |
|
int end = stack[--stack_head]; |
|
int start = stack[--stack_head]; |
|
int type = stack[--stack_head]; |
|
|
|
if (type == 1) { |
|
segments[segments_head++] = start; |
|
} else { |
|
int max = rdp_find_max(xs, ys, pressures, zoom, coords_from, start, end); |
|
if (max != -1) { |
|
segment_count += 1; |
|
|
|
stack[stack_head++] = 0; |
|
stack[stack_head++] = max; |
|
stack[stack_head++] = end; |
|
|
|
stack[stack_head++] = 1; |
|
stack[stack_head++] = max; |
|
stack[stack_head++] = -1; |
|
|
|
stack[stack_head++] = 0; |
|
stack[stack_head++] = start; |
|
stack[stack_head++] = max; |
|
} |
|
} |
|
} |
|
|
|
segments[segments_head++] = point_count - 1; |
|
|
|
if (segment_count == 2 && zoom > line_threshold[stroke_index]) { |
|
line_threshold[stroke_index] = zoom; |
|
} |
|
} |
|
|
|
segments_from[clipped_count] = segments_head; |
|
|
|
// Write actual coordinates (points) and stroke ids |
|
float *points = alloc_dynamic(segments_head * 2 * 4); |
|
int *ids = alloc_dynamic(segments_head * 4); |
|
unsigned char *pressures_res = alloc_dynamic(segments_head); |
|
|
|
int phead = 0; |
|
int ihead = 0; |
|
|
|
for (int i = 0; i < clipped_count; ++i) { |
|
int stroke_index = clipped_indices[i]; |
|
|
|
// TODO: convert to a proper CSR, save half the memory |
|
int base_stroke = stroke_coords_from[stroke_index]; |
|
int from = segments_from[i]; |
|
int to = segments_from[i + 1]; |
|
|
|
for (int j = from; j < to; ++j) { |
|
int point_index = segments[j]; |
|
float x = xs[base_stroke + point_index]; |
|
float y = ys[base_stroke + point_index]; |
|
|
|
points[phead++] = x; |
|
points[phead++] = y; |
|
|
|
pressures_res[ihead] = pressures[base_stroke + point_index]; |
|
|
|
if (j != to - 1) { |
|
ids[ihead++] = stroke_index; |
|
} else { |
|
ids[ihead++] = stroke_index | (1 << 31); |
|
} |
|
} |
|
} |
|
|
|
return(segments_head); |
|
}
|
|
|