|
|
|
@ -1,3 +1,5 @@
@@ -1,3 +1,5 @@
|
|
|
|
|
// clang -g -Wall -Wextra -O3 -Wl,--export-all,--no-entry --target=wasm32 -Xclang -target-feature -Xclang +simd128 lod.c -nostdlib -o lod.wasm
|
|
|
|
|
|
|
|
|
|
#include <wasm_simd128.h> |
|
|
|
|
|
|
|
|
|
extern char __heap_base; |
|
|
|
@ -34,20 +36,18 @@ alloc_dynamic(int size)
@@ -34,20 +36,18 @@ alloc_dynamic(int size)
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int |
|
|
|
|
rdp_find_max(float *coordinates, unsigned char *pressures, float zoom, int coords_from, |
|
|
|
|
rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coords_from, |
|
|
|
|
int segment_start, int segment_end) |
|
|
|
|
{ |
|
|
|
|
float EPS = 0.125 / zoom; |
|
|
|
|
|
|
|
|
|
// __i32x4 a = wasm_i32x4_load16x4(coordinates);
|
|
|
|
|
float EPS = 0.125f / zoom * 255.0f; |
|
|
|
|
|
|
|
|
|
int result = -1; |
|
|
|
|
float max_dist = 0.0f; |
|
|
|
|
|
|
|
|
|
float ax = coordinates[coords_from + segment_start * 2 + 0]; |
|
|
|
|
float ay = coordinates[coords_from + segment_start * 2 + 1]; |
|
|
|
|
float bx = coordinates[coords_from + segment_end * 2 + 0]; |
|
|
|
|
float by = coordinates[coords_from + segment_end * 2 + 1]; |
|
|
|
|
float ax = xs[coords_from + segment_start]; |
|
|
|
|
float ay = ys[coords_from + segment_start]; |
|
|
|
|
float bx = xs[coords_from + segment_end]; |
|
|
|
|
float by = ys[coords_from + segment_end]; |
|
|
|
|
|
|
|
|
|
unsigned char ap = pressures[coords_from / 2 + segment_start]; |
|
|
|
|
unsigned char bp = pressures[coords_from / 2 + segment_end]; |
|
|
|
@ -56,12 +56,63 @@ rdp_find_max(float *coordinates, unsigned char *pressures, float zoom, int coord
@@ -56,12 +56,63 @@ rdp_find_max(float *coordinates, unsigned char *pressures, float zoom, int coord
|
|
|
|
|
float dy = by - ay; |
|
|
|
|
|
|
|
|
|
float dist_ab = __builtin_sqrtf(dx * dx + dy * dy); |
|
|
|
|
float dir_nx = dy / dist_ab; |
|
|
|
|
float dir_ny = -dx / dist_ab; |
|
|
|
|
float dir_nx = dy / dist_ab * 255.0f; |
|
|
|
|
float dir_ny = -dx / dist_ab * 255.0f; |
|
|
|
|
|
|
|
|
|
#if 0 |
|
|
|
|
v128_t scale_255 = wasm_f32x4_splat(1.0f / 255.0f); |
|
|
|
|
v128_t EPSs = wasm_f32x4_splat(EPS); |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
for (int i = segment_start + 1; i < segment_end; ++i) { |
|
|
|
|
float px = coordinates[coords_from + i * 2 + 0]; |
|
|
|
|
float py = coordinates[coords_from + i * 2 + 1]; |
|
|
|
|
#if 0 |
|
|
|
|
v128_t pxs = wasm_v128_load(coordinates_x + coords_from + i); |
|
|
|
|
v128_t pxs = wasm_v128_load(coordinates_y + coords_from + i); |
|
|
|
|
|
|
|
|
|
v128_t pps = wasm_v128_load(pressures + coords_from + i); |
|
|
|
|
|
|
|
|
|
v128_t apxs = wasm_f32x4_sub(pxs, axs); |
|
|
|
|
v128_t apys = wasm_f32x4_sub(pys, ays); |
|
|
|
|
|
|
|
|
|
v128_t dists = wasm_f32x4_add( |
|
|
|
|
wasm_f32x4_add( |
|
|
|
|
wasm_f32x4_mul(wasm_f32x4_abs(wasm_f32x4_sub(pps, aps)), scale_255), |
|
|
|
|
wasm_f32x4_mul(wasm_f32x4_abs(wasm_f32x4_sub(pps, bps)), scale_255) |
|
|
|
|
), |
|
|
|
|
wasm_f32x4_abs( |
|
|
|
|
wasm_f32x4_add( |
|
|
|
|
wasm_f32x4_mul(apxs, dir_nxs), |
|
|
|
|
wasm_f32x4_mul(apys, dir_nys) |
|
|
|
|
) |
|
|
|
|
) |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
v128_t dist_mask = wasm_f32x4_gt(dists, EPSs); |
|
|
|
|
v128_t max_mask = wasm_f32x4_gt(dists, max_dists); |
|
|
|
|
v128_t final_mask = wasm_v128_and(dist_mask, max_mask); |
|
|
|
|
|
|
|
|
|
if (!wasm_v128_any_true(final_mask)) { |
|
|
|
|
// Fast path: no lane is both above EPS and above the running max, so skip the horizontal reduction.
|
|
|
|
|
continue; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
// Places max(0, 2) and max(1, 3) into lanes (0, 1)
|
|
|
|
|
v128_t max_02_13 = wasm_f32x4_max( |
|
|
|
|
dists, |
|
|
|
|
wasm_i32x4_shuffle(dists, dists, 2, 3, 2, 3) |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
// Places max(max(0, 2), max(1, 3)) into lane 0
|
|
|
|
|
v128_t max_0123 = wasm_f32x4_max( |
|
|
|
|
max_02_13, |
|
|
|
|
wasm_i32x4_shuffle(max_02_13, max_02_13, 1, 1, 1, 1) |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
float final_max = wasm_f32x4_extract_lane(max_0123, 0); |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
float px = xs[coords_from + i]; |
|
|
|
|
float py = ys[coords_from + i]; |
|
|
|
|
|
|
|
|
|
unsigned char pp = pressures[coords_from + i]; |
|
|
|
|
|
|
|
|
@ -69,7 +120,7 @@ rdp_find_max(float *coordinates, unsigned char *pressures, float zoom, int coord
@@ -69,7 +120,7 @@ rdp_find_max(float *coordinates, unsigned char *pressures, float zoom, int coord
|
|
|
|
|
float apy = py - ay; |
|
|
|
|
|
|
|
|
|
float dist = __builtin_fabsf(apx * dir_nx + apy * dir_ny) |
|
|
|
|
+ __builtin_abs(pp - ap) / 255.0f + __builtin_abs(pp - bp) / 255.0f; |
|
|
|
|
+ __builtin_abs(pp - ap) + __builtin_abs(pp - bp); |
|
|
|
|
|
|
|
|
|
if (dist > EPS && dist > max_dist) { |
|
|
|
|
result = i; |
|
|
|
@ -84,7 +135,8 @@ int
@@ -84,7 +135,8 @@ int
|
|
|
|
|
do_lod(int *clipped_indices, int clipped_count, float zoom, |
|
|
|
|
int *stroke_coords_from, |
|
|
|
|
float *line_threshold, |
|
|
|
|
float *coordinates, |
|
|
|
|
float *xs, |
|
|
|
|
float *ys, |
|
|
|
|
unsigned char *pressures, |
|
|
|
|
int coordinates_count) |
|
|
|
|
{ |
|
|
|
@ -93,7 +145,7 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
@@ -93,7 +145,7 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
int *segments_from = alloc_dynamic((clipped_count + 1) * 4); |
|
|
|
|
int *segments = alloc_dynamic(coordinates_count / 2 * 4); |
|
|
|
|
int *segments = alloc_dynamic(coordinates_count * 4); |
|
|
|
|
|
|
|
|
|
int segments_head = 0; |
|
|
|
|
int stack[4096]; // TODO: what's a reasonable max size for this?
|
|
|
|
@ -105,7 +157,7 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
@@ -105,7 +157,7 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
|
|
|
|
|
int coords_from = stroke_coords_from[stroke_index]; |
|
|
|
|
int coords_to = stroke_coords_from[stroke_index + 1]; |
|
|
|
|
|
|
|
|
|
int point_count = (coords_to - coords_from) / 2; |
|
|
|
|
int point_count = coords_to - coords_from; |
|
|
|
|
|
|
|
|
|
// CSR-style bookkeeping: segments_from[i] records where this stroke's entries begin in `segments`.
|
|
|
|
|
segments_from[i] = segments_head; |
|
|
|
@ -134,7 +186,7 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
@@ -134,7 +186,7 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
|
|
|
|
|
if (type == 1) { |
|
|
|
|
segments[segments_head++] = start; |
|
|
|
|
} else { |
|
|
|
|
int max = rdp_find_max(coordinates, pressures, zoom, coords_from, start, end); |
|
|
|
|
int max = rdp_find_max(xs, ys, pressures, zoom, coords_from, start, end); |
|
|
|
|
if (max != -1) { |
|
|
|
|
segment_count += 1; |
|
|
|
|
|
|
|
|
@ -180,13 +232,13 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
@@ -180,13 +232,13 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
|
|
|
|
|
|
|
|
|
|
for (int j = from; j < to; ++j) { |
|
|
|
|
int point_index = segments[j]; |
|
|
|
|
float x = coordinates[base_stroke + point_index * 2 + 0]; |
|
|
|
|
float y = coordinates[base_stroke + point_index * 2 + 1]; |
|
|
|
|
float x = xs[base_stroke + point_index]; |
|
|
|
|
float y = ys[base_stroke + point_index]; |
|
|
|
|
|
|
|
|
|
points[phead++] = x; |
|
|
|
|
points[phead++] = y; |
|
|
|
|
|
|
|
|
|
pressures_res[ihead] = pressures[base_stroke / 2 + point_index]; |
|
|
|
|
pressures_res[ihead] = pressures[base_stroke + point_index]; |
|
|
|
|
|
|
|
|
|
if (j != to - 1) { |
|
|
|
|
ids[ihead++] = stroke_index; |
|
|
|
|