diff --git a/README.md b/README.txt similarity index 98% rename from README.md rename to README.txt index 39333a1..66008e4 100644 --- a/README.md +++ b/README.txt @@ -9,6 +9,7 @@ Release: - Z-prepass fringe bug (also, when do we enable the prepass?) - Textured quads (pictures, code already written in older version) - Resize and move pictures (draw handles) + - SIMD for LOD? + Bugs + GC stalls!!! + Stroke previews get connected when drawn without panning on touch devices @@ -21,10 +22,10 @@ Release: + Fix blinking own stroke inbetween SYN->server and SYN->client + Drag with mouse button 3 + Investigate skipped inputs on mobile (panning, zooming) [Events were not actually getting skipped. The stroke previews were just not being drawn] + - Be able to have multiple "current" strokes per player. In case of bad internet this can happen! - Do NOT use session id as player id LUL - Save events to indexeddb (as some kind of a blob), restore on reconnect and page reload - Local prediction for tools! - - Be able to have multiple "current" strokes per player. In case of bad internet this can happen! * Missing features I do not consider bonus + Player pointers + Pretty player pointers @@ -52,7 +53,7 @@ Bonus: + Add pressure data to quads + Account for pressure in quad/bbox calc + Adjust curve simplification to include pressure info - - Migrate old non-pressure desks + + Migrate old non-pressure desks - Check out e.pressure on touch devices - Send pressure in PREDRAW event - Curve modification diff --git a/client/wasm/lod.c b/client/wasm/lod.c index 93d492d..e63d270 100644 --- a/client/wasm/lod.c +++ b/client/wasm/lod.c @@ -62,26 +62,26 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo float dist_ab = __builtin_sqrtf(dx * dx + dy * dy); float dir_nx = dy / dist_ab * 255.0f; float dir_ny = -dx / dist_ab * 255.0f; + #if 0 - for (int i = segment_start + 1; i < segment_end; ++i) { - float px = xs[coords_from + i]; - float py = ys[coords_from + i]; - - unsigned char pp = pressures[coords_from + i]; + for (int i = segment_start + 1; i < segment_end; ++i) { + float px = xs[coords_from + i]; + float py = ys[coords_from + i]; - float apx = px - ax; - float apy = py - ay; - - float dist = __builtin_fabsf(apx * dir_nx + apy * dir_ny) - + __builtin_abs(pp - ap) + __builtin_abs(pp - bp); - - if (dist > EPS && dist > max_dist) { - result = i; - max_dist = dist; + unsigned char pp = pressures[coords_from + i]; + + float apx = px - ax; + float apy = py - ay; + + float dist = __builtin_fabsf(apx * dir_nx + apy * dir_ny) + + __builtin_abs(pp - ap) + __builtin_abs(pp - bp); + + if (dist > EPS && dist > max_dist) { + result = i; + max_dist = dist; + } } - } #else - v128_t eps_x4 = wasm_f32x4_splat(EPS); v128_t ax_x4 = wasm_f32x4_splat(ax); v128_t ay_x4 = wasm_f32x4_splat(ay); v128_t ap_x4 = wasm_f32x4_splat(ap); @@ -91,18 +91,19 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo v128_t index_x4 = wasm_u32x4_make(segment_start + 1, segment_start + 2, segment_start + 3, segment_start + 4); v128_t four_x4 = wasm_u32x4_const_splat(4); - v128_t max_vals_x4 = wasm_f32x4_const_splat(0.0f); + v128_t max_dist_x4 = wasm_f32x4_splat(EPS); v128_t max_index_x4 = wasm_u32x4_const_splat(-1); for (int i = segment_start + 1; i < segment_end - 3; i += 4) { v128_t px_x4 = wasm_v128_load(xs + coords_from + i); v128_t py_x4 = wasm_v128_load(ys + coords_from + i); - v128_t pp_x16 = wasm_v128_load(pressures + coords_from / 2 + i); - // Take 4 highest bytes and convert to float - v128_t pp_x8 = wasm_u16x8_extend_high_u8x16(pp_x16); - v128_t pp_x4i = wasm_u32x4_extend_high_u16x8(pp_x8); - v128_t pp_x4 = wasm_f32x4_convert_i32x4(pp_x4i); // i version is 8 times faster on x64? + v128_t pp_x4 = wasm_f32x4_make( + pressures[coords_from / 2 + i + 0], + pressures[coords_from / 2 + i + 1], + pressures[coords_from / 2 + i + 2], + pressures[coords_from / 2 + i + 3] + ); v128_t apx_x4 = wasm_f32x4_sub(px_x4, ax_x4); v128_t apy_x4 = wasm_f32x4_sub(py_x4, ay_x4); @@ -120,19 +121,10 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo ) ); - v128_t dist_mask = wasm_f32x4_gt(dist_x4, eps_x4); - v128_t max_mask = wasm_f32x4_gt(dist_x4, max_vals_x4); - v128_t mask = wasm_v128_and(dist_mask, max_mask); - - max_index_x4 = wasm_v128_or( - wasm_v128_and(index_x4, mask), - wasm_v128_andnot(max_index_x4, mask) - ); + v128_t mask = wasm_f32x4_gt(dist_x4, max_dist_x4); - max_vals_x4 = wasm_v128_or( - wasm_v128_and(dist_x4, mask), - wasm_v128_andnot(max_vals_x4, mask) - ); + max_index_x4 = wasm_v128_bitselect(index_x4, max_index_x4, mask); + max_dist_x4 = wasm_v128_bitselect(dist_x4, max_dist_x4, mask); index_x4 = wasm_i32x4_add(index_x4, four_x4); } @@ -141,7 +133,7 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo float values[4]; wasm_v128_store(indices, max_index_x4); - wasm_v128_store(values, max_vals_x4); + wasm_v128_store(values, max_dist_x4); for (int i = 0; i < 4; ++i) { if (indices[i] != -1) { @@ -152,6 +144,11 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo } } + if (max_dist == EPS) { + max_dist = 0.0f; + result = -1; + } + int remainder = (segment_end - segment_start - 1) % 4; for (int i = segment_end - remainder; i < segment_end; ++i) { @@ -171,9 +168,8 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo max_dist = dist; } } - #endif - + return(result); } diff --git a/client/wasm/lod.wasm b/client/wasm/lod.wasm index 1c62b2f..63161d4 100755 Binary files a/client/wasm/lod.wasm and b/client/wasm/lod.wasm differ