From e94986027998b6194aa83ff9fac14ff9413bf86f Mon Sep 17 00:00:00 2001 From: "A.Olokhtonov" Date: Sat, 23 Mar 2024 12:20:10 +0300 Subject: [PATCH] Change extracts of pressure bytes to a f32x4_make. Change andnot masking to a bitselect. Rename readme.md to readme.txt to hopefully fix gitea formatting --- README.md => README.txt | 5 +-- client/wasm/lod.c | 70 +++++++++++++++++++--------------------- client/wasm/lod.wasm | Bin 2295 -> 2279 bytes 3 files changed, 36 insertions(+), 39 deletions(-) rename README.md => README.txt (98%) diff --git a/README.md b/README.txt similarity index 98% rename from README.md rename to README.txt index 39333a1..66008e4 100644 --- a/README.md +++ b/README.txt @@ -9,6 +9,7 @@ Release: - Z-prepass fringe bug (also, when do we enable the prepass?) - Textured quads (pictures, code already written in older version) - Resize and move pictures (draw handles) + - SIMD for LOD? + Bugs + GC stalls!!! + Stroke previews get connected when drawn without panning on touch devices @@ -21,10 +22,10 @@ Release: + Fix blinking own stroke inbetween SYN->server and SYN->client + Drag with mouse button 3 + Investigate skipped inputs on mobile (panning, zooming) [Events were not actually getting skipped. The stroke previews were just not being drawn] + - Be able to have multiple "current" strokes per player. In case of bad internet this can happen! - Do NOT use session id as player id LUL - Save events to indexeddb (as some kind of a blob), restore on reconnect and page reload - Local prediction for tools! - - Be able to have multiple "current" strokes per player. In case of bad internet this can happen! 
* Missing features I do not consider bonus + Player pointers + Pretty player pointers @@ -52,7 +53,7 @@ Bonus: + Add pressure data to quads + Account for pressure in quad/bbox calc + Adjust curve simplification to include pressure info - - Migrate old non-pressure desks + + Migrate old non-pressure desks - Check out e.pressure on touch devices - Send pressure in PREDRAW event - Curve modification diff --git a/client/wasm/lod.c b/client/wasm/lod.c index 93d492d..e63d270 100644 --- a/client/wasm/lod.c +++ b/client/wasm/lod.c @@ -62,26 +62,26 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo float dist_ab = __builtin_sqrtf(dx * dx + dy * dy); float dir_nx = dy / dist_ab * 255.0f; float dir_ny = -dx / dist_ab * 255.0f; + #if 0 - for (int i = segment_start + 1; i < segment_end; ++i) { - float px = xs[coords_from + i]; - float py = ys[coords_from + i]; - - unsigned char pp = pressures[coords_from + i]; + for (int i = segment_start + 1; i < segment_end; ++i) { + float px = xs[coords_from + i]; + float py = ys[coords_from + i]; - float apx = px - ax; - float apy = py - ay; - - float dist = __builtin_fabsf(apx * dir_nx + apy * dir_ny) - + __builtin_abs(pp - ap) + __builtin_abs(pp - bp); - - if (dist > EPS && dist > max_dist) { - result = i; - max_dist = dist; + unsigned char pp = pressures[coords_from + i]; + + float apx = px - ax; + float apy = py - ay; + + float dist = __builtin_fabsf(apx * dir_nx + apy * dir_ny) + + __builtin_abs(pp - ap) + __builtin_abs(pp - bp); + + if (dist > EPS && dist > max_dist) { + result = i; + max_dist = dist; + } } - } #else - v128_t eps_x4 = wasm_f32x4_splat(EPS); v128_t ax_x4 = wasm_f32x4_splat(ax); v128_t ay_x4 = wasm_f32x4_splat(ay); v128_t ap_x4 = wasm_f32x4_splat(ap); @@ -91,18 +91,19 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo v128_t index_x4 = wasm_u32x4_make(segment_start + 1, segment_start + 2, segment_start + 3, segment_start + 4); v128_t four_x4 = 
wasm_u32x4_const_splat(4); - v128_t max_vals_x4 = wasm_f32x4_const_splat(0.0f); + v128_t max_dist_x4 = wasm_f32x4_splat(EPS); v128_t max_index_x4 = wasm_u32x4_const_splat(-1); for (int i = segment_start + 1; i < segment_end - 3; i += 4) { v128_t px_x4 = wasm_v128_load(xs + coords_from + i); v128_t py_x4 = wasm_v128_load(ys + coords_from + i); - v128_t pp_x16 = wasm_v128_load(pressures + coords_from / 2 + i); - // Take 4 highest bytes and convert to float - v128_t pp_x8 = wasm_u16x8_extend_high_u8x16(pp_x16); - v128_t pp_x4i = wasm_u32x4_extend_high_u16x8(pp_x8); - v128_t pp_x4 = wasm_f32x4_convert_i32x4(pp_x4i); // i version is 8 times faster on x64? + v128_t pp_x4 = wasm_f32x4_make( + pressures[coords_from / 2 + i + 0], + pressures[coords_from / 2 + i + 1], + pressures[coords_from / 2 + i + 2], + pressures[coords_from / 2 + i + 3] + ); v128_t apx_x4 = wasm_f32x4_sub(px_x4, ax_x4); v128_t apy_x4 = wasm_f32x4_sub(py_x4, ay_x4); @@ -120,19 +121,10 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo ) ); - v128_t dist_mask = wasm_f32x4_gt(dist_x4, eps_x4); - v128_t max_mask = wasm_f32x4_gt(dist_x4, max_vals_x4); - v128_t mask = wasm_v128_and(dist_mask, max_mask); - - max_index_x4 = wasm_v128_or( - wasm_v128_and(index_x4, mask), - wasm_v128_andnot(max_index_x4, mask) - ); + v128_t mask = wasm_f32x4_gt(dist_x4, max_dist_x4); - max_vals_x4 = wasm_v128_or( - wasm_v128_and(dist_x4, mask), - wasm_v128_andnot(max_vals_x4, mask) - ); + max_index_x4 = wasm_v128_bitselect(index_x4, max_index_x4, mask); + max_dist_x4 = wasm_v128_bitselect(dist_x4, max_dist_x4, mask); index_x4 = wasm_i32x4_add(index_x4, four_x4); } @@ -141,7 +133,7 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo float values[4]; wasm_v128_store(indices, max_index_x4); - wasm_v128_store(values, max_vals_x4); + wasm_v128_store(values, max_dist_x4); for (int i = 0; i < 4; ++i) { if (indices[i] != -1) { @@ -152,6 +144,11 @@ rdp_find_max(float *xs, 
float *ys, unsigned char *pressures, float zoom, int coo } } + if (max_dist == EPS) { + max_dist = 0.0f; + result = -1; + } + int remainder = (segment_end - segment_start - 1) % 4; for (int i = segment_end - remainder; i < segment_end; ++i) { @@ -171,9 +168,8 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo max_dist = dist; } } - #endif - + return(result); } diff --git a/client/wasm/lod.wasm b/client/wasm/lod.wasm index 1c62b2f84e4340d3be32f3cc5bf58eb4f4a069bd..63161d4d181a80173dbb4207c15489f85bf3bc27 100755 GIT binary patch delta 652 zcmZ8dO>fgc5cN8CYS(ts*ro+li06fh3YxD%6O!~JriEh<9FPi>1IIN2p#Ty>$l``n zPFeE<_y?#JC%E%BdMI$<2XH_um`yk!_Aoo|y_wlJ`?L1F_F<2)_wo+gbG`(nrQCVt z$xrNs-BMA~mp$|PnFtGDa71c86_`!^haxTBSR#pKVLqCRMN+{$0<9)4qNA|1z)nZz z5GE|t1V4YtH29T$Ylu^bWNIBeh$fSQj%grDnMfkv^x&pewoH{Vf|~(@jVfZnLn`I2 zpb^9bsA?13*O<2@Z@fhVjRIv%OUjr^tFmFwhXWFu@3svi-!AwXt+Os~+ZR|YU)qZ| zVw@GCZWw&!h?EV?(Dc7dT{D|lrMQJQ#i8wz5^xQKZju2*tm&eq$l5v-QkVtm~H#;2jCR+FP{`*fR52LAHl^@HW^L6-2 zyBMmkOnqY*pfbT*Q>;#yI%dk(PX9e*VOgkqhM|HvW|nGpc_wscy(p+u)i1_7*wp>U o3>|DtGd<}PgUMx=zZe$rF-t zU(yIdLd+NwJTN_PC|-Mo8frO;m`aM6QcYEJXQMSD>#sL0YrL8BRf@B&cH9LPtEcY# z%^2sTtXdXdJ|w1(6EyuVQZ>j1np6j9P!&3`j;*w)&I4Y` zjN!6O12RclnaEO>mI?!u5DzIRB}ZWLH568JTtV41O_d+)6Sp|==%bqP1c^BRm?tA7 z+4}>!wVZLEo{cbn1fMWo$I9)BNxk=wC-d|hb!rmzsrYfcjzCie2>p%e?+j}wPx`^M z*GEi$HD;Eif%QwK-!T1)@fJ3Wg18p?)05F2G2TU6Z=s8x-qJ(Tb@lMycS0L2eM4X8 aX&YDR_&-T!Dp9uAW0C567pdO$p8o+gd8oAj