Browse Source

Change extracts of pressure bytes to a f32x4_make. Change andnot masking to a bitselect. Rename README.md to README.txt to hopefully fix Gitea formatting

ssao
A.Olokhtonov 8 months ago
parent
commit
e949860279
  1. 5
      README.txt
  2. 38
      client/wasm/lod.c
  3. BIN
      client/wasm/lod.wasm

5
README.md → README.txt

@@ -9,6 +9,7 @@ Release:
- Z-prepass fringe bug (also, when do we enable the prepass?)
- Textured quads (pictures, code already written in older version)
- Resize and move pictures (draw handles)
- SIMD for LOD?
+ Bugs
+ GC stalls!!!
+ Stroke previews get connected when drawn without panning on touch devices
@@ -21,10 +22,10 @@ Release:
+ Fix blinking own stroke in between SYN->server and SYN->client
+ Drag with mouse button 3
+ Investigate skipped inputs on mobile (panning, zooming) [Events were not actually getting skipped. The stroke previews were just not being drawn]
- Be able to have multiple "current" strokes per player. In case of bad internet this can happen!
- Do NOT use session id as player id LUL
- Save events to indexeddb (as some kind of a blob), restore on reconnect and page reload
- Local prediction for tools!
- Be able to have multiple "current" strokes per player. In case of bad internet this can happen!
* Missing features I do not consider bonus
+ Player pointers
+ Pretty player pointers
@@ -52,7 +53,7 @@ Bonus:
+ Add pressure data to quads
+ Account for pressure in quad/bbox calc
+ Adjust curve simplification to include pressure info
- Migrate old non-pressure desks
+ Migrate old non-pressure desks
- Check out e.pressure on touch devices
- Send pressure in PREDRAW event
- Curve modification

38
client/wasm/lod.c

@@ -62,6 +62,7 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
float dist_ab = __builtin_sqrtf(dx * dx + dy * dy);
float dir_nx = dy / dist_ab * 255.0f;
float dir_ny = -dx / dist_ab * 255.0f;
#if 0
for (int i = segment_start + 1; i < segment_end; ++i) {
float px = xs[coords_from + i];
@@ -81,7 +82,6 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
}
}
#else
v128_t eps_x4 = wasm_f32x4_splat(EPS);
v128_t ax_x4 = wasm_f32x4_splat(ax);
v128_t ay_x4 = wasm_f32x4_splat(ay);
v128_t ap_x4 = wasm_f32x4_splat(ap);
@@ -91,18 +91,19 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
v128_t index_x4 = wasm_u32x4_make(segment_start + 1, segment_start + 2, segment_start + 3, segment_start + 4);
v128_t four_x4 = wasm_u32x4_const_splat(4);
v128_t max_vals_x4 = wasm_f32x4_const_splat(0.0f);
v128_t max_dist_x4 = wasm_f32x4_splat(EPS);
v128_t max_index_x4 = wasm_u32x4_const_splat(-1);
for (int i = segment_start + 1; i < segment_end - 3; i += 4) {
v128_t px_x4 = wasm_v128_load(xs + coords_from + i);
v128_t py_x4 = wasm_v128_load(ys + coords_from + i);
v128_t pp_x16 = wasm_v128_load(pressures + coords_from / 2 + i);
// Take 4 highest bytes and convert to float
v128_t pp_x8 = wasm_u16x8_extend_high_u8x16(pp_x16);
v128_t pp_x4i = wasm_u32x4_extend_high_u16x8(pp_x8);
v128_t pp_x4 = wasm_f32x4_convert_i32x4(pp_x4i); // i version is 8 times faster on x64?
v128_t pp_x4 = wasm_f32x4_make(
pressures[coords_from / 2 + i + 0],
pressures[coords_from / 2 + i + 1],
pressures[coords_from / 2 + i + 2],
pressures[coords_from / 2 + i + 3]
);
v128_t apx_x4 = wasm_f32x4_sub(px_x4, ax_x4);
v128_t apy_x4 = wasm_f32x4_sub(py_x4, ay_x4);
@@ -120,19 +121,10 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
)
);
v128_t dist_mask = wasm_f32x4_gt(dist_x4, eps_x4);
v128_t max_mask = wasm_f32x4_gt(dist_x4, max_vals_x4);
v128_t mask = wasm_v128_and(dist_mask, max_mask);
max_index_x4 = wasm_v128_or(
wasm_v128_and(index_x4, mask),
wasm_v128_andnot(max_index_x4, mask)
);
v128_t mask = wasm_f32x4_gt(dist_x4, max_dist_x4);
max_vals_x4 = wasm_v128_or(
wasm_v128_and(dist_x4, mask),
wasm_v128_andnot(max_vals_x4, mask)
);
max_index_x4 = wasm_v128_bitselect(index_x4, max_index_x4, mask);
max_dist_x4 = wasm_v128_bitselect(dist_x4, max_dist_x4, mask);
index_x4 = wasm_i32x4_add(index_x4, four_x4);
}
@@ -141,7 +133,7 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
float values[4];
wasm_v128_store(indices, max_index_x4);
wasm_v128_store(values, max_vals_x4);
wasm_v128_store(values, max_dist_x4);
for (int i = 0; i < 4; ++i) {
if (indices[i] != -1) {
@@ -152,6 +144,11 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
}
}
if (max_dist == EPS) {
max_dist = 0.0f;
result = -1;
}
int remainder = (segment_end - segment_start - 1) % 4;
for (int i = segment_end - remainder; i < segment_end; ++i) {
@@ -171,7 +168,6 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
max_dist = dist;
}
}
#endif
return(result);

BIN
client/wasm/lod.wasm

Binary file not shown.
Loading…
Cancel
Save