Browse Source

Change extraction of pressure bytes to a f32x4_make. Change andnot masking to a bitselect. Rename README.md to README.txt to hopefully fix Gitea formatting

ssao
A.Olokhtonov 8 months ago
parent
commit
e949860279
  1. 5
      README.txt
  2. 70
      client/wasm/lod.c
  3. BIN
      client/wasm/lod.wasm

5
README.md → README.txt

@ -9,6 +9,7 @@ Release:
- Z-prepass fringe bug (also, when do we enable the prepass?) - Z-prepass fringe bug (also, when do we enable the prepass?)
- Textured quads (pictures, code already written in older version) - Textured quads (pictures, code already written in older version)
- Resize and move pictures (draw handles) - Resize and move pictures (draw handles)
- SIMD for LOD?
+ Bugs + Bugs
+ GC stalls!!! + GC stalls!!!
+ Stroke previews get connected when drawn without panning on touch devices + Stroke previews get connected when drawn without panning on touch devices
@ -21,10 +22,10 @@ Release:
+ Fix blinking own stroke inbetween SYN->server and SYN->client + Fix blinking own stroke inbetween SYN->server and SYN->client
+ Drag with mouse button 3 + Drag with mouse button 3
+ Investigate skipped inputs on mobile (panning, zooming) [Events were not actually getting skipped. The stroke previews were just not being drawn] + Investigate skipped inputs on mobile (panning, zooming) [Events were not actually getting skipped. The stroke previews were just not being drawn]
- Be able to have multiple "current" strokes per player. In case of bad internet this can happen!
- Do NOT use session id as player id LUL - Do NOT use session id as player id LUL
- Save events to indexeddb (as some kind of a blob), restore on reconnect and page reload - Save events to indexeddb (as some kind of a blob), restore on reconnect and page reload
- Local prediction for tools! - Local prediction for tools!
- Be able to have multiple "current" strokes per player. In case of bad internet this can happen!
* Missing features I do not consider bonus * Missing features I do not consider bonus
+ Player pointers + Player pointers
+ Pretty player pointers + Pretty player pointers
@ -52,7 +53,7 @@ Bonus:
+ Add pressure data to quads + Add pressure data to quads
+ Account for pressure in quad/bbox calc + Account for pressure in quad/bbox calc
+ Adjust curve simplification to include pressure info + Adjust curve simplification to include pressure info
- Migrate old non-pressure desks + Migrate old non-pressure desks
- Check out e.pressure on touch devices - Check out e.pressure on touch devices
- Send pressure in PREDRAW event - Send pressure in PREDRAW event
- Curve modification - Curve modification

70
client/wasm/lod.c

@ -62,26 +62,26 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
float dist_ab = __builtin_sqrtf(dx * dx + dy * dy); float dist_ab = __builtin_sqrtf(dx * dx + dy * dy);
float dir_nx = dy / dist_ab * 255.0f; float dir_nx = dy / dist_ab * 255.0f;
float dir_ny = -dx / dist_ab * 255.0f; float dir_ny = -dx / dist_ab * 255.0f;
#if 0 #if 0
for (int i = segment_start + 1; i < segment_end; ++i) { for (int i = segment_start + 1; i < segment_end; ++i) {
float px = xs[coords_from + i]; float px = xs[coords_from + i];
float py = ys[coords_from + i]; float py = ys[coords_from + i];
unsigned char pp = pressures[coords_from + i];
float apx = px - ax; unsigned char pp = pressures[coords_from + i];
float apy = py - ay;
float apx = px - ax;
float dist = __builtin_fabsf(apx * dir_nx + apy * dir_ny) float apy = py - ay;
+ __builtin_abs(pp - ap) + __builtin_abs(pp - bp);
float dist = __builtin_fabsf(apx * dir_nx + apy * dir_ny)
if (dist > EPS && dist > max_dist) { + __builtin_abs(pp - ap) + __builtin_abs(pp - bp);
result = i;
max_dist = dist; if (dist > EPS && dist > max_dist) {
result = i;
max_dist = dist;
}
} }
}
#else #else
v128_t eps_x4 = wasm_f32x4_splat(EPS);
v128_t ax_x4 = wasm_f32x4_splat(ax); v128_t ax_x4 = wasm_f32x4_splat(ax);
v128_t ay_x4 = wasm_f32x4_splat(ay); v128_t ay_x4 = wasm_f32x4_splat(ay);
v128_t ap_x4 = wasm_f32x4_splat(ap); v128_t ap_x4 = wasm_f32x4_splat(ap);
@ -91,18 +91,19 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
v128_t index_x4 = wasm_u32x4_make(segment_start + 1, segment_start + 2, segment_start + 3, segment_start + 4); v128_t index_x4 = wasm_u32x4_make(segment_start + 1, segment_start + 2, segment_start + 3, segment_start + 4);
v128_t four_x4 = wasm_u32x4_const_splat(4); v128_t four_x4 = wasm_u32x4_const_splat(4);
v128_t max_vals_x4 = wasm_f32x4_const_splat(0.0f); v128_t max_dist_x4 = wasm_f32x4_splat(EPS);
v128_t max_index_x4 = wasm_u32x4_const_splat(-1); v128_t max_index_x4 = wasm_u32x4_const_splat(-1);
for (int i = segment_start + 1; i < segment_end - 3; i += 4) { for (int i = segment_start + 1; i < segment_end - 3; i += 4) {
v128_t px_x4 = wasm_v128_load(xs + coords_from + i); v128_t px_x4 = wasm_v128_load(xs + coords_from + i);
v128_t py_x4 = wasm_v128_load(ys + coords_from + i); v128_t py_x4 = wasm_v128_load(ys + coords_from + i);
v128_t pp_x16 = wasm_v128_load(pressures + coords_from / 2 + i);
// Take 4 highest bytes and convert to float v128_t pp_x4 = wasm_f32x4_make(
v128_t pp_x8 = wasm_u16x8_extend_high_u8x16(pp_x16); pressures[coords_from / 2 + i + 0],
v128_t pp_x4i = wasm_u32x4_extend_high_u16x8(pp_x8); pressures[coords_from / 2 + i + 1],
v128_t pp_x4 = wasm_f32x4_convert_i32x4(pp_x4i); // i version is 8 times faster on x64? pressures[coords_from / 2 + i + 2],
pressures[coords_from / 2 + i + 3]
);
v128_t apx_x4 = wasm_f32x4_sub(px_x4, ax_x4); v128_t apx_x4 = wasm_f32x4_sub(px_x4, ax_x4);
v128_t apy_x4 = wasm_f32x4_sub(py_x4, ay_x4); v128_t apy_x4 = wasm_f32x4_sub(py_x4, ay_x4);
@ -120,19 +121,10 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
) )
); );
v128_t dist_mask = wasm_f32x4_gt(dist_x4, eps_x4); v128_t mask = wasm_f32x4_gt(dist_x4, max_dist_x4);
v128_t max_mask = wasm_f32x4_gt(dist_x4, max_vals_x4);
v128_t mask = wasm_v128_and(dist_mask, max_mask);
max_index_x4 = wasm_v128_or(
wasm_v128_and(index_x4, mask),
wasm_v128_andnot(max_index_x4, mask)
);
max_vals_x4 = wasm_v128_or( max_index_x4 = wasm_v128_bitselect(index_x4, max_index_x4, mask);
wasm_v128_and(dist_x4, mask), max_dist_x4 = wasm_v128_bitselect(dist_x4, max_dist_x4, mask);
wasm_v128_andnot(max_vals_x4, mask)
);
index_x4 = wasm_i32x4_add(index_x4, four_x4); index_x4 = wasm_i32x4_add(index_x4, four_x4);
} }
@ -141,7 +133,7 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
float values[4]; float values[4];
wasm_v128_store(indices, max_index_x4); wasm_v128_store(indices, max_index_x4);
wasm_v128_store(values, max_vals_x4); wasm_v128_store(values, max_dist_x4);
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
if (indices[i] != -1) { if (indices[i] != -1) {
@ -152,6 +144,11 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
} }
} }
if (max_dist == EPS) {
max_dist = 0.0f;
result = -1;
}
int remainder = (segment_end - segment_start - 1) % 4; int remainder = (segment_end - segment_start - 1) % 4;
for (int i = segment_end - remainder; i < segment_end; ++i) { for (int i = segment_end - remainder; i < segment_end; ++i) {
@ -171,9 +168,8 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
max_dist = dist; max_dist = dist;
} }
} }
#endif #endif
return(result); return(result);
} }

BIN
client/wasm/lod.wasm

Binary file not shown.
Loading…
Cancel
Save