Browse Source

Store WASM-processed data in WASM memory

ssao
aolo2 10 months ago
parent
commit
71f1016a40
  1. 4
      README.md
  2. 9
      client/aux.js
  3. 19
      client/client_recv.js
  4. 7
      client/index.js
  5. 55
      client/math.js
  6. 175
      client/speed.js
  7. 44
      client/wasm/lod.c
  8. BIN
      client/wasm/lod.wasm
  9. 10
      client/webgl_geometry.js

4
README.md

@ -4,8 +4,8 @@ Release: @@ -4,8 +4,8 @@ Release:
+ Reuse points, pack "nodraw" in high bit of stroke id (probably have at least one more bit, so up to 4 flag configurations)
+ Draw dynamic data (strokes in progress)
+ Webassembly for core LOD generation
- Webassembly for final buffers
- Do not copy memory from wasm and back
+ Webassembly for final buffers
+ Do not copy memory to wasm; instead, store the data in wasm memory in the first place
- Z-prepass fringe bug (also, when do we enable the prepass?)
- Textured quads (pictures, code already written in older version)
- Resize and move pictures (draw handles)

9
client/aux.js

@ -121,6 +121,15 @@ function tv_create(class_name, capacity) { @@ -121,6 +121,15 @@ function tv_create(class_name, capacity) {
};
}
/**
 * Builds a typed-vector record whose storage is a view onto an existing buffer
 * instead of a freshly allocated array.
 *
 * @param {Function} class_name - typed-array constructor used to create the view
 * @param {number} capacity - number of elements the view spans
 * @param {ArrayBuffer} buffer - backing buffer the view is created on
 * @param {number} offset - byte offset of the view inside `buffer`
 * @returns {{class_name: Function, data: object, capacity: number, size: number}}
 *          record with `size` initialised to 0
 */
function tv_create_on(class_name, capacity, buffer, offset) {
    // The view aliases `buffer`: writes through `data` land directly in it.
    const view = new class_name(buffer, offset, capacity);

    const tv = {};
    tv['class_name'] = class_name;
    tv['data'] = view;
    tv['capacity'] = capacity;
    tv['size'] = 0;

    return tv;
}
/**
 * Returns the used part of a typed vector.
 *
 * @param {{data: object, size: number}} tv - typed-vector record
 * @returns {object} `tv.data.subarray(0, tv.size)`, i.e. a view on the same
 *          storage limited to the first `size` elements (no copy is made)
 */
function tv_data(tv) {
    const { data, size } = tv;
    return data.subarray(0, size);
}

19
client/client_recv.js

@ -101,18 +101,19 @@ function des_event(d, state = null) { @@ -101,18 +101,19 @@ function des_event(d, state = null) {
const coords = des_f32array(d, point_count * 2);
tv_ensure_by(state.coordinates, coords.length);
tv_ensure_by(state.coords_from, 1);
tv_ensure_by(state.coords_to, 1);
wasm_ensure_by(state, 1, coords.length);
tv_add(state.coords_from, state.coordinates.size);
tv_add(state.coords_to, state.coordinates.size + point_count * 2);
const coordinates = state.wasm.buffers['coordinates'];
tv_add(state.wasm.buffers['coords_from'].tv, coordinates.tv.size + point_count * 2);
state.wasm.buffers['coords_from'].used += 4; // 4 bytes, not 4 ints
// TODO: remove, this is duplicate data
event.coords_from = state.coordinates.size;
event.coords_to = state.coordinates.size + point_count * 2;
event.coords_from = coordinates.tv.size;
event.coords_to = coordinates.tv.size + point_count * 2;
tv_append(state.coordinates, coords);
tv_append(coordinates.tv, coords);
state.wasm.buffers['coordinates'].used += point_count * 2 * 4;
event.stroke_id = stroke_id;
@ -413,7 +414,7 @@ async function handle_message(state, context, d) { @@ -413,7 +414,7 @@ async function handle_message(state, context, d) {
const user_count = des_u32(d);
const total_points = des_u32(d);
state.coordinates = tv_create(Float32Array, round_to_pow2(total_points * 2, 4096));
wasm_ensure_by(state, event_count, round_to_pow2(total_points * 2));
if (config.debug_print) console.debug(`${event_count} events in init`);

7
client/index.js

@ -177,13 +177,6 @@ async function main() { @@ -177,13 +177,6 @@ async function main() {
'starting_index': 0,
'total_points': 0,
'coordinates': tv_create(Float32Array, 4096),
'line_threshold': tv_create(Float32Array, 4096),
'coords_from': tv_create(Uint32Array, 4096),
'coords_to': tv_create(Uint32Array, 4096),
'segments_from': tv_create(Uint32Array, 4096),
'segments': tv_create(Uint32Array, 4096),
'bvh': {
'nodes': [],
'root': null,

55
client/math.js

@ -16,47 +16,6 @@ function canvas_to_screen(state, p) { @@ -16,47 +16,6 @@ function canvas_to_screen(state, p) {
return {'x': xs, 'y': ys};
}
/*
function rdp_find_max(state, zoom, stroke, start, end) {
// Finds a point from the range [start, end) with the maximum distance from the line (start--end) that is also further than EPS
const EPS = 1.0 / zoom;
let result = -1;
let max_dist = 0;
const ax = state.coordinates.data[stroke.coords_from + start * 2 + 0];
const ay = state.coordinates.data[stroke.coords_from + start * 2 + 1];
const bx = state.coordinates.data[stroke.coords_from + end * 2 + 0];
const by = state.coordinates.data[stroke.coords_from + end * 2 + 1];
const dx = bx - ax;
const dy = by - ay;
const dist_ab = Math.sqrt(dx * dx + dy * dy);
const dir_nx = dy / dist_ab;
const dir_ny = -dx / dist_ab;
for (let i = start + 1; i < end; ++i) {
const px = state.coordinates.data[stroke.coords_from + i * 2 + 0];
const py = state.coordinates.data[stroke.coords_from + i * 2 + 1];
const apx = px - ax;
const apy = py - ay;
const dist = Math.abs(apx * dir_nx + apy * dir_ny);
if (dist > EPS && dist > max_dist) {
result = i;
max_dist = dist;
}
}
state.stats.rdp_max_count++;
state.stats.rdp_segments += end - start - 1;
return result;
}
*/
function process_rdp_indices_r(state, zoom, mask, stroke, start, end) {
// Looks like the recursive implementation spends most of its time in the function call overhead
@ -294,15 +253,17 @@ function segment_interesects_quad(a, b, quad_topleft, quad_bottomright, quad_top @@ -294,15 +253,17 @@ function segment_interesects_quad(a, b, quad_topleft, quad_bottomright, quad_top
function stroke_bbox(state, stroke) {
const radius = stroke.width / 2;
let min_x = state.coordinates.data[stroke.coords_from + 0] - radius;
let max_x = state.coordinates.data[stroke.coords_from + 0] + radius;
const coordinates = state.wasm.buffers['coordinates'].tv.data;
let min_x = coordinates[stroke.coords_from + 0] - radius;
let max_x = coordinates[stroke.coords_from + 0] + radius;
let min_y = state.coordinates.data[stroke.coords_from + 1] - radius;
let max_y = state.coordinates.data[stroke.coords_from + 1] + radius;
let min_y = coordinates[stroke.coords_from + 1] - radius;
let max_y = coordinates[stroke.coords_from + 1] + radius;
for (let i = stroke.coords_from + 2; i < stroke.coords_to; i += 2) {
const px = state.coordinates.data[i + 0];
const py = state.coordinates.data[i + 1];
const px = coordinates[i + 0];
const py = coordinates[i + 1];
min_x = Math.min(min_x, px - radius);
min_y = Math.min(min_y, py - radius);

175
client/speed.js

@ -1,77 +1,118 @@ @@ -1,77 +1,118 @@
/**
 * Loads `wasm/lod.wasm`, grows its memory and reserves the three persistent
 * ("static") regions that live inside WASM memory: `coordinates`,
 * `coords_from` and `line_threshold`.
 *
 * After this call `state.wasm` holds:
 *   - `exports`: the instance exports,
 *   - `coords_bytes` / `stroke_bytes`: byte size of the regions (4096 each),
 *   - `buffers[name]`: `{offset, used, tv}` where `offset` is what
 *     `alloc_static` returned, `used` counts bytes and `tv` is a typed vector
 *     created on the WASM memory buffer at that offset.
 *
 * `coords_from` is seeded with a single 0, so it starts with one element
 * (4 bytes) in use.
 *
 * @param {object} state - application state; `state.wasm` must already exist
 * @returns {Promise<void>}
 */
async function init_wasm(state) {
    const { instance } = await WebAssembly.instantiateStreaming(fetch('wasm/lod.wasm'));
    const wasm = state.wasm;

    wasm.exports = instance.exports;
    wasm.exports.memory.grow(4096);

    wasm.stroke_bytes = 4096;
    wasm.coords_bytes = 4096;

    // [region name, view type, region size in bytes] -- order is the allocation order
    const layout = [
        ['coordinates', Float32Array, wasm.coords_bytes],
        ['coords_from', Uint32Array, wasm.stroke_bytes],
        ['line_threshold', Float32Array, wasm.stroke_bytes],
    ];

    const buffers = {};
    for (const [name, , bytes] of layout) {
        buffers[name] = {
            'offset': wasm.exports.alloc_static(bytes),
            'used': 0,
        };
    }
    wasm.buffers = buffers;

    // The buffer is fetched only after grow() and all allocations are done
    const mem = wasm.exports.memory.buffer;
    for (const [name, view_class, bytes] of layout) {
        const region = buffers[name];
        region.tv = tv_create_on(view_class, bytes / 4, mem, region.offset);
    }

    // First stroke starts at coordinate 0
    const coords_from = buffers['coords_from'];
    tv_add(coords_from.tv, 0);
    coords_from.used = 4;
}
/**
 * Finds a point from the range (start, end) with the maximum distance from
 * the line (start--end) that is also further than EPS = 1 / zoom.
 *
 * @param {object} state - reads `state.coordinates.data`, updates `state.stats`
 * @param {number} zoom - current zoom; the distance threshold is `1.0 / zoom`
 * @param {number} coords_from - index of the stroke's first coordinate
 * @param {number} start - index (in points) of the first endpoint
 * @param {number} end - index (in points) of the second endpoint
 * @returns {number} point index of the farthest point, or -1 if none exceeds EPS
 */
function rdp_find_max(state, zoom, coords_from, start, end) {
    const EPS = 1.0 / zoom;

    let result = -1;
    let max_dist = 0;

    const ax = state.coordinates.data[coords_from + start * 2 + 0];
    const ay = state.coordinates.data[coords_from + start * 2 + 1];
    const bx = state.coordinates.data[coords_from + end * 2 + 0];
    const by = state.coordinates.data[coords_from + end * 2 + 1];

    const dx = bx - ax;
    const dy = by - ay;

    const dist_ab = Math.sqrt(dx * dx + dy * dy);
    const dir_nx = dy / dist_ab;
    const dir_ny = -dx / dist_ab;

    for (let i = start + 1; i < end; ++i) {
        const px = state.coordinates.data[coords_from + i * 2 + 0];
        const py = state.coordinates.data[coords_from + i * 2 + 1];

        const apx = px - ax;
        const apy = py - ay;

        const dist = Math.abs(apx * dir_nx + apy * dir_ny);

        if (dist > EPS && dist > max_dist) {
            result = i;
            max_dist = dist;
        }
    }

    state.stats.rdp_max_count++;
    state.stats.rdp_segments += end - start - 1;

    return result;
}

/**
 * Makes sure the static WASM regions can take `nstrokes` more strokes and
 * `ncoords` more coordinate values (both are 4-byte element counts).
 *
 * If either region is too small, all three static regions are re-allocated
 * (free_static + alloc_static), the typed vectors are re-created on the new
 * offsets with their sizes restored from `used`, and the existing
 * `coords_from` / `line_threshold` contents are moved to the new offsets.
 * `coordinates` is not moved by this function.
 *
 * The growth amount is computed in BYTES, the same unit as the capacity
 * check. Previously the element count was passed to round_to_pow2, so a large
 * request could leave the region smaller than what the check had just
 * determined was needed.
 * NOTE(review): assumes round_to_pow2(x, m) returns a value >= x -- confirm.
 *
 * @param {object} state - uses `state.wasm.{buffers, exports, coords_bytes, stroke_bytes}`
 * @param {number} nstrokes - number of stroke entries about to be added
 * @param {number} ncoords - number of coordinate values about to be added
 */
function wasm_ensure_by(state, nstrokes, ncoords) {
    const buffers = state.wasm.buffers;

    const old_coords_from_offset = buffers['coords_from'].offset;
    const old_line_threshold_offset = buffers['line_threshold'].offset;
    const old_size_strokes = state.wasm.stroke_bytes;

    let realloc = false;

    if (buffers['coordinates'].used + ncoords * 4 > state.wasm.coords_bytes) {
        state.wasm.coords_bytes += round_to_pow2(ncoords * 4, 4096 * 16); // 1 wasm page (although it doesn't matter here)
        realloc = true;
    }

    if (buffers['coords_from'].used + nstrokes * 4 > state.wasm.stroke_bytes) {
        state.wasm.stroke_bytes += round_to_pow2(nstrokes * 4, 4096 * 16);
        realloc = true;
    }

    if (realloc) {
        // TODO: we do memory.grow() somewhere here if needed

        state.wasm.exports.free_static();

        const mem = state.wasm.exports.memory.buffer;
        const memv = new Uint8Array(mem);

        buffers['coordinates'].offset = state.wasm.exports.alloc_static(state.wasm.coords_bytes);
        buffers['coords_from'].offset = state.wasm.exports.alloc_static(state.wasm.stroke_bytes);
        buffers['line_threshold'].offset = state.wasm.exports.alloc_static(state.wasm.stroke_bytes);

        buffers['coordinates'].tv = tv_create_on(Float32Array, state.wasm.coords_bytes / 4, mem, buffers['coordinates'].offset);
        buffers['coords_from'].tv = tv_create_on(Uint32Array, state.wasm.stroke_bytes / 4, mem, buffers['coords_from'].offset);
        buffers['line_threshold'].tv = tv_create_on(Float32Array, state.wasm.stroke_bytes / 4, mem, buffers['line_threshold'].offset);

        buffers['coordinates'].tv.size = buffers['coordinates'].used / 4;
        buffers['coords_from'].tv.size = buffers['coords_from'].used / 4;
        buffers['line_threshold'].tv.size = buffers['line_threshold'].used / 4;

        // Staging copy: zero-initialised and as large as the new region, so
        // the part past the old contents is written as zeroes.
        const tmp = new Uint8Array(state.wasm.stroke_bytes); // TODO: needed?

        // First we move the line_threshold, only then coords_from (otherwise we will overwrite).
        // The destination is the offset the new typed vectors were created on.
        tmp.set(new Uint8Array(mem, old_line_threshold_offset, old_size_strokes));
        memv.set(tmp, buffers['line_threshold'].offset);

        tmp.set(new Uint8Array(mem, old_coords_from_offset, old_size_strokes));
        memv.set(tmp, buffers['coords_from'].offset);
    }
}
function do_lod_wasm(state, context) {
state.wasm.exports.total_free();
const clipped_indices = state.wasm.exports.alloc(context.clipped_indices.size * 4);
const stroke_coords_from = state.wasm.exports.alloc(state.coords_from.size * 4);
const stroke_coords_to = state.wasm.exports.alloc(state.coords_to.size * 4);
const line_threshold = state.wasm.exports.alloc(state.line_threshold.size * 4);
const segments_from = state.wasm.exports.alloc((context.clipped_indices.size + 1) * 4);
const segments = state.wasm.exports.alloc(state.segments.capacity * 4);
const coordinates = state.wasm.exports.alloc(state.coordinates.size * 4);
state.wasm.exports.free_dynamic();
const clipped_indices = state.wasm.exports.alloc_dynamic(context.clipped_indices.size * 4);
const mem = new Uint8Array(state.wasm.exports.memory.buffer);
// Dynamic input data that should (by design) never be too big
mem.set(tv_bytes(context.clipped_indices), clipped_indices);
mem.set(tv_bytes(state.coords_from), stroke_coords_from);
mem.set(tv_bytes(state.coords_to), stroke_coords_to);
mem.set(tv_bytes(state.line_threshold), line_threshold);
mem.set(tv_bytes(state.coordinates), coordinates);
const buffers = state.wasm.buffers;
const segment_count = state.wasm.exports.do_lod(
clipped_indices, context.clipped_indices.size, state.canvas.zoom,
stroke_coords_from, stroke_coords_to,
line_threshold, coordinates,
segments_from, segments
buffers['coords_from'].offset,
buffers['line_threshold'].offset,
buffers['coordinates'].offset,
buffers['coordinates'].used / 4,
);
// copy result back
const wasm_points = new Float32Array(state.wasm.exports.memory.buffer, coordinates + state.coordinates.size * 4, segment_count * 2);
const wasm_ids = new Uint32Array(state.wasm.exports.memory.buffer, coordinates + (state.coordinates.size + segment_count * 2) * 4, segment_count);
// Use results without copying from WASM memory
const result_offset = clipped_indices + context.clipped_indices.size * 4
+ (context.clipped_indices.size + 1) * 4 + buffers['coordinates'].used / 2;
const wasm_points = new Float32Array(state.wasm.exports.memory.buffer,
result_offset, segment_count * 2);
const wasm_ids = new Uint32Array(state.wasm.exports.memory.buffer,
result_offset + segment_count * 2 * 4, segment_count);
context.instance_data_points.data = wasm_points;
context.instance_data_points.size = segment_count * 2;
@ -166,43 +207,3 @@ function do_lod(state, context) { @@ -166,43 +207,3 @@ function do_lod(state, context) {
return segments_head;
}
/**
 * Fills `context.instance_data_points` and `context.instance_data_ids` from
 * the segment lists in `state.segments` / `state.segments_from`.
 *
 * For every clipped stroke i, the segment entries
 * `segments[segments_from[i] .. segments_from[i + 1])` are point indices
 * relative to that stroke's `coords_from`. Each one contributes an (x, y)
 * pair to the points vector and one id to the ids vector. The id is the
 * stroke index; for the last point of a stroke the highest bit is set to
 * mark that no segment should be drawn from this point to the next one.
 *
 * @param {object} state - reads `segments`, `segments_from`, `coordinates`, `events`
 * @param {object} context - reads `clipped_indices`; both output vectors are
 *                           ensured, cleared and then appended to
 */
function write_coordinates(state, context) {
    const points_out = context.instance_data_points;
    const ids_out = context.instance_data_ids;

    tv_ensure(points_out, state.segments.size * 2);
    tv_ensure(ids_out, state.segments.size);

    tv_clear(points_out);
    tv_clear(ids_out);

    const clipped = context.clipped_indices.data;
    const stroke_starts = state.segments_from.data;
    const point_indices = state.segments.data;
    const coords = state.coordinates.data;

    // Flag stored in the top bit of the stroke id
    const NO_DRAW_BIT = 1 << 31;

    // TODO: move this loop to WASM
    const stroke_total = state.segments_from.size - 1;

    for (let i = 0; i < stroke_total; ++i) {
        const stroke_index = clipped[i];
        const base = state.events[stroke_index].coords_from;
        const first = stroke_starts[i];
        const last = stroke_starts[i + 1] - 1;

        for (let j = first; j <= last; ++j) {
            const at = base + point_indices[j] * 2;

            tv_add(points_out, coords[at + 0]);
            tv_add(points_out, coords[at + 1]);

            // The final point of a stroke does not start a segment
            const id = (j === last) ? (stroke_index | NO_DRAW_BIT) : stroke_index;
            tv_add(ids_out, id);
        }
    }
}

44
client/wasm/lod.c

@ -1,17 +1,33 @@ @@ -1,17 +1,33 @@
extern char __heap_base;
static int allocated;
static int allocated_static;
static int allocated_dynamic;
// Rewinds the static allocation counter to zero. No memory is cleared or
// released; only the offset used for static allocations is reset.
void
free_static(void)
{
allocated_static = 0;
}
void
total_free(void)
free_dynamic(void)
{
allocated = 0;
allocated_dynamic = 0;
}
void *
alloc(int size)
alloc_static(int size)
{
void *result = &__heap_base + allocated;
allocated += size;
void *result = &__heap_base + allocated_static;
allocated_static += size;
return(result);
}
// Bump-allocates `size` bytes from the dynamic area, which starts right after
// everything currently handed out by the static allocator
// (__heap_base + allocated_static). Returns the start of the new chunk.
// No alignment or bounds checking is performed.
void *
alloc_dynamic(int size)
{
	char *heap = &__heap_base;
	int offset = allocated_static + allocated_dynamic;

	allocated_dynamic += size;

	return(heap + offset);
}
@ -56,14 +72,18 @@ rdp_find_max(float *coordinates, float zoom, int coords_from, @@ -56,14 +72,18 @@ rdp_find_max(float *coordinates, float zoom, int coords_from,
int
do_lod(int *clipped_indices, int clipped_count, float zoom,
int *stroke_coords_from, int *stroke_coords_to,
float *line_threshold, float *coordinates,
int *segments_from, int *segments)
int *stroke_coords_from,
float *line_threshold,
float *coordinates,
int coordinates_count)
{
if (clipped_count == 0) {
return(0);
}
int *segments_from = alloc_dynamic((clipped_count + 1) * 4);
int *segments = alloc_dynamic(coordinates_count / 2 * 4);
int segments_head = 0;
int stack[4096]; // TODO: what's a reasonable max size for this?
@ -72,7 +92,7 @@ do_lod(int *clipped_indices, int clipped_count, float zoom, @@ -72,7 +92,7 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
// TODO: convert to a proper CSR, save half the memory
int coords_from = stroke_coords_from[stroke_index];
int coords_to = stroke_coords_to[stroke_index];
int coords_to = stroke_coords_from[stroke_index + 1];
int point_count = (coords_to - coords_from) / 2;
@ -132,8 +152,8 @@ do_lod(int *clipped_indices, int clipped_count, float zoom, @@ -132,8 +152,8 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
segments_from[clipped_count] = segments_head;
// Write actual coordinates (points) and stroke ids
float *points = alloc(segments_head * 2 * 4);
int *ids = alloc(segments_head * 4);
float *points = alloc_dynamic(segments_head * 2 * 4);
int *ids = alloc_dynamic(segments_head * 4);
int phead = 0;
int ihead = 0;

BIN
client/wasm/lod.wasm

Binary file not shown.

10
client/webgl_geometry.js

@ -38,12 +38,6 @@ function geometry_prepare_stroke(state) { @@ -38,12 +38,6 @@ function geometry_prepare_stroke(state) {
function geometry_write_instances(state, context) {
tv_ensure(state.segments_from, round_to_pow2(context.clipped_indices.size + 1, 4096));
tv_ensure(state.segments, round_to_pow2(state.coordinates.size / 2, 4096));
tv_clear(state.segments_from);
tv_clear(state.segments);
state.stats.rdp_max_count = 0;
state.stats.rdp_segments = 0;
@ -62,8 +56,8 @@ function geometry_add_stroke(state, context, stroke, stroke_index, skip_bvh = fa @@ -62,8 +56,8 @@ function geometry_add_stroke(state, context, stroke, stroke_index, skip_bvh = fa
context.stroke_data = ser_ensure_by(context.stroke_data, config.bytes_per_stroke);
tv_ensure(state.line_threshold, round_to_pow2(state.stroke_count, 4096));
tv_add(state.line_threshold, -1);
tv_add(state.wasm.buffers['line_threshold'].tv, -1);
state.wasm.buffers['line_threshold'].used += 4;
const color_u32 = stroke.color;
const r = (color_u32 >> 16) & 0xFF;

Loading…
Cancel
Save