From 71f1016a40cbd11d9da10a1721ce8d5e6c47d7d8 Mon Sep 17 00:00:00 2001 From: aolo2 Date: Sat, 13 Jan 2024 20:07:06 +0300 Subject: [PATCH] Store WASM-processed data in WASM memory --- README.md | 4 +- client/aux.js | 9 ++ client/client_recv.js | 23 ++--- client/index.js | 7 -- client/math.js | 55 ++---------- client/speed.js | 179 ++++++++++++++++++++------------------- client/wasm/lod.c | 44 +++++++--- client/wasm/lod.wasm | Bin 1558 -> 1782 bytes client/webgl_geometry.js | 10 +-- 9 files changed, 155 insertions(+), 176 deletions(-) diff --git a/README.md b/README.md index 814ab54..7f47b08 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ Release: + Reuse points, pack "nodraw" in high bit of stroke id (probably have at least one more bit, so up to 4 flag configurations) + Draw dynamic data (strokes in progress) + Webassembly for core LOD generation - - Webassembly for final buffers - - Do not copy memory from wasm and back + + Webassembly for final buffers + + Do not copy memory to wasm, instead use wasm memory to store data in the first place - Z-prepass fringe bug (also, when do we enable the prepass?) - Textured quads (pictures, code already written in older version) - Resize and move pictures (draw handles) diff --git a/client/aux.js b/client/aux.js index 81c39db..797c7c2 100644 --- a/client/aux.js +++ b/client/aux.js @@ -121,6 +121,15 @@ function tv_create(class_name, capacity) { }; } +function tv_create_on(class_name, capacity, buffer, offset) { + return { + 'class_name': class_name, + 'data': new class_name(buffer, offset, capacity), + 'capacity': capacity, + 'size': 0, + }; +} + function tv_data(tv) { return tv.data.subarray(0, tv.size); } diff --git a/client/client_recv.js b/client/client_recv.js index 10d2d40..3b01a78 100644 --- a/client/client_recv.js +++ b/client/client_recv.js @@ -100,19 +100,20 @@ function des_event(d, state = null) { const color = des_u32(d); const coords = des_f32array(d, point_count * 2); - - tv_ensure_by(state.coordinates, coords.length); - tv_ensure_by(state.coords_from, 1); - tv_ensure_by(state.coords_to, 1); - tv_add(state.coords_from, state.coordinates.size); - tv_add(state.coords_to, state.coordinates.size + point_count * 2); + wasm_ensure_by(state, 1, coords.length); + + const coordinates = state.wasm.buffers['coordinates']; + + tv_add(state.wasm.buffers['coords_from'].tv, coordinates.tv.size + point_count * 2); + state.wasm.buffers['coords_from'].used += 4; // 4 bytes, not 4 ints // TODO: remove, this is duplicate data - event.coords_from = state.coordinates.size; - event.coords_to = state.coordinates.size + point_count * 2; + event.coords_from = coordinates.tv.size; + event.coords_to = coordinates.tv.size + point_count * 2; - tv_append(state.coordinates, coords); + tv_append(coordinates.tv, coords); + state.wasm.buffers['coordinates'].used += point_count * 2 * 4; event.stroke_id = stroke_id; @@ -412,8 +413,8 @@ async function handle_message(state, context, d) { const event_count = des_u32(d); const user_count = des_u32(d); const total_points = des_u32(d); - - state.coordinates = tv_create(Float32Array, round_to_pow2(total_points * 2, 4096)); + + wasm_ensure_by(state, event_count, round_to_pow2(total_points * 2)); if (config.debug_print) console.debug(`${event_count} events in init`); diff --git a/client/index.js b/client/index.js index 310686a..21a7a32 100644 --- a/client/index.js +++ b/client/index.js @@ -177,13 +177,6 @@ async function main() { 'starting_index': 0, 'total_points': 0, - 'coordinates': tv_create(Float32Array, 4096), - 'line_threshold': tv_create(Float32Array, 4096), - 'coords_from': tv_create(Uint32Array, 4096), - 'coords_to': tv_create(Uint32Array, 4096), - 'segments_from': tv_create(Uint32Array, 4096), - 'segments': tv_create(Uint32Array, 4096), - 'bvh': { 'nodes': [], 'root': null, diff --git a/client/math.js b/client/math.js index 5ccdabd..729aaa3 100644 --- a/client/math.js +++ b/client/math.js @@ -16,47 +16,6 @@ function canvas_to_screen(state, p) { return {'x': xs, 'y': ys}; } -/* -function rdp_find_max(state, zoom, stroke, start, end) { - // Finds a point from the range [start, end) with the maximum distance from the line (start--end) that is also further than EPS - const EPS = 1.0 / zoom; - - let result = -1; - let max_dist = 0; - - const ax = state.coordinates.data[stroke.coords_from + start * 2 + 0]; - const ay = state.coordinates.data[stroke.coords_from + start * 2 + 1]; - const bx = state.coordinates.data[stroke.coords_from + end * 2 + 0]; - const by = state.coordinates.data[stroke.coords_from + end * 2 + 1]; - - const dx = bx - ax; - const dy = by - ay; - - const dist_ab = Math.sqrt(dx * dx + dy * dy); - const dir_nx = dy / dist_ab; - const dir_ny = -dx / dist_ab; - - for (let i = start + 1; i < end; ++i) { - const px = state.coordinates.data[stroke.coords_from + i * 2 + 0]; - const py = state.coordinates.data[stroke.coords_from + i * 2 + 1]; - - const apx = px - ax; - const apy = py - ay; - - const dist = Math.abs(apx * dir_nx + apy * dir_ny); - - if (dist > EPS && dist > max_dist) { - result = i; - max_dist = dist; - } - } - - state.stats.rdp_max_count++; - state.stats.rdp_segments += end - start - 1; - - return result; -} -*/ function process_rdp_indices_r(state, zoom, mask, stroke, start, end) { // Looks like the recursive implementation spends most of its time in the function call overhead @@ -294,15 +253,17 @@ function segment_interesects_quad(a, b, quad_topleft, quad_bottomright, quad_top function stroke_bbox(state, stroke) { const radius = stroke.width / 2; - let min_x = state.coordinates.data[stroke.coords_from + 0] - radius; - let max_x = state.coordinates.data[stroke.coords_from + 0] + radius; + const coordinates = state.wasm.buffers['coordinates'].tv.data; + + let min_x = coordinates[stroke.coords_from + 0] - radius; + let max_x = coordinates[stroke.coords_from + 0] + radius; - let min_y = state.coordinates.data[stroke.coords_from + 1] - radius; - let max_y = state.coordinates.data[stroke.coords_from + 1] + radius; + let min_y = coordinates[stroke.coords_from + 1] - radius; + let max_y = coordinates[stroke.coords_from + 1] + radius; for (let i = stroke.coords_from + 2; i < stroke.coords_to; i += 2) { - const px = state.coordinates.data[i + 0]; - const py = state.coordinates.data[i + 1]; + const px = coordinates[i + 0]; + const py = coordinates[i + 1]; min_x = Math.min(min_x, px - radius); min_y = Math.min(min_y, py - radius); diff --git a/client/speed.js b/client/speed.js index efba904..cbbf007 100644 --- a/client/speed.js +++ b/client/speed.js @@ -1,77 +1,118 @@ async function init_wasm(state) { const results = await WebAssembly.instantiateStreaming(fetch('wasm/lod.wasm')); + state.wasm.exports = results.instance.exports; state.wasm.exports.memory.grow(4096); + + state.wasm.stroke_bytes = 4096; + state.wasm.coords_bytes = 4096; + state.wasm.buffers = { + 'coordinates': { + 'offset': state.wasm.exports.alloc_static(state.wasm.coords_bytes), + 'used': 0 + }, + 'coords_from': { + 'offset': state.wasm.exports.alloc_static(state.wasm.stroke_bytes), + 'used': 0, + }, + 'line_threshold': { + 'offset': state.wasm.exports.alloc_static(state.wasm.stroke_bytes), + 'used': 0, + }, + }; + + const mem = state.wasm.exports.memory.buffer; + + state.wasm.buffers['coordinates'].tv = tv_create_on(Float32Array, state.wasm.coords_bytes / 4, + mem, state.wasm.buffers['coordinates'].offset); + state.wasm.buffers['coords_from'].tv = tv_create_on(Uint32Array, state.wasm.stroke_bytes / 4, + mem, state.wasm.buffers['coords_from'].offset); + state.wasm.buffers['line_threshold'].tv = tv_create_on(Float32Array, state.wasm.stroke_bytes / 4, + mem, state.wasm.buffers['line_threshold'].offset); + + + tv_add(state.wasm.buffers['coords_from'].tv, 0); + state.wasm.buffers['coords_from'].used = 4; } -function rdp_find_max(state, zoom, coords_from, start, end) { - // Finds a point from the range [start, end) with the maximum distance from the line (start--end) that is also further than EPS - const EPS = 1.0 / zoom; +function wasm_ensure_by(state, nstrokes, ncoords) { + const buffers = state.wasm.buffers; - let result = -1; - let max_dist = 0; + const old_coords_from_offset = buffers['coords_from'].offset; + const old_line_threshold_offset = buffers['line_threshold'].offset; - const ax = state.coordinates.data[coords_from + start * 2 + 0]; - const ay = state.coordinates.data[coords_from + start * 2 + 1]; - const bx = state.coordinates.data[coords_from + end * 2 + 0]; - const by = state.coordinates.data[coords_from + end * 2 + 1]; + const old_size_coords = state.wasm.coords_bytes; + const old_size_strokes = state.wasm.stroke_bytes; - const dx = bx - ax; - const dy = by - ay; + let realloc = false; - const dist_ab = Math.sqrt(dx * dx + dy * dy); - const dir_nx = dy / dist_ab; - const dir_ny = -dx / dist_ab; + if (buffers['coordinates'].used + ncoords * 4 > state.wasm.coords_bytes) { + state.wasm.coords_bytes += round_to_pow2(ncoords, 4096 * 16); // 1 wasm page (although it doesn't matter here) + realloc = true; + } - for (let i = start + 1; i < end; ++i) { - const px = state.coordinates.data[coords_from + i * 2 + 0]; - const py = state.coordinates.data[coords_from + i * 2 + 1]; - - const apx = px - ax; - const apy = py - ay; + if (buffers['coords_from'].used + nstrokes * 4 > state.wasm.stroke_bytes) { + state.wasm.stroke_bytes += round_to_pow2(nstrokes, 4096 * 16); + realloc = true; + } - const dist = Math.abs(apx * dir_nx + apy * dir_ny); + if (realloc) { + // TODO: we do memory.grow() somewhere here if needed - if (dist > EPS && dist > max_dist) { - result = i; - max_dist = dist; - } - } + state.wasm.exports.free_static(); - state.stats.rdp_max_count++; - state.stats.rdp_segments += end - start - 1; + const mem = state.wasm.exports.memory.buffer; + const memv = new Uint8Array(mem); - return result; + buffers['coordinates'].offset = state.wasm.exports.alloc_static(state.wasm.coords_bytes); + buffers['coords_from'].offset = state.wasm.exports.alloc_static(state.wasm.stroke_bytes); + buffers['line_threshold'].offset = state.wasm.exports.alloc_static(state.wasm.stroke_bytes); + + buffers['coordinates'].tv = tv_create_on(Float32Array, state.wasm.coords_bytes / 4, mem, buffers['coordinates'].offset); + buffers['coords_from'].tv = tv_create_on(Uint32Array, state.wasm.stroke_bytes / 4, mem, buffers['coords_from'].offset); + buffers['line_threshold'].tv = tv_create_on(Float32Array, state.wasm.stroke_bytes / 4, mem, buffers['line_threshold'].offset); + + buffers['coordinates'].tv.size = buffers['coordinates'].used / 4; + buffers['coords_from'].tv.size = buffers['coords_from'].used / 4; + buffers['line_threshold'].tv.size = buffers['line_threshold'].used / 4; + + const tmp = new Uint8Array(state.wasm.stroke_bytes); // TODO: needed? + + // First we move the line_threshold, only then coords_from (otherwise we will overwrite) + tmp.set(new Uint8Array(mem, old_line_threshold_offset, old_size_strokes)); + memv.set(tmp, buffers['coordinates'].offset + state.wasm.coords_bytes + state.wasm.stroke_bytes); + + tmp.set(new Uint8Array(mem, old_coords_from_offset, old_size_strokes)); + memv.set(tmp, buffers['coordinates'].offset + state.wasm.coords_bytes); + } } function do_lod_wasm(state, context) { - state.wasm.exports.total_free(); - - const clipped_indices = state.wasm.exports.alloc(context.clipped_indices.size * 4); - const stroke_coords_from = state.wasm.exports.alloc(state.coords_from.size * 4); - const stroke_coords_to = state.wasm.exports.alloc(state.coords_to.size * 4); - const line_threshold = state.wasm.exports.alloc(state.line_threshold.size * 4); - const segments_from = state.wasm.exports.alloc((context.clipped_indices.size + 1) * 4); - const segments = state.wasm.exports.alloc(state.segments.capacity * 4); - const coordinates = state.wasm.exports.alloc(state.coordinates.size * 4); - const mem = new Uint8Array(state.wasm.exports.memory.buffer); + state.wasm.exports.free_dynamic(); + const clipped_indices = state.wasm.exports.alloc_dynamic(context.clipped_indices.size * 4); + const mem = new Uint8Array(state.wasm.exports.memory.buffer); + + // Dynamic input data that should (by design) never be too big mem.set(tv_bytes(context.clipped_indices), clipped_indices); - mem.set(tv_bytes(state.coords_from), stroke_coords_from); - mem.set(tv_bytes(state.coords_to), stroke_coords_to); - mem.set(tv_bytes(state.line_threshold), line_threshold); - mem.set(tv_bytes(state.coordinates), coordinates); + const buffers = state.wasm.buffers; const segment_count = state.wasm.exports.do_lod( clipped_indices, context.clipped_indices.size, state.canvas.zoom, - stroke_coords_from, stroke_coords_to, - line_threshold, coordinates, - segments_from, segments + buffers['coords_from'].offset, + buffers['line_threshold'].offset, + buffers['coordinates'].offset, + buffers['coordinates'].used / 4, ); - // copy result back - const wasm_points = new Float32Array(state.wasm.exports.memory.buffer, coordinates + state.coordinates.size * 4, segment_count * 2); - const wasm_ids = new Uint32Array(state.wasm.exports.memory.buffer, coordinates + (state.coordinates.size + segment_count * 2) * 4, segment_count); + // Use results without copying from WASM memory + const result_offset = clipped_indices + context.clipped_indices.size * 4 + + (context.clipped_indices.size + 1) * 4 + buffers['coordinates'].used / 2; + + const wasm_points = new Float32Array(state.wasm.exports.memory.buffer, + result_offset, segment_count * 2); + const wasm_ids = new Uint32Array(state.wasm.exports.memory.buffer, + result_offset + segment_count * 2 * 4, segment_count); context.instance_data_points.data = wasm_points; context.instance_data_points.size = segment_count * 2; @@ -166,43 +207,3 @@ function do_lod(state, context) { return segments_head; } - -function write_coordinates(state, context) { - tv_ensure(context.instance_data_points, state.segments.size * 2); - tv_ensure(context.instance_data_ids, state.segments.size); - - tv_clear(context.instance_data_points); - tv_clear(context.instance_data_ids); - - const clipped = context.clipped_indices.data; - const segments_from = state.segments_from.data; - const segments = state.segments.data; - const coords = state.coordinates.data; - const events = state.events; - - // TODO: move this loop to WASM - for (let i = 0; i < state.segments_from.size - 1; ++i) { - const stroke_index = clipped[i]; - const coords_from = state.events[stroke_index].coords_from; - const from = segments_from[i]; - const to = segments_from[i + 1]; - - for (let j = from; j < to; ++j) { - const base_this = segments[j]; - - const ax = coords[coords_from + base_this * 2 + 0]; - const ay = coords[coords_from + base_this * 2 + 1]; - - tv_add(context.instance_data_points, ax); - tv_add(context.instance_data_points, ay); - - // Pack 1 into highest bit of stroke_index if we should not draw a segemtn from this - // point to the next one - if (j != to - 1) { - tv_add(context.instance_data_ids, stroke_index); - } else { - tv_add(context.instance_data_ids, stroke_index | (1 << 31)); - } - } - } -} diff --git a/client/wasm/lod.c b/client/wasm/lod.c index eddf487..7f83927 100644 --- a/client/wasm/lod.c +++ b/client/wasm/lod.c @@ -1,17 +1,33 @@ extern char __heap_base; -static int allocated; + +static int allocated_static; +static int allocated_dynamic; + +void +free_static(void) +{ + allocated_static = 0; +} void -total_free(void) +free_dynamic(void) { - allocated = 0; + allocated_dynamic = 0; } void * -alloc(int size) +alloc_static(int size) { - void *result = &__heap_base + allocated; - allocated += size; + void *result = &__heap_base + allocated_static; + allocated_static += size; + return(result); +} + +void * +alloc_dynamic(int size) +{ + void *result = &__heap_base + allocated_static + allocated_dynamic; + allocated_dynamic += size; return(result); } @@ -56,13 +72,17 @@ rdp_find_max(float *coordinates, float zoom, int coords_from, int do_lod(int *clipped_indices, int clipped_count, float zoom, - int *stroke_coords_from, int *stroke_coords_to, - float *line_threshold, float *coordinates, - int *segments_from, int *segments) + int *stroke_coords_from, + float *line_threshold, + float *coordinates, + int coordinates_count) { if (clipped_count == 0) { return(0); } + + int *segments_from = alloc_dynamic((clipped_count + 1) * 4); + int *segments = alloc_dynamic(coordinates_count / 2 * 4); int segments_head = 0; int stack[4096]; // TODO: what's a reasonable max size for this? @@ -72,7 +92,7 @@ do_lod(int *clipped_indices, int clipped_count, float zoom, // TODO: convert to a proper CSR, save half the memory int coords_from = stroke_coords_from[stroke_index]; - int coords_to = stroke_coords_to[stroke_index]; + int coords_to = stroke_coords_from[stroke_index + 1]; int point_count = (coords_to - coords_from) / 2; @@ -132,8 +152,8 @@ do_lod(int *clipped_indices, int clipped_count, float zoom, segments_from[clipped_count] = segments_head; // Write actual coordinates (points) and stroke ids - float *points = alloc(segments_head * 2 * 4); - int *ids = alloc(segments_head * 4); + float *points = alloc_dynamic(segments_head * 2 * 4); + int *ids = alloc_dynamic(segments_head * 4); int phead = 0; int ihead = 0; diff --git a/client/wasm/lod.wasm b/client/wasm/lod.wasm index 67a51637c639c79427e1b1f192d1e25b9f18de34..7ea392c821a944692e0fbda343d82ce738a3e3fa 100755 GIT binary patch literal 1782 zcmZux%~RV(6yIIhD;e1_5F79Z_-Vlbn!+?akoFXmwrMAao_Z}p*b1=tgIZz&Gi9Vh z4$QQNG?0I)=l%`-H`@1BHVrcsBWw44{@(9B9t-0kXN>Whb;#HuPkDN1r|ETy4^J&y zl8%d_#hH*V=2GszzbUvWn|(5eE;r`)eg2#4b48Ya7#VTb{+rLsVKf|1t^`{Mg14AF z=!Jtp&`ZXXm@)UwB#MGK36s+v<7IQ}UyZ^cUPT!V$Gw@>a)aO`3eSV1Fpex<$(1wX zg6xlj!MM+gOpIF685F8K2 zN8upPAkow0BTpNZc^$dZLEj`i8btZEH1}so3g%XrkAHh2yUDWuZtu)(gY%TMTe((o zV0vl^MDn|gX?OSD$T&@n%z0*XA3eN@h{w8q9xRVer_b)!^(20#e5)j&CIxwSq!Si$vr#KEfDNE)Y~% zt0j1?C3b^=HEYNS7}!H^^U+SH=y<7jtaox-o1@G};% zK@8=Bn8mLCOkD*KU*V3z=|S^a&aq>>Y+4z5!z>JNTsAJ?29Eeo@_%+V|8!-G2OHaZ zu&@gcH*|5jpzTG@s#tiQ!ek32`JdnH@E<>^dtRnORE} z7}b&`g{nTcGwFYLUvV;Vx8!x+yes}cl>eo@H00$X*hxCIzvXgu5>00Fw@P?H@CJtm z!*DzfhUsjc2w|tQG#m%7=TRi&5)8AUP)-n>M&U(p943(^tD{*ko{fan41!TI3r@r7 zXdGF}#x_jDAexRW3n57w4$tBF#;U-x+?}4CoLWn*AUGM%j&V+rv5LqdKS$PJECH6r zI%3oCcpR16hW%H)qJ+~DK7M;Df1l^V*_JZn{Z6?zl0>|)5%cvf7qRnCty;p?O2qZ3OCG-m1JkGOx%SAVe$3MPB72+Nb)QzrEhLu&l}S4&KhLk`dV}iJ zEsR~VsI$ng2Ss%rN?X@%Q#sV9m6)p31tMRjG>F;rkrL08z_GA(XoGqK)K5J>E719s zSa+%K*BOGxN+4vVO8^$@c2hWxLluBpMmE#q%2@DNK#T=?Sn8nsUP&&{7A*1v2!QIG z`)UFzY+r)OqK!+4G7r80bDxnq{1GM8X_e6yz!1g_ZoClBEJ2MzXFlIkbzkXCYL*Ou zKud3^eRO2QtoZWhmZTL%HlRjqJpU>-HUn#80vfPf$7X|1Fl$_a4(Oi7fxR}pIm&0hD$R@;LlqAt;_xP=B z{tz9Xg;N@i!|93hbnnU6Pxff%>35IBmu?!)Pogw<9fj%TJWAAKcXC> 16) & 0xFF;