Browse Source

Cache full stroke lists in BVH levels (massive speedup!). Remove "line_threshold" array, since it has very little effect.

ssao
A.Olokhtonov 8 months ago
parent
commit
6d9791d3e0
  1. 28
      client/bvh.js
  2. 1
      client/index.js
  3. 1
      client/lod_worker.js
  4. 4
      client/math.js
  5. 14
      client/speed.js
  6. 12
      client/wasm/lod.c
  7. BIN
      client/wasm/lod.wasm
  8. 3
      client/webgl_geometry.js

28
client/bvh.js

@@ -54,8 +54,6 @@ function bvh_compute_sah(bvh, new_leaf, potential_sibling, only_parent = false)
return cost;
}
// todo area func
function bvh_find_best_sibling(bvh, leaf_index) {
// branch and bound
@@ -171,6 +169,13 @@ function bvh_intersect_quad(bvh, quad, result_buffer) {
continue;
}
if (node.is_fullnode) {
if (quad_fully_inside(quad, node.bbox)) {
tv_append(result_buffer, node.stroke_indices.data);
continue;
}
}
if (node.is_leaf) {
tv_add(result_buffer, node.stroke_index);
} else {
@@ -207,7 +212,7 @@ function bvh_clip(state, context) {
tv_data(context.clipped_indices).sort(); // we need to draw back to front still!
}
function bvh_construct_rec(bvh, vertical, strokes) {
function bvh_construct_rec(bvh, vertical, strokes, depth) {
if (strokes.length > 1) {
// internal
let sorted_strokes;
@@ -221,8 +226,8 @@ function bvh_construct_rec(bvh, vertical, strokes) {
const node_index = bvh_make_internal(bvh);
const left_of_split_count = Math.floor(strokes.length / 2);
const child1 = bvh_construct_rec(bvh, !vertical, sorted_strokes.slice(0, left_of_split_count));
const child2 = bvh_construct_rec(bvh, !vertical, sorted_strokes.slice(left_of_split_count, sorted_strokes.length));
const child1 = bvh_construct_rec(bvh, !vertical, sorted_strokes.slice(0, left_of_split_count), depth + 1);
const child2 = bvh_construct_rec(bvh, !vertical, sorted_strokes.slice(left_of_split_count, sorted_strokes.length), depth + 1);
bvh.nodes[child1].parent_index = node_index;
bvh.nodes[child2].parent_index = node_index;
@@ -231,6 +236,17 @@ function bvh_construct_rec(bvh, vertical, strokes) {
bvh.nodes[node_index].child2 = child2;
bvh.nodes[node_index].bbox = quad_union(bvh.nodes[child1].bbox, bvh.nodes[child2].bbox);
if (depth === config.bvh_fullnode_depth) {
const indices = tv_create(Int32Array, round_to_pow2(strokes.length, 32));
for (let i = 0; i < strokes.length; ++i) {
tv_add(indices, strokes[i].index);
}
bvh.nodes[node_index].stroke_indices = indices;
bvh.nodes[node_index].is_fullnode = true;
}
return node_index;
} else {
// leaf
@@ -240,6 +256,6 @@ function bvh_construct_rec(bvh, vertical, strokes) {
function bvh_construct(state) {
if (state.events.length > 0) {
state.bvh.root = bvh_construct_rec(state.bvh, true, state.events);
state.bvh.root = bvh_construct_rec(state.bvh, true, state.events, 0);
}
}

1
client/index.js

@@ -24,6 +24,7 @@ const config = {
initial_dynamic_bytes: 4096,
stroke_texture_size: 1024, // means no more than 1024^2 = 1M strokes in total (this is a LOT. HMH blackboard has like 80K)
dynamic_stroke_texture_size: 128, // means no more than 128^2 = 16K dynamic strokes at once
bvh_fullnode_depth: 5,
benchmark: {
zoom: 0.00001,
offset: { x: 425, y: -1195 },

1
client/lod_worker.js

@@ -19,7 +19,6 @@ function work(indices_base, indices_count, zoom, offsets) {
exports.do_lod(
indices_base, indices_count, zoom,
offsets['coords_from'],
offsets['line_threshold'],
offsets['xs'],
offsets['ys'],
offsets['pressures'],

4
client/math.js

@@ -282,8 +282,8 @@ function quads_intersect(a, b) {
return false;
}
function quad_fully_onscreen(screen, bbox) {
if (screen.x1 < bbox.x1 && screen.x2 > bbox.x2 && screen.y1 < bbox.y1 && screen.y2 > bbox.y2) {
function quad_fully_inside(outer, inner) {
if (outer.x1 < inner.x1 && outer.x2 > inner.x2 && outer.y1 < inner.y1 && outer.y2 > inner.y2) {
return true;
}

14
client/speed.js

@@ -76,10 +76,6 @@ async function init_wasm(state) {
'offset': state.wasm.exports.alloc_static(state.wasm.stroke_bytes),
'used': 0,
},
'line_threshold': {
'offset': state.wasm.exports.alloc_static(state.wasm.stroke_bytes),
'used': 0,
},
'pressures': {
'offset': state.wasm.exports.alloc_static(state.wasm.coords_bytes / 8),
'used': 0
@@ -94,8 +90,6 @@ async function init_wasm(state) {
mem, state.wasm.buffers['ys'].offset);
state.wasm.buffers['coords_from'].tv = tv_create_on(Uint32Array, state.wasm.stroke_bytes / 4,
mem, state.wasm.buffers['coords_from'].offset);
state.wasm.buffers['line_threshold'].tv = tv_create_on(Float32Array, state.wasm.stroke_bytes / 4,
mem, state.wasm.buffers['line_threshold'].offset);
state.wasm.buffers['pressures'].tv = tv_create_on(Uint8Array, state.wasm.coords_bytes / 8,
mem, state.wasm.buffers['pressures'].offset);
@@ -107,7 +101,6 @@ function wasm_ensure_by(state, nstrokes, ncoords) {
const buffers = state.wasm.buffers;
const old_coords_from_offset = buffers['coords_from'].offset;
const old_line_threshold_offset = buffers['line_threshold'].offset;
const old_pressures_offset = buffers['pressures'].offset;
const old_size_coords = state.wasm.coords_bytes;
@@ -139,20 +132,17 @@ function wasm_ensure_by(state, nstrokes, ncoords) {
buffers['xs'].offset = state.wasm.exports.alloc_static(state.wasm.coords_bytes / 2);
buffers['ys'].offset = state.wasm.exports.alloc_static(state.wasm.coords_bytes / 2);
buffers['coords_from'].offset = state.wasm.exports.alloc_static(state.wasm.stroke_bytes);
buffers['line_threshold'].offset = state.wasm.exports.alloc_static(state.wasm.stroke_bytes);
buffers['pressures'].offset = state.wasm.exports.alloc_static(state.wasm.coords_bytes / 8);
buffers['xs'].tv = tv_create_on(Float32Array, state.wasm.coords_bytes / 8, mem, buffers['xs'].offset);
buffers['ys'].tv = tv_create_on(Float32Array, state.wasm.coords_bytes / 8, mem, buffers['ys'].offset);
buffers['coords_from'].tv = tv_create_on(Uint32Array, state.wasm.stroke_bytes / 4, mem, buffers['coords_from'].offset);
buffers['line_threshold'].tv = tv_create_on(Float32Array, state.wasm.stroke_bytes / 4, mem, buffers['line_threshold'].offset);
buffers['pressures'].tv = tv_create_on(Uint8Array, state.wasm.coords_bytes / 8, mem, buffers['pressures'].offset);
// TODO: this should have been automatic maybe?
buffers['xs'].tv.size = buffers['xs'].used / 4;
buffers['ys'].tv.size = buffers['ys'].used / 4;
buffers['coords_from'].tv.size = buffers['coords_from'].used / 4;
buffers['line_threshold'].tv.size = buffers['line_threshold'].used / 4;
buffers['pressures'].tv.size = buffers['pressures'].used;
const tmp = new Uint8Array(Math.max(state.wasm.coords_bytes, state.wasm.stroke_bytes)); // TODO: needed?
@@ -161,9 +151,6 @@ function wasm_ensure_by(state, nstrokes, ncoords) {
tmp.set(new Uint8Array(mem, old_pressures_offset, buffers['pressures'].used));
memv.set(new Uint8Array(tmp.buffer, 0, buffers['pressures'].used), buffers['pressures'].offset);
tmp.set(new Uint8Array(mem, old_line_threshold_offset, old_size_strokes));
memv.set(new Uint8Array(tmp.buffer, 0, old_size_strokes), buffers['line_threshold'].offset);
tmp.set(new Uint8Array(mem, old_coords_from_offset, old_size_strokes));
memv.set(new Uint8Array(tmp.buffer, 0, old_size_strokes), buffers['coords_from'].offset);
}
@@ -187,7 +174,6 @@ async function do_lod(state, context) {
const indices_per_thread = Math.floor(context.clipped_indices.size / state.wasm.workers.length);
const offsets = {
'coords_from': buffers['coords_from'].offset,
'line_threshold': buffers['line_threshold'].offset,
'xs': buffers['xs'].offset,
'ys': buffers['ys'].offset,
'pressures': buffers['pressures'].offset,

12
client/wasm/lod.c

@@ -197,7 +197,6 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
void
do_lod(int *clipped_indices, int clipped_count, float zoom,
int *stroke_coords_from,
float *line_threshold,
float *xs,
float *ys,
unsigned char *pressures,
@@ -236,13 +235,6 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
// Basic CSR crap
segments_from[i] = segments_head;
if (zoom < line_threshold[stroke_index]) {
// Fast paths for collapsing to a single line segment
segments[segments_head++] = 0;
segments[segments_head++] = point_count - 1;
continue;
}
int segment_count = 2;
int stack_head = 0;
@@ -280,10 +272,6 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
}
segments[segments_head++] = point_count - 1;
if (segment_count == 2 && zoom > line_threshold[stroke_index]) {
line_threshold[stroke_index] = zoom;
}
}
segments_from[clipped_count] = segments_head;

BIN
client/wasm/lod.wasm

Binary file not shown.

3
client/webgl_geometry.js

@@ -56,9 +56,6 @@ function geometry_add_stroke(state, context, stroke, stroke_index, skip_bvh = fa
context.stroke_data = ser_ensure_by(context.stroke_data, config.bytes_per_stroke);
tv_add(state.wasm.buffers['line_threshold'].tv, -1);
state.wasm.buffers['line_threshold'].used += 4;
const color_u32 = stroke.color;
const r = (color_u32 >> 16) & 0xFF;
const g = (color_u32 >> 8) & 0xFF;

Loading…
Cancel
Save