diff --git a/client/index.js b/client/index.js index a986914..f04815c 100644 --- a/client/index.js +++ b/client/index.js @@ -227,6 +227,7 @@ async function main() { 'instance_data_points': tv_create(Float32Array, 4096), 'instance_data_ids': tv_create(Uint32Array, 4096), 'instance_data_pressures': tv_create(Uint8Array, 4096), + 'instance_data_batches': tv_create(Uint32Array, 4096), 'dynamic_instance_points': tv_create(Float32Array, 4096), 'dynamic_instance_pressure': tv_create(Uint8Array, 4096), diff --git a/client/lod_worker.js b/client/lod_worker.js index c21ee3e..2dff3bb 100644 --- a/client/lod_worker.js +++ b/client/lod_worker.js @@ -25,6 +25,7 @@ function work(indices_base, indices_count, zoom, offsets) { offsets['pressures'], offsets['result_buffers'] + thread_id * 4, offsets['result_counts'] + thread_id * 4, + offsets['result_batch_counts'] + thread_id * 4, ); } catch (e) { console.error('WASM:', e); diff --git a/client/speed.js b/client/speed.js index 18c3ca4..906c64c 100644 --- a/client/speed.js +++ b/client/speed.js @@ -43,7 +43,7 @@ async function init_wasm(state) { env: { 'memory': memory } }); - const nworkers = navigator.hardwareConcurrency; + const nworkers = 1; //navigator.hardwareConcurrency; state.wasm.exports = master_wasm.instance.exports; state.wasm.heap_base = state.wasm.exports.alloc_static(0); @@ -185,6 +185,7 @@ async function do_lod(state, context) { const buffers = state.wasm.buffers; const result_buffers = state.wasm.exports.alloc_dynamic(state.wasm.workers.length * 4); const result_counts = state.wasm.exports.alloc_dynamic(state.wasm.workers.length * 4); + const result_batch_counts = state.wasm.exports.alloc_dynamic(state.wasm.workers.length * 4); const clipped_indices = state.wasm.exports.alloc_dynamic(context.clipped_indices.size * 4); const mem = new Uint8Array(state.wasm.memory.buffer); @@ -202,6 +203,7 @@ async function do_lod(state, context) { 'pressures': buffers['pressures'].offset, 'result_buffers': result_buffers, 'result_counts': result_counts, + 'result_batch_counts': result_batch_counts, }; const jobs = []; @@ -226,11 +228,13 @@ async function do_lod(state, context) { const result_offset = state.wasm.exports.merge_results( result_counts, + result_batch_counts, result_buffers, state.wasm.workers.length ); const segment_count = new Int32Array(state.wasm.memory.buffer, result_counts, 1)[0]; // by convention + const batch_count = new Int32Array(state.wasm.memory.buffer, result_batch_counts, 1)[0]; // by convention // Use results without copying from WASM memory const wasm_points = new Float32Array(state.wasm.memory.buffer, @@ -238,16 +242,25 @@ async function do_lod(state, context) { const wasm_ids = new Uint32Array(state.wasm.memory.buffer, result_offset + segment_count * 2 * 4, segment_count); const wasm_pressures = new Uint8Array(state.wasm.memory.buffer, - result_offset + segment_count * 2 * 4 + segment_count * 4, segment_count); + result_offset + segment_count * 3 * 4, segment_count); + const wasm_batches = new Int32Array(state.wasm.memory.buffer, + result_offset + round_to_pow2(segment_count * (3 * 4 + 1), 4), batch_count * 2); context.instance_data_points.data = wasm_points; context.instance_data_points.size = segment_count * 2; + context.instance_data_points.capacity = segment_count * 2; context.instance_data_ids.data = wasm_ids; context.instance_data_ids.size = segment_count; + context.instance_data_ids.capacity = segment_count; context.instance_data_pressures.data = wasm_pressures; context.instance_data_pressures.size = segment_count; + context.instance_data_pressures.capacity = segment_count; + + context.instance_data_batches.data = wasm_batches; + context.instance_data_batches.size = batch_count * 2; + context.instance_data_batches.capacity = batch_count * 2; return segment_count; } diff --git a/client/wasm/lod.c b/client/wasm/lod.c index c1e21e7..d494367 100644 --- a/client/wasm/lod.c +++ b/client/wasm/lod.c @@ -202,7 +202,8 @@ do_lod(int *clipped_indices, int clipped_count, float zoom, float *ys, unsigned char *pressures, char **result_buffer, - int *result_count) + int *result_count, + int *result_batch_count) { if (clipped_count == 0) { result_count[0] = 0; @@ -281,16 +282,18 @@ do_lod(int *clipped_indices, int clipped_count, float zoom, // Write actual coordinates (points) and stroke ids // Do this in one allocation so that they're not interleaved between threads - char *output = alloc_dynamic(segments_head * (3 * 4 + 1) + clipped_count * 4); + char *output = alloc_dynamic(round_to_pow2(segments_head * (3 * 4 + 1), 4) + clipped_count * 4 * 2); // max two ints per stroke for batch info (realistically, much less) float *points = (float *) output; int *ids = (int *) (output + segments_head * 4 * 2); unsigned char *pressures_res = (unsigned char *) (output + segments_head * 4 * 3); - unsigned int *batches = (unsigned int *) (output + segments_head * (4 * 3 + 1)); + int *batches = (int *) (output + round_to_pow2(segments_head * (4 * 3 + 1), 4)); int phead = 0; int ihead = 0; float sqrt_zoom = __builtin_sqrtf(zoom); - int last_lod = -1; + int last_lod = -100; + int batch_count = 0; + int batch_size = 0; for (int i = 0; i < clipped_count; ++i) { int stroke_index = clipped_indices[i]; @@ -316,48 +319,76 @@ do_lod(int *clipped_indices, int clipped_count, float zoom, } } + int segment_count = to - from; + // Compute recommended LOD level, add to current batch or start new batch - float sqrt_width = __builtin_sqrtf(width[stroke_index]); // TOOD: pass in stroke width - int lod = __builtin_round(sqrt_zoom * sqrt_width * 0.3333f); + float sqrt_width = __builtin_sqrtf(width[stroke_index]); + int lod = __builtin_ceil(sqrt_zoom * sqrt_width * 0.3333f); // TODO: round -#if 0 - if (__builtin_abs(lod - last_lod) > 2) { + if (lod > 7) lod = 7; + + if (batch_size > 0 && __builtin_abs(lod - last_lod) > 2) { // Start new batch - } else { - // Add to existing batch + batches[batch_count * 2 + 0] = batch_size; + batches[batch_count * 2 + 1] = last_lod; + ++batch_count; + batch_size = 0; } + batch_size += segment_count; last_lod = lod; -#endif } - + + if (batch_size > 0) { + batches[batch_count * 2 + 0] = batch_size; + batches[batch_count * 2 + 1] = last_lod; + ++batch_count; + } + result_buffer[0] = output; result_count[0] = segments_head; + result_batch_count[0] = batch_count; } // NOT thread-safe, only call from one thread char * -merge_results(int *segment_counts, char **buffers, int nthreads) +merge_results(int *segment_counts, int *batch_counts, char **buffers, int nthreads) { int total_segments = 0; + int total_batches = 0; for (int i = 0; i < nthreads; ++i) { total_segments += segment_counts[i]; + total_batches += batch_counts[i]; } - char *merged = alloc_dynamic(total_segments * (3 * 4 + 1)); + char *merged = alloc_dynamic(round_to_pow2(total_segments * (3 * 4 + 1), 4) + total_batches * 4); float *points = (float *) merged; int *ids = (int *) (merged + total_segments * 4 * 2); unsigned char *pressures = (unsigned char *) (merged + total_segments * 4 * 3); + int *batches = (int *) (merged + round_to_pow2(total_segments * (3 * 4 + 1), 4)); + int batch_base = 0; + int last_batch_lod = -99; + int bhead = 0; + int written_batches = 0; for (int i = 0; i < nthreads; ++i) { int segments = segment_counts[i]; + int nbatches = batch_counts[i]; + int *thread_batches = (int *) (buffers[i] + round_to_pow2(segments * (4 * 3 + 1), 4)); + if (segments > 0) { __builtin_memcpy(points, buffers[i], segments * 4 * 2); __builtin_memcpy(ids, buffers[i] + segments * 4 * 2, segments * 4); __builtin_memcpy(pressures, buffers[i] + segments * 4 * 3, segments); + for (int j = 0; j < nbatches * 2; j += 2) { + batches[bhead++] = written_batches; + batches[bhead++] = thread_batches[j + 1]; + written_batches += thread_batches[j + 0]; + } + points += segments * 2; ids += segments; pressures += segments; @@ -365,6 +396,7 @@ merge_results(int *segment_counts, char **buffers, int nthreads) } segment_counts[0] = total_segments; + batch_counts[0] = total_batches; return(merged); } diff --git a/client/wasm/lod.wasm b/client/wasm/lod.wasm index 23c843d..6fe6fbc 100755 Binary files a/client/wasm/lod.wasm and b/client/wasm/lod.wasm differ diff --git a/client/webgl_draw.js b/client/webgl_draw.js index 52d35a5..e7f5517 100644 --- a/client/webgl_draw.js +++ b/client/webgl_draw.js @@ -273,13 +273,11 @@ async function draw(state, context, animate, ts) { } } - // TODO: what do we do with this - const circle_lod = Math.round(Math.min(7, 3 * Math.sqrt(state.canvas.zoom))); + // TODO: @speed we can do this once at startup const lod_levels = []; let total_lod_floats = 0; let total_lod_indices = 0; let stat_total_vertices = 0; - for (let i = 0; i <= 7; ++i) { const d = geometry_good_circle_and_dummy(i); lod_levels.push({ @@ -295,20 +293,9 @@ async function draw(state, context, animate, ts) { if (segment_count > 0) { const pr = programs['main']; - const nbatches = 10; - const batches = []; - - for (let i = 0; i < nbatches; ++i) { - batches.push({ - 'index': Math.floor(segment_count / nbatches * i), - 'lod': circle_lod, - }); - - if (i % 2 == 1) { - batches[batches.length - 1].lod = Math.max(0, batches[batches.length - 1].lod); - } - } - batches.push({'index': segment_count, 'lod': -1}); // lod unused + // Last pair (lod unused) to have a proper from;to + tv_add2(context.instance_data_batches, segment_count); + tv_add2(context.instance_data_batches, -1); gl.clear(gl.DEPTH_BUFFER_BIT); // draw strokes above the images gl.useProgram(pr.program); @@ -363,11 +350,10 @@ async function draw(state, context, animate, ts) { gl.vertexAttribDivisor(pr.locations['a_stroke_id'], 1); gl.vertexAttribDivisor(pr.locations['a_pressure'], 1); - for (let b = 0; b < batches.length - 1; ++b) { - const batch = batches[b]; - const batch_from = batches[b].index; - const batch_size = batches[b + 1].index - batch_from; - const level = lod_levels[batch.lod]; + for (let b = 0; b < context.instance_data_batches.size - 2; b += 2) { + const batch_from = context.instance_data_batches.data[b + 0]; + const batch_size = context.instance_data_batches.data[b + 2] - batch_from; + const level = lod_levels[context.instance_data_batches.data[b + 1]]; if (batch_size > 0) { stat_total_vertices += batch_size * level.data.indices.size; @@ -593,7 +579,6 @@ async function draw(state, context, animate, ts) { Strokes onscreen: ${context.clipped_indices.size} Segments onscreen: ${segment_count} Total vertices: ${stat_total_vertices} - Circle LOD: ${circle_lod} Canvas offset: (${Math.round(state.canvas.offset.x * 100) / 100}, ${Math.round(state.canvas.offset.y * 100) / 100}) Canvas zoom level: ${state.canvas.zoom_level} Canvas zoom: ${Math.round(state.canvas.zoom * 100) / 100}`; diff --git a/client/webgl_geometry.js b/client/webgl_geometry.js index b770e37..27085ea 100644 --- a/client/webgl_geometry.js +++ b/client/webgl_geometry.js @@ -58,6 +58,9 @@ function geometry_add_stroke(state, context, stroke, stroke_index, skip_bvh = fa ser_u16(context.stroke_data, b); ser_u16(context.stroke_data, stroke.width); + tv_add(state.wasm.buffers['width'].tv, stroke.width); + state.wasm.buffers['width'].used += 4; + if (!skip_bvh) bvh_add_stroke(state, state.bvh, stroke_index, stroke); }