From 4f84509b165cf517a25b47c3095973ac3677b1d7 Mon Sep 17 00:00:00 2001 From: "A.Olokhtonov" Date: Wed, 27 Mar 2024 00:46:06 +0300 Subject: [PATCH] Multithreading works! Kinda sorta (slows down in chrome, out of bounds accesses on phone) --- client/lod_worker.js | 44 +++++++++ client/speed.js | 180 ++++++++++++++++++++---------------- client/wasm/compile_command | 2 +- client/wasm/lod.c | 80 +++++++++++++--- client/wasm/lod.wasm | Bin 2192 -> 2844 bytes client/wasm/multi.c | 37 -------- client/wasm/multi.wasm | Bin 680 -> 0 bytes client/wasm_worker.js | 40 -------- client/webgl_draw.js | 23 ++--- client/webgl_geometry.js | 4 +- 10 files changed, 228 insertions(+), 182 deletions(-) create mode 100644 client/lod_worker.js delete mode 100644 client/wasm/multi.c delete mode 100755 client/wasm/multi.wasm delete mode 100644 client/wasm_worker.js diff --git a/client/lod_worker.js b/client/lod_worker.js new file mode 100644 index 0000000..2f382f8 --- /dev/null +++ b/client/lod_worker.js @@ -0,0 +1,44 @@ +let thread_id = null; +let exports = null; + +async function init(tid, memory, heap_base) { + thread_id = tid; + + const result = await WebAssembly.instantiateStreaming(fetch('wasm/lod.wasm'), { + env: { 'memory': memory } + }); + + exports = result.instance.exports; + exports.set_sp(heap_base - thread_id * 16 * 4096); // 64K stack + + postMessage({ 'type': 'init_done' }); +} + +function work(indices_base, indices_count, zoom, offsets) { + exports.do_lod( + indices_base, indices_count, zoom, + offsets['coords_from'], + offsets['line_threshold'], + offsets['xs'], + offsets['ys'], + offsets['pressures'], + offsets['result_buffers'] + thread_id * 4, + offsets['result_counts'] + thread_id * 4, + ); + + postMessage({ 'type': 'lod_done' }); +} + +onmessage = (e) => { + const d = e.data; + + if (d.type === 'init') { + init(d.thread_id, d.memory, d.heap_base); + } else if (d.type === 'lod') { + work(d.indices_base, d.indices_count, d.zoom, d.offsets); + } else { + console.error('unexpected worker command:', d.type); + } +} + + diff --git a/client/speed.js b/client/speed.js index bc91a27..9cfbaa6 100644 --- a/client/speed.js +++ b/client/speed.js @@ -1,74 +1,65 @@ -async function init_test() { - const memory = new WebAssembly.Memory({ - initial: 32, - maximum: 100, - shared: true, - }); - - const results = await WebAssembly.instantiateStreaming(fetch('wasm/multi.wasm'), { - env: { 'memory': memory } +function worker_message(worker, message) { + return new Promise((resolve) => { + worker.onmessage = (e) => resolve(e.data); + worker.postMessage(message); }); +} - const nworkers = navigator.hardwareConcurrency; - const heap_base = results.instance.exports.alloc(0); - const buf_offset = results.instance.exports.alloc(1024); - const workers = []; - const sab = new SharedArrayBuffer(nworkers * 4); - const flags = new Int32Array(sab); - let done = 0; +function workers_messages(workers, messages) { + const promises = []; - for (let i = 0; i < nworkers; ++i) { - const w = new Worker('wasm_worker.js'); - workers.push(w); + for (let i = 0; i < workers.length; ++i) { + promises.push(worker_message(workers[i], messages[i])); } - for (let i = 0; i < nworkers; ++i) { - workers[i].onmessage = () => { - ++done; - if (done % nworkers === 0) { - console.log('init done'); - - for (let j = 0; j < nworkers; ++j) { - workers[j].onmessage = () => { - ++done; - - if (done % nworkers === 0) { - console.log('work done'); - console.log(new Int32Array(memory.buffer, buf_offset, nworkers)); - } - } - - workers[j].postMessage({ - 'type': 1, - 'num': 10, - }); - } - } - } + return Promise.all(promises); +} - workers[i].postMessage({ - 'type': 0, - 'thread_id': i, - 'memory': memory, - 'stack_base': heap_base, - 'buffer_offset': buf_offset, - 'flags': flags - }); - } +function workers_thread_message(workers, message, thread_field=null) { + const messages = []; -// const results = await WebAssembly.instantiateStreaming(fetch('wasm/multi.wasm')); + for (let i = 0; i < workers.length; ++i) { + if (thread_field !== null) { + const m = structuredClone(message); + m[thread_field] = i; + messages.push(m); + } else { + messages.push(message); + } + } -// state.wasm.exports = results.instance.exports; -// state.wasm.exports.memory.grow(4096); + return workers_messages(workers, messages); } async function init_wasm(state) { - await init_test(); + const memory = new WebAssembly.Memory({ + initial: 32, // 2MiB, 1MiB of which is stack + maximum: 16384, // 1GiB + shared: true, + }); - const results = await WebAssembly.instantiateStreaming(fetch('wasm/lod.wasm')); + // "Master thread" to do maintance on (static allocations, merging results etc) + const master_wasm = await WebAssembly.instantiateStreaming(fetch('wasm/lod.wasm'), { + env: { 'memory': memory } + }); - state.wasm.exports = results.instance.exports; - state.wasm.exports.memory.grow(4096); + const nworkers = navigator.hardwareConcurrency; + + state.wasm.exports = master_wasm.instance.exports; + state.wasm.heap_base = state.wasm.exports.alloc_static(0); + state.wasm.workers = []; + state.wasm.memory = memory; + + for (let i = 0; i < nworkers; ++i) { + const w = new Worker('lod_worker.js'); + state.wasm.workers.push(w); + } + + await workers_thread_message(state.wasm.workers, { + 'type': 'init', + 'heap_base': state.wasm.heap_base, + 'memory': memory, + }, 'thread_id'); state.wasm.stroke_bytes = 4096; state.wasm.coords_bytes = 4096; @@ -95,7 +86,7 @@ async function init_wasm(state) { }, }; - const mem = state.wasm.exports.memory.buffer; + const mem = state.wasm.memory.buffer; state.wasm.buffers['xs'].tv = tv_create_on(Float32Array, state.wasm.coords_bytes / 8, mem, state.wasm.buffers['xs'].offset); @@ -135,11 +126,14 @@ function wasm_ensure_by(state, nstrokes, ncoords) { } if (realloc) { - // TODO: we do memory.grow() somewhere here if needed + const current_pages = state.wasm.memory.buffer.byteLength / (4096 * 16); + const need_pages = (state.wasm.coords_bytes * 3 + state.wasm.stroke_bytes * 2) / (4096 * 16); // TODO: figure out actual memory requirements + const grow_by = need_pages - current_pages; + state.wasm.memory.grow(grow_by); state.wasm.exports.free_static(); - const mem = state.wasm.exports.memory.buffer; + const mem = state.wasm.memory.buffer; const memv = new Uint8Array(mem); buffers['xs'].offset = state.wasm.exports.alloc_static(state.wasm.coords_bytes / 2); @@ -175,35 +169,65 @@ function wasm_ensure_by(state, nstrokes, ncoords) { } } -function do_lod(state, context) { +async function do_lod(state, context) { state.wasm.exports.free_dynamic(); + const buffers = state.wasm.buffers; + const result_buffers = state.wasm.exports.alloc_dynamic(state.wasm.workers.length * 4); + const result_counts = state.wasm.exports.alloc_dynamic(state.wasm.workers.length * 4); const clipped_indices = state.wasm.exports.alloc_dynamic(context.clipped_indices.size * 4); - const mem = new Uint8Array(state.wasm.exports.memory.buffer); + const mem = new Uint8Array(state.wasm.memory.buffer); // Dynamic input data that should (by design) never be too big mem.set(tv_bytes(context.clipped_indices), clipped_indices); - const buffers = state.wasm.buffers; - const segment_count = state.wasm.exports.do_lod( - clipped_indices, context.clipped_indices.size, state.canvas.zoom, - buffers['coords_from'].offset, - buffers['line_threshold'].offset, - buffers['xs'].offset, - buffers['ys'].offset, - buffers['pressures'].offset, - buffers['xs'].used / 4, + // TODO: this is a very naive and dumb way of distributing work. Better way + // would be to distrubute strokes based on total point count, so that + // each worker gets approximately the same amout of _points_ + const indices_per_thread = Math.floor(context.clipped_indices.size / state.wasm.workers.length); + const offsets = { + 'coords_from': buffers['coords_from'].offset, + 'line_threshold': buffers['line_threshold'].offset, + 'xs': buffers['xs'].offset, + 'ys': buffers['ys'].offset, + 'pressures': buffers['pressures'].offset, + 'result_buffers': result_buffers, + 'result_counts': result_counts, + }; + + const jobs = []; + + for (let i = 0; i < state.wasm.workers.length; ++i) { + let count = indices_per_thread; + + if (i === state.wasm.workers.length - 1) { + count += context.clipped_indices.size % state.wasm.workers.length; + } + + jobs.push({ + 'type': 'lod', + 'indices_base': clipped_indices + i * 4 * indices_per_thread, + 'indices_count': count, + 'zoom': state.canvas.zoom, + 'offsets': offsets + }); + } + + await workers_messages(state.wasm.workers, jobs); + const result_offset = state.wasm.exports.merge_results( + result_counts, + result_buffers, + state.wasm.workers.length ); - // Use results without copying from WASM memory - const result_offset = clipped_indices + context.clipped_indices.size * 4 - + (context.clipped_indices.size + 1) * 4 + buffers['xs'].used; + const segment_count = new Int32Array(state.wasm.memory.buffer, result_counts, 1)[0]; // by convention - const wasm_points = new Float32Array(state.wasm.exports.memory.buffer, + // Use results without copying from WASM memory + const wasm_points = new Float32Array(state.wasm.memory.buffer, result_offset, segment_count * 2); - const wasm_ids = new Uint32Array(state.wasm.exports.memory.buffer, + const wasm_ids = new Uint32Array(state.wasm.memory.buffer, result_offset + segment_count * 2 * 4, segment_count); - const wasm_pressures = new Uint8Array(state.wasm.exports.memory.buffer, + const wasm_pressures = new Uint8Array(state.wasm.memory.buffer, result_offset + segment_count * 2 * 4 + segment_count * 4, segment_count); context.instance_data_points.data = wasm_points; diff --git a/client/wasm/compile_command b/client/wasm/compile_command index b496251..412a79b 100644 --- a/client/wasm/compile_command +++ b/client/wasm/compile_command @@ -1 +1 @@ -clang -Oz --target=wasm32 -nostdlib -msimd128 -mbulk-memory -matomics -Wl,--no-entry,--import-memory,--shared-memory,--max-memory=$((1024 * 1024 * 1024)) -z stack-size=$((1024 * 1024)) multi.c -o multi.wasm +clang -Oz --target=wasm32 -nostdlib -msimd128 -mbulk-memory -matomics -Wl,--no-entry,--import-memory,--shared-memory,--export-all,--max-memory=$((1024 * 1024 * 1024)) -z stack-size=$((1024 * 1024)) lod.c -o lod.wasm diff --git a/client/wasm/lod.c b/client/wasm/lod.c index bed8a70..fa7dc81 100644 --- a/client/wasm/lod.c +++ b/client/wasm/lod.c @@ -8,10 +8,11 @@ static int allocated_static; static int allocated_dynamic; void -set_sp(void *sp) +set_sp(char *sp) { - __asm__ volatile( - "local.get 0\n" + __asm__ __volatile__( + ".globaltype __stack_pointer, i32\n" + "local.get %0\n" "global.set __stack_pointer\n" : : "r"(sp) ); @@ -32,16 +33,26 @@ free_dynamic(void) void * alloc_static(int size) { + // This IS NOT thread-safe void *result = &__heap_base + allocated_static; allocated_static += size; return(result); } +static int +round_to_pow2(int value, int multiple) +{ + return((value + multiple - 1) & -multiple); +} + void * alloc_dynamic(int size) { - void *result = &__heap_base + allocated_static + allocated_dynamic; - allocated_dynamic += size; + // Very ad-van-ced thread-safe allocator + // CAN be called from multiple threads + size = round_to_pow2(size, 4); + int original_allocated_dynamic = __atomic_fetch_add(&allocated_dynamic, size, __ATOMIC_SEQ_CST); + void *result = &__heap_base + allocated_static + original_allocated_dynamic; return(result); } @@ -183,21 +194,27 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo return(result); } -int +void do_lod(int *clipped_indices, int clipped_count, float zoom, int *stroke_coords_from, float *line_threshold, float *xs, float *ys, unsigned char *pressures, - int coordinates_count) + char **result_buffer, + int *result_count) { if (clipped_count == 0) { - return(0); + result_count[0] = 0; + return; } + int first_stroke = clipped_indices[0]; + int last_stroke = clipped_indices[clipped_count - 1]; + int total_points = stroke_coords_from[last_stroke + 1] - stroke_coords_from[first_stroke]; + int *segments_from = alloc_dynamic((clipped_count + 1) * 4); - int *segments = alloc_dynamic(coordinates_count * 4); + int *segments = alloc_dynamic(total_points * 4); // TODO: this is a very conservative estimate, we can lower memory usage if we get this tighter int segments_head = 0; int stack[4096]; // TODO: what's a reasonable max size for this? @@ -267,9 +284,11 @@ do_lod(int *clipped_indices, int clipped_count, float zoom, segments_from[clipped_count] = segments_head; // Write actual coordinates (points) and stroke ids - float *points = alloc_dynamic(segments_head * 2 * 4); - int *ids = alloc_dynamic(segments_head * 4); - unsigned char *pressures_res = alloc_dynamic(segments_head); + // Do this in one allocation so that they're not interleaved between threads + char *output = alloc_dynamic(segments_head * (3 * 4 + 1)); + float *points = (float *) output; + int *ids = (int *) (output + segments_head * 4 * 2); + unsigned char *pressures_res = (unsigned char *) (output + segments_head * 4 * 3); int phead = 0; int ihead = 0; @@ -300,5 +319,40 @@ do_lod(int *clipped_indices, int clipped_count, float zoom, } } - return(segments_head); + result_buffer[0] = output; + result_count[0] = segments_head; +} + +// NOT thread-safe, only call from one thread +char * +merge_results(int *segment_counts, char **buffers, int nthreads) +{ + int total_segments = 0; + + for (int i = 0; i < nthreads; ++i) { + total_segments += segment_counts[i]; + } + + char *merged = alloc_dynamic(total_segments * (3 * 4 + 1)); + + float *points = (float *) merged; + int *ids = (int *) (merged + total_segments * 4 * 2); + unsigned char *pressures = (unsigned char *) (merged + total_segments * 4 * 3); + + for (int i = 0; i < nthreads; ++i) { + int segments = segment_counts[i]; + if (segments > 0) { + __builtin_memcpy(points, buffers[i], segments * 4 * 2); + __builtin_memcpy(ids, buffers[i] + segments * 4 * 2, segments * 4); + __builtin_memcpy(pressures, buffers[i] + segments * 4 * 3, segments); + + points += segments * 2; + ids += segments; + pressures += segments; + } + } + + segment_counts[0] = total_segments; + + return(merged); } diff --git a/client/wasm/lod.wasm b/client/wasm/lod.wasm index 7f218d765d942c5377cdfb9635a0b7eb309750e0..1a066e784cb0ba3e8c47760d5a2ee5e6ea2b7c1f 100755 GIT binary patch literal 2844 zcmZuz&2k&Z5uTaFANPm&BSlG|sGcDuOCmu@v`EUbt^(<(!lmN9mGu<=YUw_>*v!avk zNg;&P#V0~Mkt2TO=!rWTeKE>^;)#Qf9I1wM;{9Kh_Ts(4!3*Wo$7AVuu8{Z(rJO?P z=fOyZFTebo@J1s1@pU2@S6Kd)eZS1#lexKOJpTKSizLtevf`Ehq0ZE!=riDqwmbcP zv^^Xgq(Zn+H0-BQy8G+ckzPLN^mljm9aWu7-`(FG@+2WkX*`V5=fd%x9>j5!4m-o$ zZBg*;sQY5Svxi~Phxx(wiM9lz&*IMWXseUPjuYg{lgZ^$cM$alT~P`4;)9(yI*8N5 z{xB6&eiU`nLG-M%-|fdv0XVyzVJC|ByG{`%;d{G>m7h5!%*;p6c6XjRfve{?J{^t_TFu_RCrSQLs4 zWxKzdbXzPXeqB+*hy-ekxG)lL0kwjGlZ3zh^msvGiJg^e6YsmgF{8>C-ftE~_~)_g znZ^>}l6+XWM^n}jppZiXmP0j6(C8*fbWK1a2@AbkL2yv8S{MF;u>Al1=?l=h)a=24 zMPsf^P+y%O(hIOI`1z=%-=ZR{>T6WuX88jl#9dNfP2Bh zN@yD4heFT5%5)g?Pls+PF=rw7`-#@XJ!hC`Ls#f(k|WTIbasO2T%wm?setD^{LJKj zrmP35&=MT^1rLsAm=Bl3H$gZ`twbh<#a1X2U8R5|{=tVWgLhAR7!(rjp(UN@8sMDg z71}351=}kS=&~j#9OL)-fxbkG;FqVhkQbV`(DMN3frTQq0E6Ze(}c{F9U*`QW1>q`ZE+>FnEMu0L)k+Vepq3EwGc7Y$YY@g zu$WR{^od687D?)YuRLM@Ts?G=!8AFN9F=j=)w9Hq7;lp<#+N0T?b}@#p zGpI@hXaQ(NNap4z2zG(a@LX7tJx&HnwSg_=2x?fFIlG}M2yHge=R&0~(>0o9|1Mx6 z`elv_+EDG8=|ofL=q6}vB8lQ7m}i=erUo-t=^4Y2>o6UJPS4Vtm_?3Aa`~NLsWWG1 zEM!o?0rD0xb5ws6NTg?$<`Of{mVM3*Bz54+QE_0`Uz_$<)TZ_kQk73Bp$1rW!4L=3uqm^L30QKi9F9!EF)Dg z(&D6+X&M;08qCsPVeVZksppbQEP0P^;Qc;b$NO&P|L2XL-lW@loePruk3mg?HZ!mA zaxBqXM-Nw9IMP0EXQ5ZHyF8v7)<7e&T%>sdTi8c9QU+$8&kD|16?~wng(Fn+3a$Ra zxUGp14AEb~vU!xFbv*Ym*oJ?qGRfCp)7wY1Ml~MXczyKh_t2fQA^LQwXI$jz;-PU< za(iFD!m6b=Wu{H5kD2ICKDB<4p^4Kf7k?Ey0*&1fiS~Rh{UU;Cga|t!&+cJ296bt9 zo7l=$#yN$V#}6TF1HP+$4?Mt*hf)T3p6}o=@VTZI^DWUJ&Gjf*WH+=`qCk`ToQCSA zmDduoh3JMeNxnP?O9_td%Uqe4eWyrwsG1=GbT|8_%>E_m9je*!`}sKgR%YMG%;zMX z{((43vR?|31vQE9lwzsem~y>lFkQ{Qm*y6|$Lqh5bc^22du-`j)~rLeo;@(PvY+X7 z+MrG1-X@NgTFxLEW{?S>jm$5=@98J|i_{xnhROAQ*l^h60!PEm#}5&l`pE&voUsR+ zhf@c^Ud(snw9N^~$2FJWD$S&HMK+Q#!IHuom&@sU)!?YIcT9y3EqljIkOWPu*GDlLj!Gkowwsd)7 zZR+{KpnJF-AEeSpcRNngxZB=(A^ivO)^2B?w)>s^9q;DG>W$S6y0Uibs`zPj*um%6 zFnStyhKKklD=wEi!vVhU(&g&jq5WoR=bu^Wayi}I>)u?uz3gor_IvI8yJ@+S;w!Qp He?I--*C literal 2192 zcmZ8iOLG%P5T4mx$=Zirwy}*3re`o9#|G4Z%yTOFNmEDC==~ejcQ4#;VBRjgbAe?oQx4rd9h9qNWsA=!~ zw23{V4^oMZ%Bb1Blyf$g4(Om!2XI92b9YW1p@621I&>4AzDCZESVuQ^^_gr!U5bSY z+;y6wBX_)YI%E?W3VZsTwA(6psehB8_IVHA>N@I4RA5 zWO((gIpqlDkPo{)9G46gK?iJW*mM}ZAcv(g+%yLA8$w(b0+^szQG7JVqY%Zi!$n{} z68i`PnB^Fil__Lj2*~hxT!T@SNES#cFLFX*|A1A{6yxz39Se1%BZ#Vs;GTu-)1kb3 zsL{5|PAOG8$Q5v|2@I73XgI1-fj;{HB_E&(W&xmN2T*z7YB$8HE2w5|`e@TBp|q+| z3$deSz!B4?1EY?C3oOUk8#`P2uu&&s*UnUc_KCX)Z=1VFHKS2=N1tTK6@;$GZbw^` z`V?0rc?9>dRG6~~3mFsuhS$L!JxP^2Wyt{0aXp^XfkI%Z03Yj-(eqr|%6r}To*LBH z1xBCyg%wVpo#)_$=iLu_dH+6~$9=er{htgMV7Nk)K&h7K7*(=*7V*~-Uqj*I@$h87 zNWGfP2_16kGLLDQj&o1_%txJ9Y2L1%fuKn`Yt{nQ#= zQca#rTDk{zYP6Px-rF4ssR?OkN5&)N^UW5jfm;>w-na%P(dYt=BWVQ}3SW$#4m+PP z770+x@dz{Bpfg|U@wvSqP$4&PEW||G#FO_xI66QPQ@HY;F6`2IDzkF#{qDQ(;9Ia| z`E18_eka>RamCZ}b`$>6=1;OIz~0;$;AY<7HdI%wy=KVHVh~U6?Wo5^qXL@Y;hc%w0$rgfLj&lu>`$5f zCFwGit$sDvvyU?SgDE^=7BMk(eKDI7XgFb&A2O0GI1n5bTnMhdNZ0u=j$Nc{xx`^U zN_d*EHM3g?b6+)SmDX_VJ|-oKYdL*bm_DwaRGqT2%7x{#%S|y`9j5&!_`m - -extern char __heap_base; - -static int allocated; - -void -set_sp(char *sp) -{ - __asm__ __volatile__( - ".globaltype __stack_pointer, i32\n" - "local.get %0\n" - "global.set __stack_pointer\n" - : : "r"(sp) - ); -} - -void * -alloc(int size) -{ - void *result = &__heap_base + allocated; - allocated += size; - return(result); -} - -static void -impl(int *buffer, int index, int number) -{ - buffer[index] = number; -} - -void -write_a_number(int *buffer, int index, int number) -{ - int n = number * 2; - impl(buffer, index, n); -} diff --git a/client/wasm/multi.wasm b/client/wasm/multi.wasm deleted file mode 100755 index 01e32fb06a0a4121abffd3cfbb8dc2db83326539..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 680 zcmZWmv2NQi5WOQ~*)i1^afblyB6Lb2IB1N*=ol3l3gjbbN@8R=lw^oh-E@%(WXPDQ z-_xPrWt5asv_(PiaPRKDBk>M|E)W3l<7QrF3XfUsWu|H0Mb2M(S{^^CG=meh69IsJq4BOgPbY*9-Z@`~p72xtjn0 diff --git a/client/wasm_worker.js b/client/wasm_worker.js deleted file mode 100644 index 0e0ff7c..0000000 --- a/client/wasm_worker.js +++ /dev/null @@ -1,40 +0,0 @@ -let thread_id = null; -let buf_offset = null; -let exports = null; -let flags = null; - -function done() { - postMessage('done'); -} - -async function init_wasm(tid, memory, offset, notify_flags, stack_base) { - thread_id = tid; - buf_offset = offset; - - const result = await WebAssembly.instantiateStreaming(fetch('wasm/multi.wasm'), { - env: { 'memory': memory } - }); - - exports = result.instance.exports; - //console.log(tid, 'init'); - exports.set_sp(stack_base - thread_id * 4096); - - flags = notify_flags; - done(); -} - -function do_work(num, callback) { - //console.log(thread_id, 'work'); - exports.write_a_number(buf_offset, thread_id, thread_id * num); - done(); -} - -onmessage = (e) => { - if (e.data.type === 0) { - init_wasm(e.data.thread_id, e.data.memory, e.data.buffer_offset, e.data.flags, e.data.stack_base); - } else if (e.data.type === 1) { - do_work(e.data.num); - } -} - - diff --git a/client/webgl_draw.js b/client/webgl_draw.js index ee6f51d..546799c 100644 --- a/client/webgl_draw.js +++ b/client/webgl_draw.js @@ -1,7 +1,7 @@ function schedule_draw(state, context) { if (!state.timers.raf) { - window.requestAnimationFrame(() => { - draw(state, context); + window.requestAnimationFrame(async () => { + await draw(state, context); }); state.timers.raf = true; } @@ -73,7 +73,7 @@ function draw_html(state) { } } -function draw(state, context) { +async function draw(state, context) { const cpu_before = performance.now(); state.timers.raf = false; @@ -89,22 +89,23 @@ function draw(state, context) { gl.beginQuery(context.gpu_timer_ext.TIME_ELAPSED_EXT, query); } - gl.viewport(0, 0, context.canvas.width, context.canvas.height); - gl.clearColor(context.bgcolor.r, context.bgcolor.g, context.bgcolor.b, 1); - gl.clearDepth(0.0); - gl.clear(gl.COLOR_BUFFER_BIT | gl.DEPTH_BUFFER_BIT); - locations = context.locations['sdf'].main; buffers = context.buffers['sdf']; - gl.useProgram(context.programs['sdf'].main); - bvh_clip(state, context); - const segment_count = geometry_write_instances(state, context); + const segment_count = await geometry_write_instances(state, context); const dynamic_segment_count = context.dynamic_segment_count; const dynamic_stroke_count = context.dynamic_stroke_count; + // Only clear once we have the data, this might not always be on the same frame? + gl.viewport(0, 0, context.canvas.width, context.canvas.height); + gl.clearColor(context.bgcolor.r, context.bgcolor.g, context.bgcolor.b, 1); + gl.clearDepth(0.0); + gl.clear(gl.COLOR_BUFFER_BIT | gl.DEPTH_BUFFER_BIT); + + gl.useProgram(context.programs['sdf'].main); + // "Static" data upload if (segment_count > 0) { const total_static_size = context.instance_data_points.size * 4 + diff --git a/client/webgl_geometry.js b/client/webgl_geometry.js index 7d7bce4..ad22b65 100644 --- a/client/webgl_geometry.js +++ b/client/webgl_geometry.js @@ -37,11 +37,11 @@ function geometry_prepare_stroke(state) { } -function geometry_write_instances(state, context) { +async function geometry_write_instances(state, context, callback) { state.stats.rdp_max_count = 0; state.stats.rdp_segments = 0; - const segment_count = do_lod(state, context); + const segment_count = await do_lod(state, context); if (config.debug_print) console.debug('instances:', segment_count, 'rdp max:', state.stats.rdp_max_count, 'rdp segments:', state.stats.rdp_segments);