From 6d9791d3e06f2c163bdf51ec9d1eee486c6cce59 Mon Sep 17 00:00:00 2001 From: "A.Olokhtonov" Date: Mon, 1 Apr 2024 00:28:04 +0300 Subject: [PATCH] Cache full stroke lists in BVH levels (massive speedup!). Remove "line_threshold" array, since it has very little effect. --- client/bvh.js | 28 ++++++++++++++++++++++------ client/index.js | 1 + client/lod_worker.js | 1 - client/math.js | 4 ++-- client/speed.js | 14 -------------- client/wasm/lod.c | 12 ------------ client/wasm/lod.wasm | Bin 2858 -> 2784 bytes client/webgl_geometry.js | 3 --- 8 files changed, 25 insertions(+), 38 deletions(-) diff --git a/client/bvh.js b/client/bvh.js index f1b34c7..94486ff 100644 --- a/client/bvh.js +++ b/client/bvh.js @@ -54,8 +54,6 @@ function bvh_compute_sah(bvh, new_leaf, potential_sibling, only_parent = false) return cost; } -// todo area func - function bvh_find_best_sibling(bvh, leaf_index) { // branch and bound @@ -171,6 +169,13 @@ function bvh_intersect_quad(bvh, quad, result_buffer) { continue; } + if (node.is_fullnode) { + if (quad_fully_inside(quad, node.bbox)) { + tv_append(result_buffer, node.stroke_indices.data); + continue; + } + } + if (node.is_leaf) { tv_add(result_buffer, node.stroke_index); } else { @@ -207,7 +212,7 @@ function bvh_clip(state, context) { tv_data(context.clipped_indices).sort(); // we need to draw back to front still! } -function bvh_construct_rec(bvh, vertical, strokes) { +function bvh_construct_rec(bvh, vertical, strokes, depth) { if (strokes.length > 1) { // internal let sorted_strokes; @@ -221,8 +226,8 @@ function bvh_construct_rec(bvh, vertical, strokes) { const node_index = bvh_make_internal(bvh); const left_of_split_count = Math.floor(strokes.length / 2); - const child1 = bvh_construct_rec(bvh, !vertical, sorted_strokes.slice(0, left_of_split_count)); - const child2 = bvh_construct_rec(bvh, !vertical, sorted_strokes.slice(left_of_split_count, sorted_strokes.length)); + const child1 = bvh_construct_rec(bvh, !vertical, sorted_strokes.slice(0, left_of_split_count), depth + 1); + const child2 = bvh_construct_rec(bvh, !vertical, sorted_strokes.slice(left_of_split_count, sorted_strokes.length), depth + 1); bvh.nodes[child1].parent_index = node_index; bvh.nodes[child2].parent_index = node_index; @@ -231,6 +236,17 @@ function bvh_construct_rec(bvh, vertical, strokes) { bvh.nodes[node_index].child2 = child2; bvh.nodes[node_index].bbox = quad_union(bvh.nodes[child1].bbox, bvh.nodes[child2].bbox); + if (depth === config.bvh_fullnode_depth) { + const indices = tv_create(Int32Array, round_to_pow2(strokes.length, 32)); + + for (let i = 0; i < strokes.length; ++i) { + tv_add(indices, strokes[i].index); + } + + bvh.nodes[node_index].stroke_indices = indices; + bvh.nodes[node_index].is_fullnode = true; + } + return node_index; } else { // leaf @@ -240,6 +256,6 @@ function bvh_construct_rec(bvh, vertical, strokes) { function bvh_construct(state) { if (state.events.length > 0) { - state.bvh.root = bvh_construct_rec(state.bvh, true, state.events); + state.bvh.root = bvh_construct_rec(state.bvh, true, state.events, 0); } } diff --git a/client/index.js b/client/index.js index 0f484b4..384b07c 100644 --- a/client/index.js +++ b/client/index.js @@ -24,6 +24,7 @@ const config = { initial_dynamic_bytes: 4096, stroke_texture_size: 1024, // means no more than 1024^2 = 1M strokes in total (this is a LOT. HMH blackboard has like 80K) dynamic_stroke_texture_size: 128, // means no more than 128^2 = 16K dynamic strokes at once + bvh_fullnode_depth: 5, benchmark: { zoom: 0.00001, offset: { x: 425, y: -1195 }, diff --git a/client/lod_worker.js b/client/lod_worker.js index 311f9f0..b666434 100644 --- a/client/lod_worker.js +++ b/client/lod_worker.js @@ -19,7 +19,6 @@ function work(indices_base, indices_count, zoom, offsets) { exports.do_lod( indices_base, indices_count, zoom, offsets['coords_from'], - offsets['line_threshold'], offsets['xs'], offsets['ys'], offsets['pressures'], diff --git a/client/math.js b/client/math.js index c9c4519..3016552 100644 --- a/client/math.js +++ b/client/math.js @@ -282,8 +282,8 @@ function quads_intersect(a, b) { return false; } -function quad_fully_onscreen(screen, bbox) { - if (screen.x1 < bbox.x1 && screen.x2 > bbox.x2 && screen.y1 < bbox.y1 && screen.y2 > bbox.y2) { +function quad_fully_inside(outer, inner) { + if (outer.x1 < inner.x1 && outer.x2 > inner.x2 && outer.y1 < inner.y1 && outer.y2 > inner.y2) { return true; } diff --git a/client/speed.js b/client/speed.js index 9cfbaa6..c03d4d8 100644 --- a/client/speed.js +++ b/client/speed.js @@ -76,10 +76,6 @@ async function init_wasm(state) { 'offset': state.wasm.exports.alloc_static(state.wasm.stroke_bytes), 'used': 0, }, - 'line_threshold': { - 'offset': state.wasm.exports.alloc_static(state.wasm.stroke_bytes), - 'used': 0, - }, 'pressures': { 'offset': state.wasm.exports.alloc_static(state.wasm.coords_bytes / 8), 'used': 0 @@ -94,8 +90,6 @@ async function init_wasm(state) { mem, state.wasm.buffers['ys'].offset); state.wasm.buffers['coords_from'].tv = tv_create_on(Uint32Array, state.wasm.stroke_bytes / 4, mem, state.wasm.buffers['coords_from'].offset); - state.wasm.buffers['line_threshold'].tv = tv_create_on(Float32Array, state.wasm.stroke_bytes / 4, - mem, state.wasm.buffers['line_threshold'].offset); state.wasm.buffers['pressures'].tv = tv_create_on(Uint8Array, state.wasm.coords_bytes / 8, mem, state.wasm.buffers['pressures'].offset); @@ -107,7 +101,6 @@ function wasm_ensure_by(state, nstrokes, ncoords) { const buffers = state.wasm.buffers; const old_coords_from_offset = buffers['coords_from'].offset; - const old_line_threshold_offset = buffers['line_threshold'].offset; const old_pressures_offset = buffers['pressures'].offset; const old_size_coords = state.wasm.coords_bytes; @@ -139,20 +132,17 @@ function wasm_ensure_by(state, nstrokes, ncoords) { buffers['xs'].offset = state.wasm.exports.alloc_static(state.wasm.coords_bytes / 2); buffers['ys'].offset = state.wasm.exports.alloc_static(state.wasm.coords_bytes / 2); buffers['coords_from'].offset = state.wasm.exports.alloc_static(state.wasm.stroke_bytes); - buffers['line_threshold'].offset = state.wasm.exports.alloc_static(state.wasm.stroke_bytes); buffers['pressures'].offset = state.wasm.exports.alloc_static(state.wasm.coords_bytes / 8); buffers['xs'].tv = tv_create_on(Float32Array, state.wasm.coords_bytes / 8, mem, buffers['xs'].offset); buffers['ys'].tv = tv_create_on(Float32Array, state.wasm.coords_bytes / 8, mem, buffers['ys'].offset); buffers['coords_from'].tv = tv_create_on(Uint32Array, state.wasm.stroke_bytes / 4, mem, buffers['coords_from'].offset); - buffers['line_threshold'].tv = tv_create_on(Float32Array, state.wasm.stroke_bytes / 4, mem, buffers['line_threshold'].offset); buffers['pressures'].tv = tv_create_on(Uint8Array, state.wasm.coords_bytes / 8, mem, buffers['pressures'].offset); // TODO: this should have been automatic maybe? buffers['xs'].tv.size = buffers['xs'].used / 4; buffers['ys'].tv.size = buffers['ys'].used / 4; buffers['coords_from'].tv.size = buffers['coords_from'].used / 4; - buffers['line_threshold'].tv.size = buffers['line_threshold'].used / 4; buffers['pressures'].tv.size = buffers['pressures'].used; const tmp = new Uint8Array(Math.max(state.wasm.coords_bytes, state.wasm.stroke_bytes)); // TODO: needed? @@ -161,9 +151,6 @@ function wasm_ensure_by(state, nstrokes, ncoords) { tmp.set(new Uint8Array(mem, old_pressures_offset, buffers['pressures'].used)); memv.set(new Uint8Array(tmp.buffer, 0, buffers['pressures'].used), buffers['pressures'].offset); - tmp.set(new Uint8Array(mem, old_line_threshold_offset, old_size_strokes)); - memv.set(new Uint8Array(tmp.buffer, 0, old_size_strokes), buffers['line_threshold'].offset); - tmp.set(new Uint8Array(mem, old_coords_from_offset, old_size_strokes)); memv.set(new Uint8Array(tmp.buffer, 0, old_size_strokes), buffers['coords_from'].offset); } @@ -187,7 +174,6 @@ async function do_lod(state, context) { const indices_per_thread = Math.floor(context.clipped_indices.size / state.wasm.workers.length); const offsets = { 'coords_from': buffers['coords_from'].offset, - 'line_threshold': buffers['line_threshold'].offset, 'xs': buffers['xs'].offset, 'ys': buffers['ys'].offset, 'pressures': buffers['pressures'].offset, diff --git a/client/wasm/lod.c b/client/wasm/lod.c index 949f5a1..fd5508f 100644 --- a/client/wasm/lod.c +++ b/client/wasm/lod.c @@ -197,7 +197,6 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo void do_lod(int *clipped_indices, int clipped_count, float zoom, int *stroke_coords_from, - float *line_threshold, float *xs, float *ys, unsigned char *pressures, @@ -236,13 +235,6 @@ do_lod(int *clipped_indices, int clipped_count, float zoom, // Basic CSR crap segments_from[i] = segments_head; - if (zoom < line_threshold[stroke_index]) { - // Fast paths for collapsing to a single line segment - segments[segments_head++] = 0; - segments[segments_head++] = point_count - 1; - continue; - } - int segment_count = 2; int stack_head = 0; @@ -280,10 +272,6 @@ do_lod(int *clipped_indices, int clipped_count, float zoom, } segments[segments_head++] = point_count - 1; - - if (segment_count == 2 && zoom > line_threshold[stroke_index]) { - line_threshold[stroke_index] = zoom; - } } segments_from[clipped_count] = segments_head; diff --git a/client/wasm/lod.wasm b/client/wasm/lod.wasm index c2827dad5c1162e7b4b8a2a78642e0b0fab0eb6e..2e56d77fa67992d6396eeb6d9d9feeee965a015f 100755 GIT binary patch literal 2784 zcmZuzOLG+06~2#o^-J@RkU#_C+$-4z31K9#z${WU#+1vh^1@ywcGJ={LOml5R*!6x z#5J?X!d3BtfU?go$Rb&%DvSJr`~ojX{zTG>ztcSmDw6K%zV|%tIp00!+=qy>eJO;H zRDC4GBRS?zjvqPW@pt3m5sx%lIaVF1lY_5o`^o$wX?;6%wydN>^&12V)sN z|NgJS8;kJu@02rXF#m_*|GfN9bxX-)^0(K6B7=4n&PowQPOSBG3;i+=H)*D8HVNW!I{p4^ri4K$OXfVoz<3~|1 z8%9s!gWe#~72xc}qc}Mco6Suzw^Hmp&C-TuUf7a(&O)eQ1|#nyh9PnlaG{` zQ9>#&oHXu>+kz7js*?p(bjBw_iA7&}%$4`YClaLR)dYzZfqKB8P;9B9`ig7Jq~Xgr9N`Em4Z-EzkuXVO%og z7d@W@m3EiH37&5c%?ep+7Q#g&V`=KbbODg_Q=vu9NjJ4Eh-@=mRl+E8>3nKu5k;y} z!Mg*cSyJ2tkWr0V>ADbig#a4ROSD2xmuqQ}W50;hBBu_yVS@n%A?6fJ6AiFX@fm$; zE+FkBjqfW@6faN;y2N0LnJ{3>bpn_e!_{QY0m{WPb95Owfl5*4)WH(bf;zx-4uMVW z!j@_P>$%jNr>o$>st=!#&oM|yLbY$-PR&xNO%pMhVW2)bO-x^Wj0ny0AfgVVX!NaN z0ByLK4Rs&sV|W(4P;yaSE)zj*j)ZS!eC=_O_n~0tsr@jJi1<00Pi>dYx*TqB+Ca#( z!?eBj(yAXwkvc~7OO+~>qV<<{l~#{m{LcQjtgoZ2-=o#nb^b0GThzuxte3aZQE7fr zqSa^>+6r22HfbI+FfF>kDvX8GMT?xL8qL$&DJR%2{~6|PgVR3ctT5*et>gbbUBmxw z?*HeF$K0e_W`k>tTAv~=9lDu&mFE+Q-UfO&ufnl$d4;0fuwaV)B5Wasii@%f0Bz1- zSQ*-GpYgRAUlnC%f^ZC=iq?K^TivMtd}Gj^{r`v^{7#9OP>+>D^g<(QXFr`uS-WnC0` zirtK>xr)hjLQFd)uUOemIDQzQK(RUt9;X{-zWfl(Rls+Q>tU6^4z@!N;8|Z|eY;#x zc3S0F{wPMtEOuQAQJv|1PDAr{!EdEy6*}SERAV-*rKaiYDs7RIBl2`N|A)-~Dd{~5 z3i|z$&R@y=N16MaowE-jwv^_-5F!s+61R?Ot=@55Z#I}|=KqrR2HoNDKT5hmx62+| zdc~3r@HX-X_D24h*`Q6jN!+^${6$^^Za8#2bK1=P3jCh^@;^$miI8wq5tq|-hV{*x z1=j2(-Um{wIM(2orso}kjk7And6-M-j&V|HQla1vCso}j6pKQMD&DXZX)IL&iVA54 zqp>}ydniziA(c1IhqH&HRS@$r(LhyHiethdWoocq{}6>JNEa1=Gmdwa8c8s$%igNc zw%)&8Bm^svIUe?1u6e&;HcDod%~Q2tRSDZ_1M1k#92oObQ}d>Xsm?R07lA;i0Goq^ z?RC?6dN}MI?Ieep^wHf(vMlMXZ$Fd%gJgRzKA@dJe6Z_nY+b)`eT%Ma-nuG2X^vvt zsYcP`Bpw~%W>sCQ$D<)GY1vwH|ERe5te1DCY^|Q{?e{h|@2q*-M}z))dHGpuWVi~h IC*O{K0%`C=?*IS* literal 2858 zcmZuz-EtJi74GiY-|3lMt$slg;&g*;V37bJ1jcrX!9ZM2Di_XWVuqD=5Sm?Sv1S#U zB(B|yRLWKH1p#uO7f3Gb7sy4PAWz^6CohqVlJE4au*&gn&2;za)92?qKRu$8?MWeo zH04J^Jdz`R%o@ zM$fefK)1jB8B)(!HM-?+I_QGamo_yF-$Bzo_ZzKh%6Hjz5K+@ph-*kGF?| zgG>lNj)(m$&USyDcv2UOPJef2-&4&A`|kekkeP(6WXUkjo(fMtK1h-{8+L}f+oBY> zRrlF`XAjGA0P%zE7XcwSg+9y3=f-9g+RbVV)POAdCD_#nv+`@>As zgE;PHgZN2jzuQl|671}DhMhRs?|Nm3gzoJgPJZfDz*(%G?Cw1As$m@O^aop=exb33 zlTJUMtYfplz1Z||cG%hKC&f}PoOF4@__*KM@q+39-w~=NmH(HP?+K~-xhK>e{sZqw zMDpkZB^pXdrK54}uDB^U5Rp1sP;N3h5=txvQZrTF9UVz5wXO&f=LPZsha$15-1K*o z=@L_kUl)|HBE>dVoEWKIL9SrorO|IcIa*OT;<$?4RR6x=nNjU?{hMjOW88YfRwXp#XG`G}J_Dm|8z7rI=Hgl8>=xLee1NU=Yck2?AMm0@N_7 z{;&b&G#Ek%bGF%~i4V&Y`^+4bS>PF}@UZf}5aLIq{)BP#{wuqh&zPGqYk|xISauq= z!Ypb6fJm4IL^q?bf6{peik${M=%;3Zc;*dLbBdN|IW4rBMVg-2F_W4lNb>PaLC;j7 zr|vXRjh3J&sA?#lf?_+zVAxVL${aTZ!kmGD9G^0kI3{OOyMiryHoc^TQ8Yud zsU@ID6>_v&U?NgHgfx>+f zYN?rJn^V(3*hv~cP+GV@tQ54s#`p{dPc*Q>)Jy}pB8PlBT|^Fwo9vTIbDHi!Ns~3t zATJd##L=^G$zJ^!&d$*c^THb0<6uZlEfkd}sO4yuHdPI-O{d023SmBj*g8YZIk>r! zn)AFHsXe=xnl~cPGy!81ffFA>ybJ4_)Z$dH_3T@0as#5LBCltimG%OABsu+#ZD}xO zGtOi{z(DoDV}Z_tP$E3j9EthD3OE8v1NQM+bFA#zOWXdE+SGo9Q29*4SZrOv7U(bj zXfGevz%|ssEpWdktb_12B7yV^@|FupzAQ0%X}+v5mN0te24dzhxaXKkj`_6 zt29fCDQAPd_%+1c2BmRKxx|!r=nCHN(i-0H<-vcy_cNPx(`*1qpMswPnig&5y7YW3 zG26flms>P4K9{pFZB&e+dkkIo+2`Hbd! zU%kMoi#_WjcA)tg`Jma$wEB>N{`eE8mo6VGo%FnrE5g-BiqniJD)7qmHt8Gzp!f2B%KTrFZc*sg?-uL)TbX|& z^MHeN@(1AFNb_F`k%vP`;o<}Nztcb6?s!>QJh4fY2

=* zV}faDyKJF{KBw*=c|9B{eea?lA0JVwyE_(*07TSl;E1(?SZ485cBC?)71zKXq^9mVhpFg^#Z+ZastyPM z2(uk@TCCTf9t^sN+sQ#D15CG*EK9npThC;0FWK7d?9+C?v%jOSZLVEi+oTKY8yCe- zn!^r0&W7>hq%%CkCtA5(?F> 16) & 0xFF; const g = (color_u32 >> 8) & 0xFF;