From 9bbab32369a2629599e78b30f0fc9ff329738b26 Mon Sep 17 00:00:00 2001 From: "A.Olokhtonov" Date: Thu, 28 Dec 2023 02:11:46 +0300 Subject: [PATCH] Try to speed up the LOD calculation with a cache (kinda successful, but not really) --- client/index.html | 32 +++++++++---------- client/index.js | 7 ++++- client/math.js | 67 ++++++++++++++++++++++++++++------------ client/webgl_geometry.js | 56 ++++++++++++++++++++++++--------- 4 files changed, 111 insertions(+), 51 deletions(-) diff --git a/client/index.html b/client/index.html index 2e8cb41..c8c94f7 100644 --- a/client/index.html +++ b/client/index.html @@ -7,22 +7,22 @@ - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + +
diff --git a/client/index.js b/client/index.js index 07f9724..f18ef44 100644 --- a/client/index.js +++ b/client/index.js @@ -31,6 +31,7 @@ const config = { tile_size: 16, clip_zoom_threshold: 0.00003, stroke_texture_size: 1024, + rdp_cache_threshold: 100, }; const EVENT = Object.freeze({ @@ -209,7 +210,11 @@ function main() { 'render_from': 0, 'render_to': 0, 'draw_bvh': false, - } + }, + + 'rdp_cache': {}, + + 'stats': {}, }; const context = { diff --git a/client/math.js b/client/math.js index a4714c7..1960d2f 100644 --- a/client/math.js +++ b/client/math.js @@ -11,8 +11,16 @@ function screen_to_canvas(state, p) { } function rdp_find_max(state, zoom, stroke, start, end) { + // Finds a point from the range [start, end) with the maximum distance from the line (start--end) const EPS = 1.0 / zoom; - // const EPS = 10.0; + let cache_key = null; + + if (end - start > config.rdp_cache_threshold) { + cache_key = stroke.index + '-' + zoom + '-' + start + '-' + end; + if (cache_key in state.rdp_cache) { + return state.rdp_cache[cache_key]; + } + } let result = -1; let max_dist = 0; @@ -26,23 +34,17 @@ function rdp_find_max(state, zoom, stroke, start, end) { const dy = by - ay; const dist_ab = Math.sqrt(dx * dx + dy * dy); - const sin_theta = dy / dist_ab; - const cos_theta = dx / dist_ab; + const dir_nx = dy / dist_ab; + const dir_ny = -dx / dist_ab; - for (let i = start; i < end; ++i) { + for (let i = start + 1; i < end; ++i) { const px = state.coordinates.data[stroke.coords_from + i * 2 + 0]; const py = state.coordinates.data[stroke.coords_from + i * 2 + 1]; - const ox = px - ax; - const oy = py - ay; - - const rx = cos_theta * ox + sin_theta * oy; - const ry = -sin_theta * ox + cos_theta * oy; + const apx = px - ax; + const apy = py - ay; - const x = rx + ax; - const y = ry + ay; - - const dist = Math.abs(y - ay); + const dist = Math.abs(apx * dir_nx + apy * dir_ny); if (dist > EPS && dist > max_dist) { result = i; @@ -50,19 +52,36 @@ function rdp_find_max(state, zoom, 
stroke, start, end) { } } + state.stats.rdp_max_count++; + state.stats.rdp_segments += end - start - 1; + + if (end - start > config.rdp_cache_threshold) { + state.rdp_cache[cache_key] = result; + } + return result; } function process_rdp_indices_r(state, zoom, mask, stroke, start, end) { + // Looks like the recursive implementation spends most of its time in the function call overhead + // Let's try to use an explicit stack instead to give the js engine more room to play with + // Update: it's not faster. But it gives more sensible source-line samples in chrome profiler, so I'll leave it + let result = 0; + const stack = []; + + stack.push({'start': start, 'end': end}); - const max = rdp_find_max(state, zoom, stroke, start, end); + while (stack.length > 0) { + const region = stack.pop(); + const max = rdp_find_max(state, zoom, stroke, region.start, region.end); - if (max !== -1) { - mask[max] = 1; - result += 1; - result += process_rdp_indices_r(state, zoom, mask, stroke, start, max); - result += process_rdp_indices_r(state, zoom, mask, stroke, max, end); + if (max !== -1) { + mask[max] = 1; + result += 1; + stack.push({'start': region.start, 'end': max}); + stack.push({'start': max, 'end': region.end}); + } } return result; @@ -103,8 +122,18 @@ function process_ewmv(points, round = false) { } function process_stroke(state, zoom, stroke) { + // Try caching the highest zoom level that only returns the endpoints + if (zoom <= stroke.turns_into_straight_line_zoom) { + return 2; + } + // const result0 = process_ewmv(points); const result1 = process_rdp_indices(state, zoom, stroke, true); + + if (result1 === 2 && zoom > stroke.turns_into_straight_line_zoom) { + stroke.turns_into_straight_line_zoom = zoom; + } + return result1; } diff --git a/client/webgl_geometry.js b/client/webgl_geometry.js index ff37c01..8965913 100644 --- a/client/webgl_geometry.js +++ b/client/webgl_geometry.js @@ -40,8 +40,14 @@ function geometry_write_instances(state, context) { 
context.instance_data = ser_ensure(context.instance_data, state.coordinates.count / 2 * config.bytes_per_instance); ser_clear(context.instance_data); + state.stats.rdp_max_count = 0; + state.stats.rdp_segments = 0; + let segment_count = 0; + let fast_path_count = 0; + let slow_path_count = 0; + for (let i = 0; i < context.clipped_indices.count; ++i) { const stroke_index = context.clipped_indices.data[i]; const stroke = state.events[stroke_index]; @@ -49,29 +55,48 @@ function geometry_write_instances(state, context) { segment_count += lod_indices_count - 1; - let base_this = 0; - let base_next = 0; - - for (let j = 0; j < lod_indices_count - 1; ++j) { - while (state.rdp_mask[base_this] == 0) base_this++; - base_next = base_this + 1; - while (state.rdp_mask[base_next] == 0) base_next++; - - const ax = state.coordinates.data[stroke.coords_from + base_this * 2 + 0]; - const ay = state.coordinates.data[stroke.coords_from + base_this * 2 + 1]; - const bx = state.coordinates.data[stroke.coords_from + base_next * 2 + 0]; - const by = state.coordinates.data[stroke.coords_from + base_next * 2 + 1]; + if (lod_indices_count === 2) { + fast_path_count++; + // Fast path + const ax = state.coordinates.data[stroke.coords_from + 0]; + const ay = state.coordinates.data[stroke.coords_from + 1]; + const bx = state.coordinates.data[stroke.coords_to - 2]; + const by = state.coordinates.data[stroke.coords_to - 1]; ser_f32(context.instance_data, ax); ser_f32(context.instance_data, ay); ser_f32(context.instance_data, bx); ser_f32(context.instance_data, by); ser_u32(context.instance_data, stroke_index); - - base_this = base_next; + } else { + slow_path_count++; + let base_this = 0; + let base_next = 0; + + for (let j = 0; j < lod_indices_count - 1; ++j) { + while (state.rdp_mask[base_this] == 0) base_this++; + base_next = base_this + 1; + while (state.rdp_mask[base_next] == 0) base_next++; + + const ax = state.coordinates.data[stroke.coords_from + base_this * 2 + 0]; + const ay = 
state.coordinates.data[stroke.coords_from + base_this * 2 + 1]; + const bx = state.coordinates.data[stroke.coords_from + base_next * 2 + 0]; + const by = state.coordinates.data[stroke.coords_from + base_next * 2 + 1]; + + ser_f32(context.instance_data, ax); + ser_f32(context.instance_data, ay); + ser_f32(context.instance_data, bx); + ser_f32(context.instance_data, by); + ser_u32(context.instance_data, stroke_index); + + base_this = base_next; + } } } + console.debug('fast:', fast_path_count, 'slow:', slow_path_count); + console.debug('rdp max:', state.stats.rdp_max_count, 'rdp segments:', state.stats.rdp_segments); + return segment_count; } @@ -80,7 +105,8 @@ function geometry_add_stroke(state, context, stroke, stroke_index, skip_bvh = fa stroke.bbox = stroke_bbox(state, stroke); stroke.area = box_area(stroke.bbox); - + stroke.turns_into_straight_line_zoom = -1; + context.stroke_data = ser_ensure_by(context.stroke_data, config.bytes_per_stroke); const color_u32 = stroke.color;