From 9bbab32369a2629599e78b30f0fc9ff329738b26 Mon Sep 17 00:00:00 2001
From: "A.Olokhtonov" <a.olokhtonov@norsi-trans.ru>
Date: Thu, 28 Dec 2023 02:11:46 +0300
Subject: [PATCH] Try to speed up the LOD calculation with a cache (kinda
 successful, but not really)

---
 client/index.html        | 32 +++++++++----------
 client/index.js          |  7 ++++-
 client/math.js           | 67 ++++++++++++++++++++++++++++------------
 client/webgl_geometry.js | 56 ++++++++++++++++++++++++---------
 4 files changed, 111 insertions(+), 51 deletions(-)
diff --git a/client/index.html b/client/index.html
index 2e8cb41..c8c94f7 100644
--- a/client/index.html
+++ b/client/index.html
@@ -7,22 +7,22 @@
     <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
 
     <link rel="shortcut icon" href="icons/favicon.svg" id="favicon">
-    <link rel="stylesheet" type="text/css" href="default.css?v=66">
-
-    <script type="text/javascript" src="aux.js?v=66"></script>
-    <script type="text/javascript" src="heapify.js?v=66"></script>
-    <script type="text/javascript" src="bvh.js?v=66"></script>
-    <script type="text/javascript" src="math.js?v=66"></script>
-    <script type="text/javascript" src="tools.js?v=66"></script>
-    <script type="text/javascript" src="webgl_geometry.js?v=66"></script>
-    <script type="text/javascript" src="webgl_shaders.js?v=66"></script>
-    <script type="text/javascript" src="webgl_listeners.js?v=66"></script>
-    <script type="text/javascript" src="webgl_draw.js?v=66"></script>
-    <script type="text/javascript" src="index.js?v=66"></script>
-
-    <script type="text/javascript" src="client_send.js?v=66"></script>
-    <script type="text/javascript" src="client_recv.js?v=66"></script>
-    <script type="text/javascript" src="websocket.js?v=66"></script>
+    <link rel="stylesheet" type="text/css" href="default.css?v=67">
+
+    <script type="text/javascript" src="aux.js?v=67"></script>
+    <script type="text/javascript" src="heapify.js?v=67"></script>
+    <script type="text/javascript" src="bvh.js?v=67"></script>
+    <script type="text/javascript" src="math.js?v=67"></script>
+    <script type="text/javascript" src="tools.js?v=67"></script>
+    <script type="text/javascript" src="webgl_geometry.js?v=67"></script>
+    <script type="text/javascript" src="webgl_shaders.js?v=67"></script>
+    <script type="text/javascript" src="webgl_listeners.js?v=67"></script>
+    <script type="text/javascript" src="webgl_draw.js?v=67"></script>
+    <script type="text/javascript" src="index.js?v=67"></script>
+
+    <script type="text/javascript" src="client_send.js?v=67"></script>
+    <script type="text/javascript" src="client_recv.js?v=67"></script>
+    <script type="text/javascript" src="websocket.js?v=67"></script>
 </head>
 <body>
     <div class="main">
diff --git a/client/index.js b/client/index.js
index 07f9724..f18ef44 100644
--- a/client/index.js
+++ b/client/index.js
@@ -31,6 +31,7 @@ const config = {
     tile_size: 16,
     clip_zoom_threshold: 0.00003,
     stroke_texture_size: 1024,
+    rdp_cache_threshold: 100,
 };
 
 const EVENT = Object.freeze({
@@ -209,7 +210,11 @@ function main() {
             'render_from': 0,
             'render_to': 0,
             'draw_bvh': false,
-        }
+        },
+
+        'rdp_cache': {},
+
+        'stats': {},
     };
 
     const context = {
diff --git a/client/math.js b/client/math.js
index a4714c7..1960d2f 100644
--- a/client/math.js
+++ b/client/math.js
@@ -11,8 +11,16 @@ function screen_to_canvas(state, p) {
 }
 
 function rdp_find_max(state, zoom, stroke, start, end) {
+    // Finds the point in the open range (start, end) with the maximum distance from the line (start--end); returns -1 if no point is farther than EPS
     const EPS = 1.0 / zoom;
-    // const EPS = 10.0;
+    let cache_key = null;
+
+    if (end - start > config.rdp_cache_threshold) {
+        cache_key = stroke.index + '-' + zoom + '-' + start + '-' + end;
+        if (cache_key in state.rdp_cache) {
+            return state.rdp_cache[cache_key];
+        }
+    }
 
     let result = -1;
     let max_dist = 0;
@@ -26,23 +34,17 @@ function rdp_find_max(state, zoom, stroke, start, end) {
     const dy = by - ay;
 
     const dist_ab = Math.sqrt(dx * dx + dy * dy);
-    const sin_theta = dy / dist_ab;
-    const cos_theta = dx / dist_ab;
+    const dir_nx = dy / dist_ab;
+    const dir_ny = -dx / dist_ab;
 
-    for (let i = start; i < end; ++i) {
+    for (let i = start + 1; i < end; ++i) {
         const px = state.coordinates.data[stroke.coords_from + i * 2 + 0];
         const py = state.coordinates.data[stroke.coords_from + i * 2 + 1];
         
-        const ox = px - ax;
-        const oy = py - ay;
-
-        const rx = cos_theta * ox + sin_theta * oy;
-        const ry = -sin_theta * ox + cos_theta * oy;
+        const apx = px - ax;
+        const apy = py - ay;
 
-        const x = rx + ax;
-        const y = ry + ay;
-
-        const dist = Math.abs(y - ay);
+        const dist = Math.abs(apx * dir_nx + apy * dir_ny);
 
         if (dist > EPS && dist > max_dist) {
             result = i;
@@ -50,19 +52,36 @@ function rdp_find_max(state, zoom, stroke, start, end) {
         }
     }
 
+    state.stats.rdp_max_count++;
+    state.stats.rdp_segments += end - start - 1;
+    
+    if (end - start > config.rdp_cache_threshold) { 
+        state.rdp_cache[cache_key] = result;
+    }
+
     return result;
 }
 
 function process_rdp_indices_r(state, zoom, mask, stroke, start, end) {
+    // It looks like the recursive implementation spends most of its time in function call overhead.
+    // Let's try an explicit stack instead, to give the JS engine more room to play with.
+    // Update: it's not faster, but it gives more sensible source-line samples in the Chrome profiler, so I'll leave it.
+    
     let result = 0;
+    const stack = [];
+
+    stack.push({'start': start, 'end': end});
 
-    const max = rdp_find_max(state, zoom, stroke, start, end);
+    while (stack.length > 0) {
+        const region = stack.pop();
+        const max = rdp_find_max(state, zoom, stroke, region.start, region.end);
 
-    if (max !== -1) {
-        mask[max] = 1;
-        result += 1;
-        result += process_rdp_indices_r(state, zoom, mask, stroke, start, max);
-        result += process_rdp_indices_r(state, zoom, mask, stroke, max, end);
+        if (max !== -1) {
+            mask[max] = 1;
+            result += 1;
+            stack.push({'start': region.start, 'end': max});
+            stack.push({'start': max, 'end': region.end});
+        }
     }
 
     return result;
@@ -103,8 +122,18 @@ function process_ewmv(points, round = false) {
 }
 
 function process_stroke(state, zoom, stroke) {
+    // Cache the highest zoom level at which RDP keeps only the two endpoints; at or below it, skip the RDP pass
+    if (zoom <= stroke.turns_into_straight_line_zoom) {
+        return 2; 
+    }
+
     // const result0 = process_ewmv(points);
     const result1 = process_rdp_indices(state, zoom, stroke, true);
+
+    if (result1 === 2 && zoom > stroke.turns_into_straight_line_zoom) {
+        stroke.turns_into_straight_line_zoom = zoom;
+    }
+
     return result1;
 }
 
diff --git a/client/webgl_geometry.js b/client/webgl_geometry.js
index ff37c01..8965913 100644
--- a/client/webgl_geometry.js
+++ b/client/webgl_geometry.js
@@ -40,8 +40,14 @@ function geometry_write_instances(state, context) {
     context.instance_data = ser_ensure(context.instance_data, state.coordinates.count / 2 * config.bytes_per_instance);
     ser_clear(context.instance_data);
 
+    state.stats.rdp_max_count = 0;
+    state.stats.rdp_segments = 0;
+
     let segment_count = 0;
 
+    let fast_path_count = 0;
+    let slow_path_count = 0;
+
     for (let i = 0; i < context.clipped_indices.count; ++i) {
         const stroke_index = context.clipped_indices.data[i];
         const stroke = state.events[stroke_index];
@@ -49,29 +55,48 @@ function geometry_write_instances(state, context) {
 
         segment_count += lod_indices_count - 1;
 
-        let base_this = 0;
-        let base_next = 0;
-
-        for (let j = 0; j < lod_indices_count - 1; ++j) {
-            while (state.rdp_mask[base_this] == 0) base_this++;
-            base_next = base_this + 1;
-            while (state.rdp_mask[base_next] == 0) base_next++;
-
-            const ax = state.coordinates.data[stroke.coords_from + base_this * 2 + 0];
-            const ay = state.coordinates.data[stroke.coords_from + base_this * 2 + 1];
-            const bx = state.coordinates.data[stroke.coords_from + base_next * 2 + 0];
-            const by = state.coordinates.data[stroke.coords_from + base_next * 2 + 1];
+        if (lod_indices_count === 2) {
+            fast_path_count++;
+            // Fast path
+            const ax = state.coordinates.data[stroke.coords_from + 0]; 
+            const ay = state.coordinates.data[stroke.coords_from + 1]; 
+            const bx = state.coordinates.data[stroke.coords_to - 2]; 
+            const by = state.coordinates.data[stroke.coords_to - 1]; 
 
             ser_f32(context.instance_data, ax);
             ser_f32(context.instance_data, ay);
             ser_f32(context.instance_data, bx);
             ser_f32(context.instance_data, by);
             ser_u32(context.instance_data, stroke_index);
-
-            base_this = base_next;
+        } else {
+            slow_path_count++;
+            let base_this = 0;
+            let base_next = 0;
+
+            for (let j = 0; j < lod_indices_count - 1; ++j) {
+                while (state.rdp_mask[base_this] == 0) base_this++;
+                base_next = base_this + 1;
+                while (state.rdp_mask[base_next] == 0) base_next++;
+
+                const ax = state.coordinates.data[stroke.coords_from + base_this * 2 + 0];
+                const ay = state.coordinates.data[stroke.coords_from + base_this * 2 + 1];
+                const bx = state.coordinates.data[stroke.coords_from + base_next * 2 + 0];
+                const by = state.coordinates.data[stroke.coords_from + base_next * 2 + 1];
+
+                ser_f32(context.instance_data, ax);
+                ser_f32(context.instance_data, ay);
+                ser_f32(context.instance_data, bx);
+                ser_f32(context.instance_data, by);
+                ser_u32(context.instance_data, stroke_index);
+
+                base_this = base_next;
+            }
         }
     }
 
+    console.debug('fast:', fast_path_count, 'slow:', slow_path_count);
+    console.debug('rdp max:', state.stats.rdp_max_count, 'rdp segments:', state.stats.rdp_segments);
+
     return segment_count;
 }
 
@@ -80,7 +105,8 @@ function geometry_add_stroke(state, context, stroke, stroke_index, skip_bvh = fa
 
     stroke.bbox = stroke_bbox(state, stroke);
     stroke.area = box_area(stroke.bbox);
-   
+    stroke.turns_into_straight_line_zoom = -1;
+
     context.stroke_data = ser_ensure_by(context.stroke_data, config.bytes_per_stroke);
 
     const color_u32 = stroke.color;