From 248501e50f7224d6f4d46a02b0497510ef3b128d Mon Sep 17 00:00:00 2001 From: "A.Olokhtonov" Date: Tue, 26 Dec 2023 02:34:43 +0300 Subject: [PATCH] Remove recursive allocations from process_rdp Use rgba_u16 textures for rgb and, most importantly, W (stroke width) --- client/index.js | 13 +++-- client/math.js | 61 +++++++++++----------- client/webgl_draw.js | 71 ++++++++++++-------------- client/webgl_geometry.js | 106 +++++++++------------------------------ client/webgl_shaders.js | 27 ++++++---- 5 files changed, 112 insertions(+), 166 deletions(-) diff --git a/client/index.js b/client/index.js index 80d34ee..8ec8845 100644 --- a/client/index.js +++ b/client/index.js @@ -24,11 +24,13 @@ const config = { initial_offline_timeout: 1000, default_color: 0x00, default_width: 8, - bytes_per_point: 9 * 4, + bytes_per_quad: 4 * 4 + 4, // axy, bxy, stroke_id + bytes_per_stroke: 3 + 1, // r, g, b, width initial_static_bytes: 4096 * 16, initial_dynamic_bytes: 4096, tile_size: 16, clip_zoom_threshold: 0.00003, + stroke_texture_size: 1024, }; const EVENT = Object.freeze({ @@ -162,6 +164,8 @@ function main() { 'current_strokes': {}, + 'rdp_mask': new Uint8Array(1024), + 'queue': [], 'events': [], 'stroke_count': 0, @@ -217,6 +221,8 @@ function main() { 'lods': [], + 'stroke_data': serializer_create(config.initial_static_bytes), + 'bgcolor': {'r': 1.0, 'g': 1.0, 'b': 1.0}, 'gpu_timer_ext': null, @@ -232,10 +238,9 @@ function main() { for (let i = 0; i < config.lod_levels; ++i) { context.lods.push({ - 'max_zoom': Math.pow(0.5, i), // use this LOD level when current canvas.zoom is less than this value, but not less than the next level max_zoom (or if this is the last zoom level) + 'max_zoom': Math.pow(0.25, i), // use this LOD level when current canvas.zoom is less than this value, but not less than the next level max_zoom (or if this is the last zoom level) 'total_points': 0, - 'vertices': serializer_create(config.initial_static_bytes), - 'indices': serializer_create(config.initial_static_bytes), + 'segments': serializer_create(config.initial_static_bytes), 'data_buffer': null, 'index_buffer': null, }); diff --git a/client/math.js b/client/math.js index 71d58fb..f6503f3 100644 --- a/client/math.js +++ b/client/math.js @@ -6,8 +6,8 @@ function screen_to_canvas(state, p) { return {'x': xc, 'y': yc}; } -function rdp_find_max(state, points, start, end) { - const EPS = 1.0 / state.canvas.zoom; +function rdp_find_max(zoom, points, start, end) { + const EPS = 1.0 / zoom; // const EPS = 10.0; let result = -1; @@ -46,43 +46,44 @@ function rdp_find_max(state, points, start, end) { return result; } -function process_rdp_r(state, points, start, end) { - let result = []; - - const max = rdp_find_max(state, points, start, end); +function process_rdp_r(zoom, mask, points, start, end) { + let result = 0; + + const max = rdp_find_max(zoom, points, start, end); if (max !== -1) { - const before = process_rdp_r(state, points, start, max); - const after = process_rdp_r(state, points, max, end); - result = [...before, points[max], ...after]; + mask[max] = 1; + result += 1; + result += process_rdp_r(zoom, mask, points, start, max); + result += process_rdp_r(zoom, mask, points, max, end); } return result; } -function rdp_indices_r(zoom, points, start, end) { - let result = []; - - const max = rdp_find_max({'canvas': {'zoom': zoom}}, points, start, end); - - if (max !== -1) { - const before = rdp_indices_r(zoom, points, start, max); - const after = rdp_indices_r(zoom, points, max, end); - result = [...before, max, ...after]; +function process_rdp(state, zoom, points) { + if (state.rdp_mask.length < points.length) { + state.rdp_mask = new Uint8Array(points.length); } - return result; -} + state.rdp_mask.fill(0, 0, points.length); -function rdp_indices(zoom, points) { - const result = [0, ...rdp_indices_r(zoom, points, 0, points.length - 1), points.length - 1]; - return result; -} + const mask = state.rdp_mask; + const npoints = process_rdp_r(zoom, mask, points, 0, points.length - 1); + + mask[0] = 1; + mask[points.length - 1] = 1; + + const result = new Array(npoints); + let j = 0; + + for (let i = 0; i < points.length; ++i) { + if (mask[i] === 1) { + result[j] = points[i]; + ++j; + } + } -function process_rdp(state, points) { - const result = process_rdp_r(state, points, 0, points.length - 1); - result.unshift(points[0]); - result.push(points[points.length - 1]); return result; } @@ -102,9 +103,9 @@ function process_ewmv(points, round = false) { return result; } -function process_stroke(state, points) { +function process_stroke(state, zoom, points) { // const result0 = process_ewmv(points); - const result1 = process_rdp(state, points, true); + const result1 = process_rdp(state, zoom, points, true); return result1; } diff --git a/client/webgl_draw.js b/client/webgl_draw.js index b79daa4..28ecf32 100644 --- a/client/webgl_draw.js +++ b/client/webgl_draw.js @@ -24,6 +24,27 @@ function upload_if_needed(gl, buffer_kind, serializer) { } } +function upload_square_rgba16ui_texture(gl, serializer, texture_size) { + const bpp = 2 * 4; + const data_size = serializer.offset; + const data_pixels = data_size / bpp; // data_size % bpp is expected to always be zero here + + const rows = Math.ceil(data_pixels / texture_size); + const last_row = data_pixels % texture_size; + const whole_upload = (rows - 1) * texture_size * bpp; + + // Upload whole rows + if (rows > 1) { + gl.texSubImage2D(gl.TEXTURE_2D, 0, 0, 0, texture_size, rows - 1, gl.RGBA_INTEGER, gl.UNSIGNED_SHORT, new Uint16Array(serializer.buffer, 0, whole_upload / 2)); + } + + // Upload last row + if (last_row > 0) { + const last_row_upload = last_row * bpp; + gl.texSubImage2D(gl.TEXTURE_2D, 0, 0, rows - 1, last_row, 1, gl.RGBA_INTEGER, gl.UNSIGNED_SHORT, new Uint16Array(serializer.buffer, whole_upload, last_row_upload / 2)); + } +} + function draw(state, context) { const cpu_before = performance.now(); @@ -61,40 +82,23 @@ function draw(state, context) { gl.bindBuffer(gl.ARRAY_BUFFER, lod.data_buffer); gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, lod.index_buffer); - const quad_data = serializer_create(1024); - - ser_f32(quad_data, 0); - ser_f32(quad_data, 0); - ser_f32(quad_data, 200); - ser_f32(quad_data, 100); - ser_u32(quad_data, 0); - - ser_f32(quad_data, 200); - ser_f32(quad_data, 100); - ser_f32(quad_data, 255); - ser_f32(quad_data, 500); - ser_u32(quad_data, 0); - - ser_f32(quad_data, 100); - ser_f32(quad_data, 300); - ser_f32(quad_data, 125); - ser_f32(quad_data, 854); - ser_u32(quad_data, 1); - - - gl.bufferData(gl.ARRAY_BUFFER, new Uint8Array(quad_data.buffer, 0, quad_data.offset), gl.STATIC_DRAW); - gl.bufferData(gl.ELEMENT_ARRAY_BUFFER, new Uint32Array([0, 1, 2, 3, 2, 1, 4, 5, 6, 7, 6, 5, 8, 9, 10, 11, 10, 9]), gl.STATIC_DRAW); + // static data, per-quad: points, stroke_ids + // static data, per-stroke (texture): color, width (radius) + upload_if_needed(gl, gl.ARRAY_BUFFER, lod.segments); locations = context.locations['sdf'].main; gl.useProgram(context.programs['sdf'].main); + const segment_count = lod.segments.offset / config.bytes_per_quad; + gl.uniform2f(locations['u_res'], context.canvas.width, context.canvas.height); gl.uniform2f(locations['u_scale'], state.canvas.zoom, state.canvas.zoom); gl.uniform2f(locations['u_translation'], state.canvas.offset.x, state.canvas.offset.y); - gl.uniform1i(locations['u_stroke_count'], 2); + gl.uniform1i(locations['u_stroke_count'], state.events.length); gl.uniform1i(locations['u_debug_mode'], state.debug.red); gl.uniform1i(locations['u_stroke_data'], 0); + gl.uniform1i(locations['u_stroke_texture_size'], config.stroke_texture_size); gl.enableVertexAttribArray(locations['a_ab']); gl.enableVertexAttribArray(locations['a_stroke_id']); @@ -105,23 +109,12 @@ function draw(state, context) { gl.vertexAttribDivisor(locations['a_ab'], 1); gl.vertexAttribDivisor(locations['a_stroke_id'], 1); - const stroke_data = serializer_create(1024); - - ser_u8(stroke_data, 255); - ser_u8(stroke_data, 0); - ser_u8(stroke_data, 0); - ser_u8(stroke_data, 8); - - ser_u8(stroke_data, 0); - ser_u8(stroke_data, 0); - ser_u8(stroke_data, 255); - ser_u8(stroke_data, 1); - gl.bindTexture(gl.TEXTURE_2D, context.textures['stroke_data']); - gl.texSubImage2D(gl.TEXTURE_2D, 0, 0, 0, 2, 1, gl.RGBA, gl.UNSIGNED_BYTE, new Uint8Array([255, 0, 0, 16, 0, 0, 255, 2])); + upload_square_rgba16ui_texture(gl, context.stroke_data, config.stroke_texture_size); gl.activeTexture(gl.TEXTURE0); - gl.drawElementsInstanced(gl.TRIANGLES, 6, gl.UNSIGNED_INT, 0, 3); + gl.drawArraysInstanced(gl.TRIANGLES, 0, 6, segment_count); // TODO: based on clipping results + /* @@ -316,7 +309,7 @@ function draw(state, context) { gl.deleteQuery(query); // Don't re-enter this polling loop. query = null; - } else { + } else if (!available) { setTimeout(next_tick, 0); } } diff --git a/client/webgl_geometry.js b/client/webgl_geometry.js index 11c71d7..58c3e3f 100644 --- a/client/webgl_geometry.js +++ b/client/webgl_geometry.js @@ -1,76 +1,19 @@ -function push_point(s, x, y, ax, ay, bx, by, thickness, r, g, b, stroke_id) { - ser_f32(s, x); - ser_f32(s, y); - ser_f32(s, thickness); - ser_f32(s, ax); - ser_f32(s, ay); - ser_f32(s, bx); - ser_f32(s, by); - ser_u8(s, r); - ser_u8(s, g); - ser_u8(s, b); - ser_align(s, 4); - ser_u32(s, stroke_id); -} - -function push_quad(s, p1x, p1y, p2x, p2y, p3x, p3y, p4x, p4y, ax, ay, bx, by, thickness, r, g, b, stroke_id) { - push_point(s, p1x, p1y, ax, ay, bx, by, thickness, r, g, b, stroke_id); - push_point(s, p2x, p2y, ax, ay, bx, by, thickness, r, g, b, stroke_id); - push_point(s, p3x, p3y, ax, ay, bx, by, thickness, r, g, b, stroke_id); - push_point(s, p4x, p4y, ax, ay, bx, by, thickness, r, g, b, stroke_id); -} - function push_stroke(s, stroke, stroke_index) { - const stroke_width = stroke.width; const points = stroke.points; - const color_u32 = stroke.color; - const radius = stroke_width / 2; if (points.length < 2) { return; } - const r = (color_u32 >> 16) & 0xFF; - const g = (color_u32 >> 8) & 0xFF; - const b = color_u32 & 0xFF; - for (let i = 0; i < points.length - 1; ++i) { const from = points[i]; const to = points[i + 1]; - - const dir_x = to.x - from.x; - const dir_y = to.y - from.y; - const len = Math.sqrt(dir_x * dir_x + dir_y * dir_y); - - const dir1_x = dir_x / len; - const dir1_y = dir_y / len; - - const up_x = dir_y / len; - const up_y = -dir_x / len; - - let p1_x = from.x + (up_x - dir1_x) * radius; - let p1_y = from.y + (up_y - dir1_y) * radius; - - let p2_x = to.x + (up_x + dir1_x) * radius; - let p2_y = to.y + (up_y + dir1_y) * radius; - - let p3_x = from.x + (-up_x - dir1_x) * radius; - let p3_y = from.y + (-up_y - dir1_y) * radius; - - let p4_x = to.x + (-up_x + dir1_x) * radius; - let p4_y = to.y + (-up_y + dir1_y) * radius; - - push_quad(s, - p1_x, p1_y, - p2_x, p2_y, - p3_x, p3_y, - p4_x, p4_y, - from.x, from.y, - to.x, to.y, - stroke_width, - r, g, b, - stroke_index - ); + + ser_f32(s, from.x); + ser_f32(s, from.y); + ser_f32(s, to.x); + ser_f32(s, to.y); + ser_u32(s, stroke_index); } } @@ -83,7 +26,7 @@ function geometry_prepare_stroke(state) { return null; } - const points = process_stroke(state, state.players[state.me].points); + const points = process_stroke(state, state.canvas.zoom, state.players[state.me].points); return { 'color': state.players[state.me].color, @@ -99,23 +42,10 @@ function geometry_add_stroke(state, context, stroke, stroke_index, skip_bvh = fa stroke.index = state.events.length; for (let i = 0; i < config.lod_levels; ++i) { - // TODO: just pass zoom to process_stroke ? - const saved_zoom = state.canvas.zoom; - state.canvas.zoom = Math.pow(0.5, i); - const points = (i > 0 ? process_stroke(state, stroke.points) : stroke.points); - state.canvas.zoom = saved_zoom; - - const vertex_serializer = context.lods[i].vertices = ser_ensure_by(context.lods[i].vertices, points.length * 4 * config.bytes_per_point); - /* - event.index = state.events.length; - event.starting_index = state.starting_index; - - if (event.points.length > 1) { - state.starting_index += (event.points.length - 1) * 4; - } + const lod = context.lods[i]; - state.total_points += event.points.length; - */ + const points = (i > 0 ? process_stroke(state, lod.max_zoom, stroke.points) : stroke.points); + const segment_serializer = lod.segments = ser_ensure_by(lod.segments, (points.length - 1) * config.bytes_per_quad); let starting_index = 0; @@ -133,14 +63,26 @@ function geometry_add_stroke(state, context, stroke, stroke_index, skip_bvh = fa context.lods[i].total_points += points.length; - push_stroke(vertex_serializer, stroke.lods[stroke.lods.length - 1], stroke_index); + push_stroke(segment_serializer, stroke.lods[stroke.lods.length - 1], stroke_index); if (i === 0) { stroke.bbox = stroke_bbox(stroke); stroke.area = (stroke.bbox.x2 - stroke.bbox.x1) * (stroke.bbox.y2 - stroke.bbox.y1); } } - + + context.stroke_data = ser_ensure_by(context.stroke_data, config.bytes_per_stroke); + + const color_u32 = stroke.color; + const r = (color_u32 >> 16) & 0xFF; + const g = (color_u32 >> 8) & 0xFF; + const b = color_u32 & 0xFF; + + ser_u16(context.stroke_data, r); + ser_u16(context.stroke_data, g); + ser_u16(context.stroke_data, b); + ser_u16(context.stroke_data, stroke.width); + if (!skip_bvh) bvh_add_stroke(state.bvh, stroke_index, stroke); } diff --git a/client/webgl_shaders.js b/client/webgl_shaders.js index ca0e408..816b470 100644 --- a/client/webgl_shaders.js +++ b/client/webgl_shaders.js @@ -35,7 +35,7 @@ const simple_fs_src = `#version 300 es precision highp float; in vec2 v_uv; flat in int v_quad_id; - out vec4 FragColor; + layout(location = 0) out vec4 FragColor; void main() { vec2 pixel = fwidth(v_uv); vec2 border = 2.0 * pixel; @@ -94,7 +94,7 @@ const opaque_vs_src = `#version 300 es const nop_fs_src = `#version 300 es precision highp float; flat in int v_stroke_id; - out vec4 FragColor; + layout(location = 0) out vec4 FragColor; void main() { vec3 color = vec3(float(v_stroke_id * 3245 % 255) / 255.0, float(v_stroke_id * 7343 % 255) / 255.0, float(v_stroke_id * 5528 % 255) / 255.0); FragColor = vec4(color, 1.0); @@ -110,8 +110,9 @@ const sdf_vs_src = `#version 300 es uniform vec2 u_res; uniform vec2 u_translation; uniform int u_stroke_count; + uniform int u_stroke_texture_size; - uniform highp sampler2D u_stroke_data; + uniform highp usampler2D u_stroke_data; out vec4 v_line; out vec2 v_texcoord; @@ -122,9 +123,12 @@ const sdf_vs_src = `#version 300 es void main() { vec2 screen02; float apron = 1.0; // google "futanari inflation rule 34" + + int stroke_data_y = a_stroke_id / u_stroke_texture_size; + int stroke_data_x = a_stroke_id % u_stroke_texture_size; - vec4 stroke_data = texelFetch(u_stroke_data, ivec2(a_stroke_id, 0), 0); - float radius = stroke_data.w * 255.0; + uvec4 stroke_data = texelFetch(u_stroke_data, ivec2(stroke_data_x, stroke_data_y), 0); + float radius = float(stroke_data.w); vec2 a = a_ab.xy; vec2 b = a_ab.zw; @@ -165,7 +169,7 @@ const sdf_vs_src = `#version 300 es v_line = vec4(a, b); v_thickness = radius; - v_color = stroke_data.xyz; + v_color = vec3(stroke_data.xyz) / 255.0; gl_Position = vec4(screen02 - 1.0, (float(a_stroke_id) / float(u_stroke_count)) * 2.0 - 1.0, 1); } @@ -182,7 +186,7 @@ const sdf_fs_src = `#version 300 es flat in float v_thickness; - out vec4 FragColor; + layout(location = 0) out vec4 FragColor; void main() { if (u_debug_mode == 0) { @@ -231,7 +235,7 @@ const tquad_fs_src = `#version 300 es uniform sampler2D u_texture; uniform bool u_outline; - out vec4 FragColor; + layout(location = 0) out vec4 FragColor; void main() { if (!u_outline) { @@ -257,12 +261,12 @@ function init_webgl(state, context) { gl.enable(gl.DEPTH_TEST); gl.depthFunc(gl.GEQUAL); -/* + context.gpu_timer_ext = gl.getExtension('EXT_disjoint_timer_query_webgl2'); if (context.gpu_timer_ext === null) { context.gpu_timer_ext = gl.getExtension('EXT_disjoint_timer_query'); } -*/ + const quad_vs = create_shader(gl, gl.VERTEX_SHADER, tquad_vs_src); const quad_fs = create_shader(gl, gl.FRAGMENT_SHADER, tquad_fs_src); @@ -312,6 +316,7 @@ function init_webgl(state, context) { 'u_debug_mode': gl.getUniformLocation(context.programs['sdf'].main, 'u_debug_mode'), 'u_stroke_count': gl.getUniformLocation(context.programs['sdf'].main, 'u_stroke_count'), 'u_stroke_data': gl.getUniformLocation(context.programs['sdf'].main, 'u_stroke_data'), + 'u_stroke_texture_size': gl.getUniformLocation(context.programs['sdf'].main, 'u_stroke_texture_size'), } }; @@ -337,7 +342,7 @@ function init_webgl(state, context) { gl.bindTexture(gl.TEXTURE_2D, context.textures['stroke_data']); gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST); gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST); - gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, 2, 1, 0, gl.RGBA, gl.UNSIGNED_BYTE, null); + gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA16UI, config.stroke_texture_size, config.stroke_texture_size, 0, gl.RGBA_INTEGER, gl.UNSIGNED_SHORT, null); const resize_canvas = (entries) => { // https://www.khronos.org/webgl/wiki/HandlingHighDPI