Browse Source

Remove recursive allocations from process_rdp

Use rgba_u16 textures for rgb and, most importantly, W (stroke width)
ssao
A.Olokhtonov 11 months ago
parent
commit
248501e50f
  1. 13
      client/index.js
  2. 61
      client/math.js
  3. 71
      client/webgl_draw.js
  4. 106
      client/webgl_geometry.js
  5. 27
      client/webgl_shaders.js

13
client/index.js

@ -24,11 +24,13 @@ const config = { @@ -24,11 +24,13 @@ const config = {
initial_offline_timeout: 1000,
default_color: 0x00,
default_width: 8,
bytes_per_point: 9 * 4,
bytes_per_quad: 4 * 4 + 4, // axy, bxy, stroke_id
bytes_per_stroke: 3 + 1, // r, g, b, width
initial_static_bytes: 4096 * 16,
initial_dynamic_bytes: 4096,
tile_size: 16,
clip_zoom_threshold: 0.00003,
stroke_texture_size: 1024,
};
const EVENT = Object.freeze({
@ -162,6 +164,8 @@ function main() { @@ -162,6 +164,8 @@ function main() {
'current_strokes': {},
'rdp_mask': new Uint8Array(1024),
'queue': [],
'events': [],
'stroke_count': 0,
@ -217,6 +221,8 @@ function main() { @@ -217,6 +221,8 @@ function main() {
'lods': [],
'stroke_data': serializer_create(config.initial_static_bytes),
'bgcolor': {'r': 1.0, 'g': 1.0, 'b': 1.0},
'gpu_timer_ext': null,
@ -232,10 +238,9 @@ function main() { @@ -232,10 +238,9 @@ function main() {
for (let i = 0; i < config.lod_levels; ++i) {
context.lods.push({
'max_zoom': Math.pow(0.5, i), // use this LOD level when current canvas.zoom is less than this value, but not less than the next level max_zoom (or if this is the last zoom level)
'max_zoom': Math.pow(0.25, i), // use this LOD level when current canvas.zoom is less than this value, but not less than the next level max_zoom (or if this is the last zoom level)
'total_points': 0,
'vertices': serializer_create(config.initial_static_bytes),
'indices': serializer_create(config.initial_static_bytes),
'segments': serializer_create(config.initial_static_bytes),
'data_buffer': null,
'index_buffer': null,
});

61
client/math.js

@ -6,8 +6,8 @@ function screen_to_canvas(state, p) { @@ -6,8 +6,8 @@ function screen_to_canvas(state, p) {
return {'x': xc, 'y': yc};
}
function rdp_find_max(state, points, start, end) {
const EPS = 1.0 / state.canvas.zoom;
function rdp_find_max(zoom, points, start, end) {
const EPS = 1.0 / zoom;
// const EPS = 10.0;
let result = -1;
@ -46,43 +46,44 @@ function rdp_find_max(state, points, start, end) { @@ -46,43 +46,44 @@ function rdp_find_max(state, points, start, end) {
return result;
}
function process_rdp_r(state, points, start, end) {
let result = [];
const max = rdp_find_max(state, points, start, end);
function process_rdp_r(zoom, mask, points, start, end) {
let result = 0;
const max = rdp_find_max(zoom, points, start, end);
if (max !== -1) {
const before = process_rdp_r(state, points, start, max);
const after = process_rdp_r(state, points, max, end);
result = [...before, points[max], ...after];
mask[max] = 1;
result += 1;
result += process_rdp_r(zoom, mask, points, start, max);
result += process_rdp_r(zoom, mask, points, max, end);
}
return result;
}
function rdp_indices_r(zoom, points, start, end) {
let result = [];
const max = rdp_find_max({'canvas': {'zoom': zoom}}, points, start, end);
if (max !== -1) {
const before = rdp_indices_r(zoom, points, start, max);
const after = rdp_indices_r(zoom, points, max, end);
result = [...before, max, ...after];
function process_rdp(state, zoom, points) {
if (state.rdp_mask.length < points.length) {
state.rdp_mask = new Uint8Array(points.length);
}
return result;
}
state.rdp_mask.fill(0, 0, points.length);
function rdp_indices(zoom, points) {
const result = [0, ...rdp_indices_r(zoom, points, 0, points.length - 1), points.length - 1];
return result;
}
const mask = state.rdp_mask;
const npoints = process_rdp_r(zoom, mask, points, 0, points.length - 1);
mask[0] = 1;
mask[points.length - 1] = 1;
const result = new Array(npoints);
let j = 0;
for (let i = 0; i < points.length; ++i) {
if (mask[i] === 1) {
result[j] = points[i];
++j;
}
}
function process_rdp(state, points) {
const result = process_rdp_r(state, points, 0, points.length - 1);
result.unshift(points[0]);
result.push(points[points.length - 1]);
return result;
}
@ -102,9 +103,9 @@ function process_ewmv(points, round = false) { @@ -102,9 +103,9 @@ function process_ewmv(points, round = false) {
return result;
}
function process_stroke(state, points) {
function process_stroke(state, zoom, points) {
// const result0 = process_ewmv(points);
const result1 = process_rdp(state, points, true);
const result1 = process_rdp(state, zoom, points, true);
return result1;
}

71
client/webgl_draw.js

@ -24,6 +24,27 @@ function upload_if_needed(gl, buffer_kind, serializer) { @@ -24,6 +24,27 @@ function upload_if_needed(gl, buffer_kind, serializer) {
}
}
/**
 * Upload the used part of a serializer into a square RGBA16UI texture.
 *
 * The data is treated as a flat run of 8-byte pixels (4 channels x u16) laid
 * out row by row, `texture_size` pixels per row. Uploads go to whatever
 * texture is currently bound to gl.TEXTURE_2D, starting at texel (0, 0).
 *
 * @param {WebGL2RenderingContext} gl - context used for the texSubImage2D calls
 * @param {{buffer: ArrayBuffer, offset: number}} serializer - `offset` is the number
 *        of used bytes, counted from the start of `buffer`
 *        (NOTE(review): assumes `buffer` is an ArrayBuffer — confirm against serializer_create)
 * @param {number} texture_size - texture width in pixels (row length)
 */
function upload_square_rgba16ui_texture(gl, serializer, texture_size) {
    const bpp = 2 * 4; // bytes per pixel: 4 channels, 2 bytes each
    const data_size = serializer.offset;
    const data_pixels = data_size / bpp; // data_size % bpp is expected to always be zero here

    // Split into completely filled rows plus an optional partial trailing row.
    // Using floor (not ceil - 1) keeps the final row when data_pixels is an
    // exact multiple of texture_size; otherwise that row would never be uploaded.
    const whole_rows = Math.floor(data_pixels / texture_size);
    const last_row = data_pixels % texture_size;
    const whole_upload = whole_rows * texture_size * bpp; // bytes covered by the whole rows

    // Upload whole rows
    if (whole_rows > 0) {
        // Uint16Array length is in elements, hence bytes / 2
        gl.texSubImage2D(gl.TEXTURE_2D, 0, 0, 0, texture_size, whole_rows, gl.RGBA_INTEGER, gl.UNSIGNED_SHORT, new Uint16Array(serializer.buffer, 0, whole_upload / 2));
    }

    // Upload last (partial) row, placed right below the whole rows
    if (last_row > 0) {
        const last_row_upload = last_row * bpp;
        gl.texSubImage2D(gl.TEXTURE_2D, 0, 0, whole_rows, last_row, 1, gl.RGBA_INTEGER, gl.UNSIGNED_SHORT, new Uint16Array(serializer.buffer, whole_upload, last_row_upload / 2));
    }
}
function draw(state, context) {
const cpu_before = performance.now();
@ -61,40 +82,23 @@ function draw(state, context) { @@ -61,40 +82,23 @@ function draw(state, context) {
gl.bindBuffer(gl.ARRAY_BUFFER, lod.data_buffer);
gl.bindBuffer(gl.ELEMENT_ARRAY_BUFFER, lod.index_buffer);
const quad_data = serializer_create(1024);
ser_f32(quad_data, 0);
ser_f32(quad_data, 0);
ser_f32(quad_data, 200);
ser_f32(quad_data, 100);
ser_u32(quad_data, 0);
ser_f32(quad_data, 200);
ser_f32(quad_data, 100);
ser_f32(quad_data, 255);
ser_f32(quad_data, 500);
ser_u32(quad_data, 0);
ser_f32(quad_data, 100);
ser_f32(quad_data, 300);
ser_f32(quad_data, 125);
ser_f32(quad_data, 854);
ser_u32(quad_data, 1);
gl.bufferData(gl.ARRAY_BUFFER, new Uint8Array(quad_data.buffer, 0, quad_data.offset), gl.STATIC_DRAW);
gl.bufferData(gl.ELEMENT_ARRAY_BUFFER, new Uint32Array([0, 1, 2, 3, 2, 1, 4, 5, 6, 7, 6, 5, 8, 9, 10, 11, 10, 9]), gl.STATIC_DRAW);
// static data, per-quad: points, stroke_ids
// static data, per-stroke (texture): color, width (radius)
upload_if_needed(gl, gl.ARRAY_BUFFER, lod.segments);
locations = context.locations['sdf'].main;
gl.useProgram(context.programs['sdf'].main);
const segment_count = lod.segments.offset / config.bytes_per_quad;
gl.uniform2f(locations['u_res'], context.canvas.width, context.canvas.height);
gl.uniform2f(locations['u_scale'], state.canvas.zoom, state.canvas.zoom);
gl.uniform2f(locations['u_translation'], state.canvas.offset.x, state.canvas.offset.y);
gl.uniform1i(locations['u_stroke_count'], 2);
gl.uniform1i(locations['u_stroke_count'], state.events.length);
gl.uniform1i(locations['u_debug_mode'], state.debug.red);
gl.uniform1i(locations['u_stroke_data'], 0);
gl.uniform1i(locations['u_stroke_texture_size'], config.stroke_texture_size);
gl.enableVertexAttribArray(locations['a_ab']);
gl.enableVertexAttribArray(locations['a_stroke_id']);
@ -105,23 +109,12 @@ function draw(state, context) { @@ -105,23 +109,12 @@ function draw(state, context) {
gl.vertexAttribDivisor(locations['a_ab'], 1);
gl.vertexAttribDivisor(locations['a_stroke_id'], 1);
const stroke_data = serializer_create(1024);
ser_u8(stroke_data, 255);
ser_u8(stroke_data, 0);
ser_u8(stroke_data, 0);
ser_u8(stroke_data, 8);
ser_u8(stroke_data, 0);
ser_u8(stroke_data, 0);
ser_u8(stroke_data, 255);
ser_u8(stroke_data, 1);
gl.bindTexture(gl.TEXTURE_2D, context.textures['stroke_data']);
gl.texSubImage2D(gl.TEXTURE_2D, 0, 0, 0, 2, 1, gl.RGBA, gl.UNSIGNED_BYTE, new Uint8Array([255, 0, 0, 16, 0, 0, 255, 2]));
upload_square_rgba16ui_texture(gl, context.stroke_data, config.stroke_texture_size);
gl.activeTexture(gl.TEXTURE0);
gl.drawElementsInstanced(gl.TRIANGLES, 6, gl.UNSIGNED_INT, 0, 3);
gl.drawArraysInstanced(gl.TRIANGLES, 0, 6, segment_count); // TODO: based on clipping results
/*
@ -316,7 +309,7 @@ function draw(state, context) { @@ -316,7 +309,7 @@ function draw(state, context) {
gl.deleteQuery(query);
// Don't re-enter this polling loop.
query = null;
} else {
} else if (!available) {
setTimeout(next_tick, 0);
}
}

106
client/webgl_geometry.js

@ -1,76 +1,19 @@ @@ -1,76 +1,19 @@
function push_point(s, x, y, ax, ay, bx, by, thickness, r, g, b, stroke_id) {
ser_f32(s, x);
ser_f32(s, y);
ser_f32(s, thickness);
ser_f32(s, ax);
ser_f32(s, ay);
ser_f32(s, bx);
ser_f32(s, by);
ser_u8(s, r);
ser_u8(s, g);
ser_u8(s, b);
ser_align(s, 4);
ser_u32(s, stroke_id);
}
function push_quad(s, p1x, p1y, p2x, p2y, p3x, p3y, p4x, p4y, ax, ay, bx, by, thickness, r, g, b, stroke_id) {
push_point(s, p1x, p1y, ax, ay, bx, by, thickness, r, g, b, stroke_id);
push_point(s, p2x, p2y, ax, ay, bx, by, thickness, r, g, b, stroke_id);
push_point(s, p3x, p3y, ax, ay, bx, by, thickness, r, g, b, stroke_id);
push_point(s, p4x, p4y, ax, ay, bx, by, thickness, r, g, b, stroke_id);
}
function push_stroke(s, stroke, stroke_index) {
const stroke_width = stroke.width;
const points = stroke.points;
const color_u32 = stroke.color;
const radius = stroke_width / 2;
if (points.length < 2) {
return;
}
const r = (color_u32 >> 16) & 0xFF;
const g = (color_u32 >> 8) & 0xFF;
const b = color_u32 & 0xFF;
for (let i = 0; i < points.length - 1; ++i) {
const from = points[i];
const to = points[i + 1];
const dir_x = to.x - from.x;
const dir_y = to.y - from.y;
const len = Math.sqrt(dir_x * dir_x + dir_y * dir_y);
const dir1_x = dir_x / len;
const dir1_y = dir_y / len;
const up_x = dir_y / len;
const up_y = -dir_x / len;
let p1_x = from.x + (up_x - dir1_x) * radius;
let p1_y = from.y + (up_y - dir1_y) * radius;
let p2_x = to.x + (up_x + dir1_x) * radius;
let p2_y = to.y + (up_y + dir1_y) * radius;
let p3_x = from.x + (-up_x - dir1_x) * radius;
let p3_y = from.y + (-up_y - dir1_y) * radius;
let p4_x = to.x + (-up_x + dir1_x) * radius;
let p4_y = to.y + (-up_y + dir1_y) * radius;
push_quad(s,
p1_x, p1_y,
p2_x, p2_y,
p3_x, p3_y,
p4_x, p4_y,
from.x, from.y,
to.x, to.y,
stroke_width,
r, g, b,
stroke_index
);
ser_f32(s, from.x);
ser_f32(s, from.y);
ser_f32(s, to.x);
ser_f32(s, to.y);
ser_u32(s, stroke_index);
}
}
@ -83,7 +26,7 @@ function geometry_prepare_stroke(state) { @@ -83,7 +26,7 @@ function geometry_prepare_stroke(state) {
return null;
}
const points = process_stroke(state, state.players[state.me].points);
const points = process_stroke(state, state.canvas.zoom, state.players[state.me].points);
return {
'color': state.players[state.me].color,
@ -99,23 +42,10 @@ function geometry_add_stroke(state, context, stroke, stroke_index, skip_bvh = fa @@ -99,23 +42,10 @@ function geometry_add_stroke(state, context, stroke, stroke_index, skip_bvh = fa
stroke.index = state.events.length;
for (let i = 0; i < config.lod_levels; ++i) {
// TODO: just pass zoom to process_stroke ?
const saved_zoom = state.canvas.zoom;
state.canvas.zoom = Math.pow(0.5, i);
const points = (i > 0 ? process_stroke(state, stroke.points) : stroke.points);
state.canvas.zoom = saved_zoom;
const vertex_serializer = context.lods[i].vertices = ser_ensure_by(context.lods[i].vertices, points.length * 4 * config.bytes_per_point);
/*
event.index = state.events.length;
event.starting_index = state.starting_index;
if (event.points.length > 1) {
state.starting_index += (event.points.length - 1) * 4;
}
const lod = context.lods[i];
state.total_points += event.points.length;
*/
const points = (i > 0 ? process_stroke(state, lod.max_zoom, stroke.points) : stroke.points);
const segment_serializer = lod.segments = ser_ensure_by(lod.segments, (points.length - 1) * config.bytes_per_quad);
let starting_index = 0;
@ -133,14 +63,26 @@ function geometry_add_stroke(state, context, stroke, stroke_index, skip_bvh = fa @@ -133,14 +63,26 @@ function geometry_add_stroke(state, context, stroke, stroke_index, skip_bvh = fa
context.lods[i].total_points += points.length;
push_stroke(vertex_serializer, stroke.lods[stroke.lods.length - 1], stroke_index);
push_stroke(segment_serializer, stroke.lods[stroke.lods.length - 1], stroke_index);
if (i === 0) {
stroke.bbox = stroke_bbox(stroke);
stroke.area = (stroke.bbox.x2 - stroke.bbox.x1) * (stroke.bbox.y2 - stroke.bbox.y1);
}
}
context.stroke_data = ser_ensure_by(context.stroke_data, config.bytes_per_stroke);
const color_u32 = stroke.color;
const r = (color_u32 >> 16) & 0xFF;
const g = (color_u32 >> 8) & 0xFF;
const b = color_u32 & 0xFF;
ser_u16(context.stroke_data, r);
ser_u16(context.stroke_data, g);
ser_u16(context.stroke_data, b);
ser_u16(context.stroke_data, stroke.width);
if (!skip_bvh) bvh_add_stroke(state.bvh, stroke_index, stroke);
}

27
client/webgl_shaders.js

@ -35,7 +35,7 @@ const simple_fs_src = `#version 300 es @@ -35,7 +35,7 @@ const simple_fs_src = `#version 300 es
precision highp float;
in vec2 v_uv;
flat in int v_quad_id;
out vec4 FragColor;
layout(location = 0) out vec4 FragColor;
void main() {
vec2 pixel = fwidth(v_uv);
vec2 border = 2.0 * pixel;
@ -94,7 +94,7 @@ const opaque_vs_src = `#version 300 es @@ -94,7 +94,7 @@ const opaque_vs_src = `#version 300 es
const nop_fs_src = `#version 300 es
precision highp float;
flat in int v_stroke_id;
out vec4 FragColor;
layout(location = 0) out vec4 FragColor;
void main() {
vec3 color = vec3(float(v_stroke_id * 3245 % 255) / 255.0, float(v_stroke_id * 7343 % 255) / 255.0, float(v_stroke_id * 5528 % 255) / 255.0);
FragColor = vec4(color, 1.0);
@ -110,8 +110,9 @@ const sdf_vs_src = `#version 300 es @@ -110,8 +110,9 @@ const sdf_vs_src = `#version 300 es
uniform vec2 u_res;
uniform vec2 u_translation;
uniform int u_stroke_count;
uniform int u_stroke_texture_size;
uniform highp sampler2D u_stroke_data;
uniform highp usampler2D u_stroke_data;
out vec4 v_line;
out vec2 v_texcoord;
@ -122,9 +123,12 @@ const sdf_vs_src = `#version 300 es @@ -122,9 +123,12 @@ const sdf_vs_src = `#version 300 es
void main() {
vec2 screen02;
float apron = 1.0; // google "futanari inflation rule 34"
int stroke_data_y = a_stroke_id / u_stroke_texture_size;
int stroke_data_x = a_stroke_id % u_stroke_texture_size;
vec4 stroke_data = texelFetch(u_stroke_data, ivec2(a_stroke_id, 0), 0);
float radius = stroke_data.w * 255.0;
uvec4 stroke_data = texelFetch(u_stroke_data, ivec2(stroke_data_x, stroke_data_y), 0);
float radius = float(stroke_data.w);
vec2 a = a_ab.xy;
vec2 b = a_ab.zw;
@ -165,7 +169,7 @@ const sdf_vs_src = `#version 300 es @@ -165,7 +169,7 @@ const sdf_vs_src = `#version 300 es
v_line = vec4(a, b);
v_thickness = radius;
v_color = stroke_data.xyz;
v_color = vec3(stroke_data.xyz) / 255.0;
gl_Position = vec4(screen02 - 1.0, (float(a_stroke_id) / float(u_stroke_count)) * 2.0 - 1.0, 1);
}
@ -182,7 +186,7 @@ const sdf_fs_src = `#version 300 es @@ -182,7 +186,7 @@ const sdf_fs_src = `#version 300 es
flat in float v_thickness;
out vec4 FragColor;
layout(location = 0) out vec4 FragColor;
void main() {
if (u_debug_mode == 0) {
@ -231,7 +235,7 @@ const tquad_fs_src = `#version 300 es @@ -231,7 +235,7 @@ const tquad_fs_src = `#version 300 es
uniform sampler2D u_texture;
uniform bool u_outline;
out vec4 FragColor;
layout(location = 0) out vec4 FragColor;
void main() {
if (!u_outline) {
@ -257,12 +261,12 @@ function init_webgl(state, context) { @@ -257,12 +261,12 @@ function init_webgl(state, context) {
gl.enable(gl.DEPTH_TEST);
gl.depthFunc(gl.GEQUAL);
/*
context.gpu_timer_ext = gl.getExtension('EXT_disjoint_timer_query_webgl2');
if (context.gpu_timer_ext === null) {
context.gpu_timer_ext = gl.getExtension('EXT_disjoint_timer_query');
}
*/
const quad_vs = create_shader(gl, gl.VERTEX_SHADER, tquad_vs_src);
const quad_fs = create_shader(gl, gl.FRAGMENT_SHADER, tquad_fs_src);
@ -312,6 +316,7 @@ function init_webgl(state, context) { @@ -312,6 +316,7 @@ function init_webgl(state, context) {
'u_debug_mode': gl.getUniformLocation(context.programs['sdf'].main, 'u_debug_mode'),
'u_stroke_count': gl.getUniformLocation(context.programs['sdf'].main, 'u_stroke_count'),
'u_stroke_data': gl.getUniformLocation(context.programs['sdf'].main, 'u_stroke_data'),
'u_stroke_texture_size': gl.getUniformLocation(context.programs['sdf'].main, 'u_stroke_texture_size'),
}
};
@ -337,7 +342,7 @@ function init_webgl(state, context) { @@ -337,7 +342,7 @@ function init_webgl(state, context) {
gl.bindTexture(gl.TEXTURE_2D, context.textures['stroke_data']);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.NEAREST);
gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.NEAREST);
gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, 2, 1, 0, gl.RGBA, gl.UNSIGNED_BYTE, null);
gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA16UI, config.stroke_texture_size, config.stroke_texture_size, 0, gl.RGBA_INTEGER, gl.UNSIGNED_SHORT, null);
const resize_canvas = (entries) => {
// https://www.khronos.org/webgl/wiki/HandlingHighDPI

Loading…
Cancel
Save