You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

232 lines
8.8 KiB

function worker_message(worker, message) {
return new Promise((resolve) => {
worker.onmessage = (e) => resolve(e.data);
worker.postMessage(message);
});
}
function workers_messages(workers, messages) {
const promises = [];
for (let i = 0; i < workers.length; ++i) {
promises.push(worker_message(workers[i], messages[i]));
}
return Promise.all(promises);
}
function workers_thread_message(workers, message, thread_field=null) {
const messages = [];
for (let i = 0; i < workers.length; ++i) {
if (thread_field !== null) {
const m = structuredClone(message);
m[thread_field] = i;
messages.push(m);
} else {
messages.push(message);
}
}
return workers_messages(workers, messages);
}
async function init_wasm(state) {
const memory = new WebAssembly.Memory({
initial: 32, // 2MiB, 1MiB of which is stack
maximum: 16384, // 1GiB
shared: true,
});
// "Master thread" to do maintance on (static allocations, merging results etc)
const master_wasm = await WebAssembly.instantiateStreaming(fetch('wasm/lod.wasm'), {
env: { 'memory': memory }
});
const nworkers = navigator.hardwareConcurrency;
state.wasm.exports = master_wasm.instance.exports;
state.wasm.heap_base = state.wasm.exports.alloc_static(0);
state.wasm.workers = [];
state.wasm.memory = memory;
for (let i = 0; i < nworkers; ++i) {
const w = new Worker('lod_worker.js');
state.wasm.workers.push(w);
}
await workers_thread_message(state.wasm.workers, {
'type': 'init',
'heap_base': state.wasm.heap_base,
'memory': memory,
}, 'thread_id');
state.wasm.stroke_bytes = 4096;
state.wasm.coords_bytes = 4096;
state.wasm.buffers = {
'xs': {
'offset': state.wasm.exports.alloc_static(state.wasm.coords_bytes / 2),
'used': 0
},
'ys': {
'offset': state.wasm.exports.alloc_static(state.wasm.coords_bytes / 2),
'used': 0
},
'coords_from': {
'offset': state.wasm.exports.alloc_static(state.wasm.stroke_bytes),
'used': 0,
},
'pressures': {
'offset': state.wasm.exports.alloc_static(state.wasm.coords_bytes / 8),
'used': 0
},
};
const mem = state.wasm.memory.buffer;
state.wasm.buffers['xs'].tv = tv_create_on(Float32Array, state.wasm.coords_bytes / 8,
mem, state.wasm.buffers['xs'].offset);
state.wasm.buffers['ys'].tv = tv_create_on(Float32Array, state.wasm.coords_bytes / 8,
mem, state.wasm.buffers['ys'].offset);
state.wasm.buffers['coords_from'].tv = tv_create_on(Uint32Array, state.wasm.stroke_bytes / 4,
mem, state.wasm.buffers['coords_from'].offset);
state.wasm.buffers['pressures'].tv = tv_create_on(Uint8Array, state.wasm.coords_bytes / 8,
mem, state.wasm.buffers['pressures'].offset);
tv_add(state.wasm.buffers['coords_from'].tv, 0);
state.wasm.buffers['coords_from'].used = 4;
}
function wasm_ensure_by(state, nstrokes, ncoords) {
const buffers = state.wasm.buffers;
const old_coords_from_offset = buffers['coords_from'].offset;
const old_pressures_offset = buffers['pressures'].offset;
const old_size_coords = state.wasm.coords_bytes;
const old_size_strokes = state.wasm.stroke_bytes;
let realloc = false;
if (buffers['xs'].used + ncoords * 4 > state.wasm.coords_bytes / 2) {
state.wasm.coords_bytes += round_to_pow2(ncoords * 4, 4096 * 16); // 1 wasm page (although it doesn't matter here)
realloc = true;
}
if (buffers['coords_from'].used + nstrokes * 4 > state.wasm.stroke_bytes / 2) {
state.wasm.stroke_bytes += round_to_pow2(nstrokes * 4, 4096 * 16);
realloc = true;
}
if (realloc) {
const current_pages = Math.ceil(state.wasm.memory.buffer.byteLength / (4096 * 16));
const need_pages = 2 * Math.ceil((state.wasm.coords_bytes * 3 + state.wasm.stroke_bytes * 2) / (4096 * 16)); // TODO: figure out actual memory requirements
const grow_by = Math.max(1, need_pages - current_pages);
// const grow_by = 16;
state.wasm.memory.grow(grow_by);
state.wasm.exports.free_static();
const mem = state.wasm.memory.buffer;
const memv = new Uint8Array(mem);
buffers['xs'].offset = state.wasm.exports.alloc_static(state.wasm.coords_bytes / 2);
buffers['ys'].offset = state.wasm.exports.alloc_static(state.wasm.coords_bytes / 2);
buffers['coords_from'].offset = state.wasm.exports.alloc_static(state.wasm.stroke_bytes);
buffers['pressures'].offset = state.wasm.exports.alloc_static(state.wasm.coords_bytes / 8);
buffers['xs'].tv = tv_create_on(Float32Array, state.wasm.coords_bytes / 8, mem, buffers['xs'].offset);
buffers['ys'].tv = tv_create_on(Float32Array, state.wasm.coords_bytes / 8, mem, buffers['ys'].offset);
buffers['coords_from'].tv = tv_create_on(Uint32Array, state.wasm.stroke_bytes / 4, mem, buffers['coords_from'].offset);
buffers['pressures'].tv = tv_create_on(Uint8Array, state.wasm.coords_bytes / 8, mem, buffers['pressures'].offset);
// TODO: this should have been automatic maybe?
buffers['xs'].tv.size = buffers['xs'].used / 4;
buffers['ys'].tv.size = buffers['ys'].used / 4;
buffers['coords_from'].tv.size = buffers['coords_from'].used / 4;
buffers['pressures'].tv.size = buffers['pressures'].used;
const tmp = new Uint8Array(Math.max(state.wasm.coords_bytes, state.wasm.stroke_bytes)); // TODO: needed?
// Copy from back to front (otherwise we will overwrite)
tmp.set(new Uint8Array(mem, old_pressures_offset, buffers['pressures'].used));
memv.set(new Uint8Array(tmp.buffer, 0, buffers['pressures'].used), buffers['pressures'].offset);
tmp.set(new Uint8Array(mem, old_coords_from_offset, old_size_strokes));
memv.set(new Uint8Array(tmp.buffer, 0, old_size_strokes), buffers['coords_from'].offset);
}
}
async function do_lod(state, context) {
state.wasm.exports.free_dynamic();
const buffers = state.wasm.buffers;
const result_buffers = state.wasm.exports.alloc_dynamic(state.wasm.workers.length * 4);
const result_counts = state.wasm.exports.alloc_dynamic(state.wasm.workers.length * 4);
const clipped_indices = state.wasm.exports.alloc_dynamic(context.clipped_indices.size * 4);
const mem = new Uint8Array(state.wasm.memory.buffer);
// Dynamic input data that should (by design) never be too big
mem.set(tv_bytes(context.clipped_indices), clipped_indices);
// TODO: this is a very naive and dumb way of distributing work. Better way
// would be to distrubute strokes based on total point count, so that
// each worker gets approximately the same amout of _points_
const indices_per_thread = Math.floor(context.clipped_indices.size / state.wasm.workers.length);
const offsets = {
'coords_from': buffers['coords_from'].offset,
'xs': buffers['xs'].offset,
'ys': buffers['ys'].offset,
'pressures': buffers['pressures'].offset,
'result_buffers': result_buffers,
'result_counts': result_counts,
};
const jobs = [];
for (let i = 0; i < state.wasm.workers.length; ++i) {
let count = indices_per_thread;
if (i === state.wasm.workers.length - 1) {
count += context.clipped_indices.size % state.wasm.workers.length;
}
jobs.push({
'type': 'lod',
'indices_base': clipped_indices + i * 4 * indices_per_thread,
'indices_count': count,
'zoom': state.canvas.zoom,
'offsets': offsets
});
}
await workers_messages(state.wasm.workers, jobs);
const result_offset = state.wasm.exports.merge_results(
result_counts,
result_buffers,
state.wasm.workers.length
);
const segment_count = new Int32Array(state.wasm.memory.buffer, result_counts, 1)[0]; // by convention
// Use results without copying from WASM memory
const wasm_points = new Float32Array(state.wasm.memory.buffer,
result_offset, segment_count * 2);
const wasm_ids = new Uint32Array(state.wasm.memory.buffer,
result_offset + segment_count * 2 * 4, segment_count);
const wasm_pressures = new Uint8Array(state.wasm.memory.buffer,
result_offset + segment_count * 2 * 4 + segment_count * 4, segment_count);
context.instance_data_points.data = wasm_points;
context.instance_data_points.size = segment_count * 2;
context.instance_data_ids.data = wasm_ids;
context.instance_data_ids.size = segment_count;
context.instance_data_pressures.data = wasm_pressures;
context.instance_data_pressures.size = segment_count;
return segment_count;
}