@ -202,7 +202,8 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
float * ys ,
float * ys ,
unsigned char * pressures ,
unsigned char * pressures ,
char * * result_buffer ,
char * * result_buffer ,
int * result_count )
int * result_count ,
int * result_batch_count )
{
{
if ( clipped_count = = 0 ) {
if ( clipped_count = = 0 ) {
result_count [ 0 ] = 0 ;
result_count [ 0 ] = 0 ;
@ -281,16 +282,18 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
// Write actual coordinates (points) and stroke ids
// Write actual coordinates (points) and stroke ids
// Do this in one allocation so that they're not interleaved between threads
// Do this in one allocation so that they're not interleaved between threads
char * output = alloc_dynamic ( segments_head * ( 3 * 4 + 1 ) + clipped_count * 4 ) ;
char * output = alloc_dynamic ( round_to_pow2 ( segments_head * ( 3 * 4 + 1 ) , 4 ) + clipped_count * 4 * 2 ) ; // max two ints per stroke for batch info (realistically, much less)
float * points = ( float * ) output ;
float * points = ( float * ) output ;
int * ids = ( int * ) ( output + segments_head * 4 * 2 ) ;
int * ids = ( int * ) ( output + segments_head * 4 * 2 ) ;
unsigned char * pressures_res = ( unsigned char * ) ( output + segments_head * 4 * 3 ) ;
unsigned char * pressures_res = ( unsigned char * ) ( output + segments_head * 4 * 3 ) ;
unsigned int * batches = ( unsigned int * ) ( output + segments_head * ( 4 * 3 + 1 ) ) ;
int * batches = ( int * ) ( output + round_to_pow2 ( segments_head * ( 4 * 3 + 1 ) , 4 ) ) ;
int phead = 0 ;
int phead = 0 ;
int ihead = 0 ;
int ihead = 0 ;
float sqrt_zoom = __builtin_sqrtf ( zoom ) ;
float sqrt_zoom = __builtin_sqrtf ( zoom ) ;
int last_lod = - 1 ;
int last_lod = - 100 ;
int batch_count = 0 ;
int batch_size = 0 ;
for ( int i = 0 ; i < clipped_count ; + + i ) {
for ( int i = 0 ; i < clipped_count ; + + i ) {
int stroke_index = clipped_indices [ i ] ;
int stroke_index = clipped_indices [ i ] ;
@ -316,48 +319,76 @@ do_lod(int *clipped_indices, int clipped_count, float zoom,
}
}
}
}
int segment_count = to - from ;
// Compute recommended LOD level, add to current batch or start new batch
// Compute recommended LOD level, add to current batch or start new batch
float sqrt_width = __builtin_sqrtf ( width [ stroke_index ] ) ; // TOOD: pass in stroke width
float sqrt_width = __builtin_sqrtf ( width [ stroke_index ] ) ;
int lod = __builtin_round ( sqrt_zoom * sqrt_width * 0.3333f ) ;
int lod = __builtin_ceil ( sqrt_zoom * sqrt_width * 0.3333f ) ; // TODO: round
#if 0
if ( lod > 7 ) lod = 7 ;
if ( __builtin_abs ( lod - last_lod ) > 2 ) {
if ( batch_size > 0 & & __builtin_abs ( lod - last_lod ) > 2 ) {
// Start new batch
// Start new batch
} else {
batches [ batch_count * 2 + 0 ] = batch_size ;
// Add to existing batch
batches [ batch_count * 2 + 1 ] = last_lod ;
+ + batch_count ;
batch_size = 0 ;
}
}
batch_size + = segment_count ;
last_lod = lod ;
last_lod = lod ;
# endif
}
}
if ( batch_size > 0 ) {
batches [ batch_count * 2 + 0 ] = batch_size ;
batches [ batch_count * 2 + 1 ] = last_lod ;
+ + batch_count ;
}
result_buffer [ 0 ] = output ;
result_buffer [ 0 ] = output ;
result_count [ 0 ] = segments_head ;
result_count [ 0 ] = segments_head ;
result_batch_count [ 0 ] = batch_count ;
}
}
// NOT thread-safe, only call from one thread
// NOT thread-safe, only call from one thread
// merge_results: concatenates the per-thread result buffers into a single
// buffer obtained from alloc_dynamic and returns it.
//
// NOTE(review): this span reads as a flattened diff, not compilable source:
// most statements appear twice (pre-change line followed by its post-change
// counterpart, e.g. the 3-parameter and 4-parameter signatures below) and a
// hunk header is embedded in the body. The comments describe the visible
// statements; lines that exist only once belong to the post-change version.
//
// Parameters (post-change signature):
//   segment_counts - per-thread segment counts; element 0 is overwritten
//                    with the total before returning.
//   batch_counts   - per-thread batch counts; element 0 is overwritten with
//                    the total before returning.
//   buffers        - per-thread buffers, read at byte offsets 0 (points),
//                    segments*4*2 (ids), segments*4*3 (pressures) and
//                    round_to_pow2(segments*13, 4) (batch records).
//   nthreads       - number of entries in the arrays above.
//
// Returns the merged buffer, laid out in the same order: points, ids,
// pressures, then batch records.
char *
char *
merge_results ( int * segment_counts , char * * buffers , int nthreads )
merge_results ( int * segment_counts , int * batch_counts , char * * buffers , int nthreads )
{
{
int total_segments = 0 ;
int total_segments = 0 ;
int total_batches = 0 ;
// First pass: sum the per-thread counts so the merged buffer can be sized.
for ( int i = 0 ; i < nthreads ; + + i ) {
for ( int i = 0 ; i < nthreads ; + + i ) {
total_segments + = segment_counts [ i ] ;
total_segments + = segment_counts [ i ] ;
total_batches + = batch_counts [ i ] ;
}
}
// 13 bytes per segment: 8 for the point data, 4 for the id, 1 for the pressure.
// NOTE(review): the post-change allocation reserves total_batches * 4 bytes
// for batch records, but the copy loop below stores two ints per batch
// (two bhead++ writes per iteration). With a 4-byte int that needs
// total_batches * 4 * 2 bytes, so this looks undersized by half - confirm.
// round_to_pow2 is defined elsewhere; presumably it rounds the byte offset
// up to a multiple of 4 so the int batch records are aligned - TODO confirm.
char * merged = alloc_dynamic ( total_segments * ( 3 * 4 + 1 ) ) ;
char * merged = alloc_dynamic ( round_to_pow2 ( total_segments * ( 3 * 4 + 1 ) , 4 ) + total_batches * 4 ) ;
// Write cursors into the merged buffer, one per section.
float * points = ( float * ) merged ;
float * points = ( float * ) merged ;
int * ids = ( int * ) ( merged + total_segments * 4 * 2 ) ;
int * ids = ( int * ) ( merged + total_segments * 4 * 2 ) ;
unsigned char * pressures = ( unsigned char * ) ( merged + total_segments * 4 * 3 ) ;
unsigned char * pressures = ( unsigned char * ) ( merged + total_segments * 4 * 3 ) ;
int * batches = ( int * ) ( merged + round_to_pow2 ( total_segments * ( 3 * 4 + 1 ) , 4 ) ) ;
// NOTE(review): batch_base and last_batch_lod are not referenced anywhere in
// the visible lines of this function.
int batch_base = 0 ;
int last_batch_lod = - 99 ;
// bhead indexes the next int slot in the merged batch records.
int bhead = 0 ;
// Despite its name, written_batches accumulates thread_batches[j + 0] values
// (per-batch sizes), so it is the running offset at which the next batch starts.
int written_batches = 0 ;
for ( int i = 0 ; i < nthreads ; + + i ) {
for ( int i = 0 ; i < nthreads ; + + i ) {
int segments = segment_counts [ i ] ;
int segments = segment_counts [ i ] ;
int nbatches = batch_counts [ i ] ;
// This thread's batch records sit after its own points/ids/pressures sections.
int * thread_batches = ( int * ) ( buffers [ i ] + round_to_pow2 ( segments * ( 4 * 3 + 1 ) , 4 ) ) ;
if ( segments > 0 ) {
if ( segments > 0 ) {
// Copy each section of this thread's buffer to the matching cursor.
__builtin_memcpy ( points , buffers [ i ] , segments * 4 * 2 ) ;
__builtin_memcpy ( points , buffers [ i ] , segments * 4 * 2 ) ;
__builtin_memcpy ( ids , buffers [ i ] + segments * 4 * 2 , segments * 4 ) ;
__builtin_memcpy ( ids , buffers [ i ] + segments * 4 * 2 , segments * 4 ) ;
__builtin_memcpy ( pressures , buffers [ i ] + segments * 4 * 3 , segments ) ;
__builtin_memcpy ( pressures , buffers [ i ] + segments * 4 * 3 , segments ) ;
// Per-thread records are (size, second value) pairs; the merged records
// are (running start offset, second value) pairs.
for ( int j = 0 ; j < nbatches * 2 ; j + = 2 ) {
batches [ bhead + + ] = written_batches ;
batches [ bhead + + ] = thread_batches [ j + 1 ] ;
written_batches + = thread_batches [ j + 0 ] ;
}
// Advance the cursors past what this thread contributed.
points + = segments * 2 ;
points + = segments * 2 ;
ids + = segments ;
ids + = segments ;
pressures + = segments ;
pressures + = segments ;
@ -365,6 +396,7 @@ merge_results(int *segment_counts, char **buffers, int nthreads)
}
}
// Report the totals back through element 0 of the count arrays.
segment_counts [ 0 ] = total_segments ;
segment_counts [ 0 ] = total_segments ;
batch_counts [ 0 ] = total_batches ;
return ( merged ) ;
return ( merged ) ;
}
}