@ -62,26 +62,26 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
float dist_ab = __builtin_sqrtf ( dx * dx + dy * dy ) ;
float dist_ab = __builtin_sqrtf ( dx * dx + dy * dy ) ;
float dir_nx = dy / dist_ab * 255.0f ;
float dir_nx = dy / dist_ab * 255.0f ;
float dir_ny = - dx / dist_ab * 255.0f ;
float dir_ny = - dx / dist_ab * 255.0f ;
#if 0
#if 0
for ( int i = segment_start + 1 ; i < segment_end ; + + i ) {
for ( int i = segment_start + 1 ; i < segment_end ; + + i ) {
float px = xs [ coords_from + i ] ;
float px = xs [ coords_from + i ] ;
float py = ys [ coords_from + i ] ;
float py = ys [ coords_from + i ] ;
unsigned char pp = pressures [ coords_from + i ] ;
float apx = px - ax ;
unsigned char pp = pressures [ coords_from + i ] ;
float apy = py - ay ;
float apx = px - ax ;
float dist = __builtin_fabsf ( apx * dir_nx + apy * dir_ny )
float apy = py - ay ;
+ __builtin_abs ( pp - ap ) + __builtin_abs ( pp - bp ) ;
float dist = __builtin_fabsf ( apx * dir_nx + apy * dir_ny )
if ( dist > EPS & & dist > max_dist ) {
+ __builtin_abs ( pp - ap ) + __builtin_abs ( pp - bp ) ;
result = i ;
max_dist = dist ;
if ( dist > EPS & & dist > max_dist ) {
result = i ;
max_dist = dist ;
}
}
}
}
# else
# else
v128_t eps_x4 = wasm_f32x4_splat ( EPS ) ;
v128_t ax_x4 = wasm_f32x4_splat ( ax ) ;
v128_t ax_x4 = wasm_f32x4_splat ( ax ) ;
v128_t ay_x4 = wasm_f32x4_splat ( ay ) ;
v128_t ay_x4 = wasm_f32x4_splat ( ay ) ;
v128_t ap_x4 = wasm_f32x4_splat ( ap ) ;
v128_t ap_x4 = wasm_f32x4_splat ( ap ) ;
@ -91,18 +91,19 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
v128_t index_x4 = wasm_u32x4_make ( segment_start + 1 , segment_start + 2 , segment_start + 3 , segment_start + 4 ) ;
v128_t index_x4 = wasm_u32x4_make ( segment_start + 1 , segment_start + 2 , segment_start + 3 , segment_start + 4 ) ;
v128_t four_x4 = wasm_u32x4_const_splat ( 4 ) ;
v128_t four_x4 = wasm_u32x4_const_splat ( 4 ) ;
v128_t max_vals_x4 = wasm_f32x4_const_splat ( 0.0f ) ;
v128_t max_dist_x4 = wasm_f32x4_splat ( EPS ) ;
v128_t max_index_x4 = wasm_u32x4_const_splat ( - 1 ) ;
v128_t max_index_x4 = wasm_u32x4_const_splat ( - 1 ) ;
for ( int i = segment_start + 1 ; i < segment_end - 3 ; i + = 4 ) {
for ( int i = segment_start + 1 ; i < segment_end - 3 ; i + = 4 ) {
v128_t px_x4 = wasm_v128_load ( xs + coords_from + i ) ;
v128_t px_x4 = wasm_v128_load ( xs + coords_from + i ) ;
v128_t py_x4 = wasm_v128_load ( ys + coords_from + i ) ;
v128_t py_x4 = wasm_v128_load ( ys + coords_from + i ) ;
v128_t pp_x16 = wasm_v128_load ( pressures + coords_from / 2 + i ) ;
// Take 4 highest bytes and convert to float
v128_t pp_x4 = wasm_f32x4_make (
v128_t pp_x8 = wasm_u16x8_extend_high_u8x16 ( pp_x16 ) ;
pressures [ coords_from / 2 + i + 0 ] ,
v128_t pp_x4i = wasm_u32x4_extend_high_u16x8 ( pp_x8 ) ;
pressures [ coords_from / 2 + i + 1 ] ,
v128_t pp_x4 = wasm_f32x4_convert_i32x4 ( pp_x4i ) ; // i version is 8 times faster on x64?
pressures [ coords_from / 2 + i + 2 ] ,
pressures [ coords_from / 2 + i + 3 ]
) ;
v128_t apx_x4 = wasm_f32x4_sub ( px_x4 , ax_x4 ) ;
v128_t apx_x4 = wasm_f32x4_sub ( px_x4 , ax_x4 ) ;
v128_t apy_x4 = wasm_f32x4_sub ( py_x4 , ay_x4 ) ;
v128_t apy_x4 = wasm_f32x4_sub ( py_x4 , ay_x4 ) ;
@ -120,19 +121,10 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
)
)
) ;
) ;
v128_t dist_mask = wasm_f32x4_gt ( dist_x4 , eps_x4 ) ;
v128_t mask = wasm_f32x4_gt ( dist_x4 , max_dist_x4 ) ;
v128_t max_mask = wasm_f32x4_gt ( dist_x4 , max_vals_x4 ) ;
v128_t mask = wasm_v128_and ( dist_mask , max_mask ) ;
max_index_x4 = wasm_v128_or (
wasm_v128_and ( index_x4 , mask ) ,
wasm_v128_andnot ( max_index_x4 , mask )
) ;
max_vals_x4 = wasm_v128_or (
max_index_x4 = wasm_v128_bitselect ( index_x4 , max_index_x4 , mask ) ;
wasm_v128_and ( dist_x4 , mask ) ,
max_dist_x4 = wasm_v128_bitselect ( dist_x4 , max_dist_x4 , mask ) ;
wasm_v128_andnot ( max_vals_x4 , mask )
) ;
index_x4 = wasm_i32x4_add ( index_x4 , four_x4 ) ;
index_x4 = wasm_i32x4_add ( index_x4 , four_x4 ) ;
}
}
@ -141,7 +133,7 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
float values [ 4 ] ;
float values [ 4 ] ;
wasm_v128_store ( indices , max_index_x4 ) ;
wasm_v128_store ( indices , max_index_x4 ) ;
wasm_v128_store ( values , max_vals _x4 ) ;
wasm_v128_store ( values , max_dist _x4 ) ;
for ( int i = 0 ; i < 4 ; + + i ) {
for ( int i = 0 ; i < 4 ; + + i ) {
if ( indices [ i ] ! = - 1 ) {
if ( indices [ i ] ! = - 1 ) {
@ -152,6 +144,11 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
}
}
}
}
if ( max_dist = = EPS ) {
max_dist = 0.0f ;
result = - 1 ;
}
int remainder = ( segment_end - segment_start - 1 ) % 4 ;
int remainder = ( segment_end - segment_start - 1 ) % 4 ;
for ( int i = segment_end - remainder ; i < segment_end ; + + i ) {
for ( int i = segment_end - remainder ; i < segment_end ; + + i ) {
@ -171,9 +168,8 @@ rdp_find_max(float *xs, float *ys, unsigned char *pressures, float zoom, int coo
max_dist = dist ;
max_dist = dist ;
}
}
}
}
# endif
# endif
return ( result ) ;
return ( result ) ;
}
}