unsafe extern "C" fn bitshuffle_256( data: Simd<i8, 32>, indices: Simd<i8, 32>, mask: __mmask32, ) -> __mmask32