unsafe extern "C" fn bitshuffle_128( data: Simd<i8, 16>, indices: Simd<i8, 16>, mask: __mmask16, ) -> __mmask16