// Signature of `expandloadps_128`: an `unsafe` function with the C ABI that
// takes a raw `f32` pointer, a four-lane `f32` vector and an 8-bit mask, and
// returns a four-lane `f32` vector.
//
// Parameters (types as declared; semantics are not visible from here):
// * `mem_addr` - raw `*const f32`.
//   NOTE(review): presumably the address the lanes are loaded from - confirm.
// * `a` - `Simd<f32, 4>`.
//   NOTE(review): presumably the pass-through vector used for lanes whose
//   mask bit is clear - confirm against the implementation/intrinsic.
// * `mask` - `u8`.
//   NOTE(review): presumably one bit per lane, so only the low four bits
//   would matter for a four-lane vector - confirm.
//
// Returns a `Simd<f32, 4>`.
//
// Safety: the function is declared `unsafe`, so the caller is responsible for
// its preconditions. NOTE(review): these presumably include `mem_addr` being
// valid for the reads selected by `mask` - TODO confirm.
unsafe extern "C" fn expandloadps_128( mem_addr: *const f32, a: Simd<f32, 4>, mask: u8, ) -> Simd<f32, 4>