/// Declaration of `dpbf16ps_256`, an `unsafe` function with the C ABI; no body is
/// visible alongside this signature.
///
/// Takes one 8-lane `f32` vector (`a`) and two 16-lane `i16` vectors (`b`, `c`),
/// and returns an 8-lane `f32` vector.
///
/// NOTE(review): the name suggests the 256-bit AVX512-BF16 `VDPBF16PS` operation
/// (dot product of BF16 pairs accumulated into packed `f32`), with `a` presumably
/// the accumulator and `b`/`c` carrying BF16 bit patterns in `i16` lanes — confirm
/// against the `link_name` attribute and the calling intrinsic.
///
/// # Safety
///
/// The function is marked `unsafe`; its exact preconditions are not visible from
/// this declaration. TODO confirm — presumably the caller must guarantee that the
/// required CPU feature is available.
unsafe extern "C" fn dpbf16ps_256( a: Simd<f32, 8>, b: Simd<i16, 16>, c: Simd<i16, 16>, ) -> Simd<f32, 8>