// Listing 2. Simplified Matrix Multiplication in Brook

// Kernel: produces one element of the matrix product per output element.
//
//   N2     - number of product terms to accumulate for each output element
//            (passed as float and used as the loop counter)
//   A, B   - input matrices, read as 2D gather arrays
//   result - output stream; each element receives its accumulated sum
kernel void matmulKernel( float N2, float A[][], float B[][], out float result<> ){
    // 2D position of the output element handled by this kernel instance.
    float2 ik = indexof(result).xy;

    // Both gather indices are packed into one float4 so that a single
    // vector add advances them together:
    //   .xy is the index into B, starting at (ik.x, 0)
    //   .zw is the index into A, starting at (0, ik.y)
    float4 ijjk = float4( ik.x, 0.0f, 0.0f, ik.y );

    // Per-iteration step: increments .y and .z (the running summation index)
    // while leaving .x and .w (the fixed output coordinates) untouched.
    float4 jp1 = float4( 0.0f, 1.0f, 1.0f, 0.0f );

    float C = 0.0f;   // running sum for this output element
    float n2 = N2;    // remaining number of terms

    while( n2 > 0 ) {
        C += A[ijjk.zw]*B[ijjk.xy];
        ijjk += jp1;
        n2 -= 1.0f;
    }

    result = C;
}

// Host wrapper: copies A and B into streams, runs matmulKernel, and copies
// the product back into C.
//
//   A  - input matrix data, read via streamRead into an N1 x N2 stream
//   B  - input matrix data, read via streamRead into an N2 x N3 stream
//   C  - output buffer, filled via streamWrite from an N1 x N3 stream
//   N1, N2, N3 - matrix extents; N2 is the shared inner dimension
//
// NOTE(review): the stream extents were missing from the listing as received
// (the declarations were plain `float` scalars, and N1/N3 were unused). The
// shapes below are inferred from the N2-term loop in matmulKernel and the
// conventional (N1 x N2) * (N2 x N3) = (N1 x N3) product - confirm against
// the original listing, including the row/column order Brook expects.
void matmul( float* A, float* B, float* C, size_t N1, size_t N2, size_t N3 ){
    float Astream<N1, N2>;
    float Bstream<N2, N3>;
    float Cstream<N1, N3>;

    streamRead( Astream, A );
    streamRead( Bstream, B );

    // The kernel takes the inner dimension as a float loop counter.
    matmulKernel( (float)N2, Astream, Bstream, Cstream );

    streamWrite( Cstream, C );
}