I am testing zgemm_() subroutine and write the following code to compute C=A*B, of which all are 2*2 matrices.

--------------------------------------------------------------------------

#include<stdio.h>

#include<f2c.h>

#include<clapack.h>

#include<blaswrap.h>

int main(){

char transa = 'N';

char transb = 'N';

integer m = 2;

integer n = 2;

integer k = 2;

doublecomplex alpha;

doublecomplex a[2][2];

integer lda = 2;

doublecomplex b[2][2];

integer ldb = 2;

doublecomplex beta;

doublecomplex c[2][2];

integer ldc = 2;

int i, j;

a[0][0].r = 1;

a[0][1].r = 1;

a[1][0].r = 1;

a[1][1].r = 1;

b[0][0].r = 1;

b[0][1].r = 0;

b[1][0].r = 1;

b[1][1].r = 2;

alpha.r = 1; alpha.i = 0;

beta.r = 0; beta.i = 0;

// Print A, B, C before multiplication

for(i=0;i<2;i++){

for(j=0;j<2;j++) printf("%+f i %+f ", a[i][j].r, a[i][j].i);

putchar('\n');

}

for(i=0;i<2;i++){

for(j=0;j<2;j++) printf("%+f i %+f ", b[i][j].r, b[i][j].i);

putchar('\n');

}

for(i=0;i<2;i++){

for(j=0;j<2;j++) printf("%+f i %+f ", c[i][j].r, c[i][j].i);

putchar('\n');

}

zgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);

// Print A, B, C after multiplication

for(i=0;i<2;i++){

for(j=0;j<2;j++) printf("%+f i %+f ", a[i][j].r, a[i][j].i);

putchar('\n');

}

for(i=0;i<2;i++){

for(j=0;j<2;j++) printf("%+f i %+f ", b[i][j].r, b[i][j].i);

putchar('\n');

}

for(i=0;i<2;i++){

for(j=0;j<2;j++) printf("%+f i %+f ", c[i][j].r, c[i][j].i);

putchar('\n');

}

return 0;

}

--------------------------------------------------------------------------

The screen output is:

--------------------------------------------------------------------------

+1.000000 i +0.000000 +1.000000 i +0.000000 // A before

+1.000000 i +0.000000 +1.000000 i +0.000000

+1.000000 i +0.000000 +0.000000 i +0.000000 // B before

+1.000000 i +0.000000 +2.000000 i +0.000000

+0.000000 i +0.000000 +0.000000 i +0.000000 // C before

+0.000000 i +0.000000 +0.000000 i +0.000000

+1.000000 i +0.000000 +1.000000 i +0.000000 // A after

+1.000000 i +0.000000 +1.000000 i +0.000000

+1.000000 i +0.000000 +0.000000 i +0.000000 // B after

+1.000000 i +0.000000 +2.000000 i +0.000000

+1.000000 i +0.000000 +1.000000 i +0.000000 // C after

+3.000000 i +0.000000 +3.000000 i +0.000000

--------------------------------------------------------------------------

As can be seen, the returned C is clearly wrong. Which point am I missing?