I read the documentation and noticed that the planned future developments do not explicitly include the complex equivalents (single or double precision). Is this an unintended omission? If complex equivalents are planned, when will they be ready?
MMB
extern "C" void
magmablas_cherk(char uplo, char trans, int n, int k, float alpha,
float2 *A, int lda, float beta, float2 *C, int ldc){
int ka, ldamin;
if (trans == 'N' || trans == 'n')
ka = k, ldamin = n;
else
ka = n, ldamin = k;
float2 *a = (float2*)malloc(ka*ldamin * sizeof(float2));
float2 *c = (float2*)malloc(n*n * sizeof(float2));
cublasGetMatrix(ldamin, ka, sizeof(float2), A, lda, a, ldamin);
cublasGetMatrix(n, n, sizeof(float2), C, ldc, c, n);
cherk_(&uplo, &trans, &n, &k, &alpha, a, &ldamin, &beta, c, &n);
cublasSetMatrix(n, n, sizeof(float2), c, n, C, ldc);
free(a);
free(c);
}
./testing_cpotrf
device 0: GeForce GTX 280, 1296.0 MHz clock, 1023.8 MB memory
Usage:
testing_cpotrf -N 1024
N CPU GFlop/s GPU GFlop/s ||R||_F / ||A||_F
========================================================
1024 53.71 49.52 1.598006e-08
2048 46.90 97.17 1.709468e-08
3072 50.33 122.87 1.484603e-08
4032 57.12 112.68 2.677048e-08
5184 58.99 126.12 2.073988e-08
6048 59.44 134.18 2.214732e-08
7200 67.61 148.46 2.458159e-08
8064 65.66 155.81 2.687845e-08
8928 60.30 182.20 3.045712e-08
Users browsing this forum: No registered users and 2 guests