by katayama » Sat Jan 22, 2011 4:17 am
Hmm, I don't see the attachment.
Here are the code, the output, and the Makefile. I am using CUDA 3.2.
Thank you!
[katayama@lb01 magma]$ more out.nan
[ [ 0.391137 , 0.37845 , 0.676826 , 0.828647 ];
[ 0.572013 , 1.29715 , 1.72296 , 3.94056 ];
[ 0.382288 , 0.33823 , 1.39019 , 2.54674 ];
[ 0.719967 , 1.69088 , 1.85096 , 1.95143 ] ]
[ [ 0.729483 , 0 , 0 , 0 ];
[ 0.551107 , 1.98647 , 0 , 0 ];
[ 0.946236 , 0.61067 , 2.91446 , 0 ];
[ 0.493657 , 0.600847 , 0.152826 , 3.33746 ] ]
[ [ nan , nan , nan , nan ];
[ nan , nan , nan , nan ];
[ nan , nan , nan , nan ];
[ nan , nan , nan , nan ] ]
#include <magmablas.h>
#include <iostream>
#define cublasDtrsm magmablas_dtrsm
// Writes an N-by-M matrix to std::cout in a bracketed, comma-separated layout.
//   N, M : number of rows and columns to print
//   A    : matrix storage; element (row, col) is read from A[row + col * LDA]
//   LDA  : stride between consecutive columns in A
// Every row but the last ends with "];" and a newline; the last row closes
// with "] " followed by the final "]" and a newline.
void printmat(int N, int M, const double *A, int LDA)
{
    std::cout << "[ ";
    for (int row = 0; row < N; ++row) {
        // Rows after the first get one leading space so they line up under
        // the opening bracket.
        std::cout << (row == 0 ? "" : " ") << "[ ";
        for (int col = 0; col < M; ++col) {
            std::cout << A[row + col * LDA] << " ";
            if (col != M - 1)
                std::cout << ", ";
        }
        if (row == N - 1)
            std::cout << "] ";
        else
            std::cout << "];" << std::endl;
    }
    std::cout << "]" << std::endl;
}
int main(void) {
double B[16];
double C[16];
int g(4);
size_t NxN(16);
B[0] = 0.391137;
B[4] = 0.37845;
B[8] = 0.676826;
B[12] =0.828647;
B[1] = 0.572013;
B[5] = 1.29715;
B[9] = 1.72296;
B[13] = 3.94056;
B[2] = 0.382288;
B[6] = 0.33823;
B[10] = 1.39019;
B[14] = 2.54674;
B[3] = 0.719967;
B[7] = 1.69088;
B[11] = 1.85096;
B[15] =1.95143;
C[0] =0.729483;
C[4] = 0;
C[8] = 0;
C[12] = 0;
C[1] = 0.551107;
C[5] = 1.98647;
C[9] = 0;
C[13] = 0;
C[2] = 0.946236;
C[6] = 0.61067;
C[10] = 2.91446;
C[14] = 0;
C[3] = 0.493657;
C[7] = 0.600847;
C[11] = 0.152826;
C[15] = 3.33746;
printmat(g,g,B,g);
printmat(g,g,C,g);
cuInit(0);
cublasInit();
double *dev_1;
double *dev_2;
cublasAlloc(NxN,sizeof(double),(void**)&dev_1);
cublasAlloc(NxN,sizeof(double),(void**)&dev_2);
cublasSetVector(NxN,sizeof(double),B,1,dev_1,1);
cublasSetVector(NxN,sizeof(double),C,1,dev_2,1);
cublasDtrsm('R', 'L','T','N', g, g, 1.0, dev_2, g, dev_1, g);
cublasGetVector(NxN,sizeof(double),dev_1,1,B,1);
printmat(g,g,B,g);
}
[katayama@lb01 magma]$ more Makefile
# Builds the `nan` reproducer against MAGMA and the CUDA libraries.
CXX=g++
MAGMA_TOP=/home/katayama/work/magma/magma_1.0.0-rc2
CUDA_TOP=/usr/local/cuda
INC=-I$(MAGMA_TOP)/include -I$(CUDA_TOP)/include
LIB=-L$(MAGMA_TOP)/lib -L$(CUDA_TOP)/lib64
CXXFLAGS = $(INC) -O3
# `all` and `clean` name actions, not files.
.PHONY: all clean
all:nan
# nan.o is compiled from nan.cc by make's built-in rule using CXX and CXXFLAGS.
nan.o : nan.cc
# Libraries are listed dependents-first (MAGMA before the CUDA libraries it
# calls) so a single-pass link can resolve every symbol.
nan : nan.o
	$(CXX) -o $@ $< $(LIB) -lmagma -lmagmablas -lcublas -lcuda -lm
clean :
	rm -f nan nan.o