Compiling Magma 1.0 for Mac OSX

Open discussion for MAGMA

Compiling Magma 1.0 for Mac OSX

Postby braunm » Thu Dec 23, 2010 7:55 pm

Hi. I'm trying to install Magma 1.0 RC2 on my Mac Pro running OSX 10.6.5, with a Quadro 4000 GPU (Fermi), and linking to Intel MKL 10.2. I am getting some "Undefined symbols" in the compilation process, and I don't know how to fix it.

First, here is my make.inc file

Code: Select all
GPU_TARGET = 1
CC        = gcc
NVCC      = nvcc
FORT      = gfortran
ARCH      = ar
ARCHFLAGS = cr
RANLIB    = ranlib
OPTS      = -O3 -m64 -DADD_
NVOPTS    = --compiler-options -DUNIX -O3 -m64 -DADD_
LDOPTS    = -fPIC -m64
LIB       = -lmkl_intel_lp64 -lmkl_intel_thread  -lpthread -lcublas -lm
CUDADIR   = /usr/local/cuda
LIBDIR    = -L$(MKL_LIBS) \
            -L$(CUDADIR)/lib
INC       = -I$(CUDADIR)/include
LIBMAGMA     = ../lib/libmagma.a
LIBMAGMABLAS = ../lib/libmagmablas.a



and here is all of the output from the compilation process:
Code: Select all
braunm4:$ make
( cd src         && make )
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zpotrf.cpp -o zpotrf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zpotrf_gpu.cpp -o zpotrf_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zpotrs_gpu.cpp -o zpotrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zposv_gpu.cpp -o zposv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zgetrf.cpp -o zgetrf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zgetrf_gpu.cpp -o zgetrf_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zgetrs_gpu.cpp -o zgetrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zgesv_gpu.cpp -o zgesv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zgeqrf.cpp -o zgeqrf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zgeqlf.cpp -o zgeqlf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zgelqf.cpp -o zgelqf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zgeqrf_gpu.cpp -o zgeqrf_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zgeqrf2_gpu.cpp -o zgeqrf2_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zgeqrs_gpu.cpp -o zgeqrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zgels_gpu.cpp -o zgels_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zlarfb_gpu.cpp -o zlarfb_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zunmqr_gpu.cpp -o zunmqr_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zcposv_gpu.cpp -o zcposv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zcgesv_gpu.cpp -o zcgesv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zcgetrs_gpu.cpp -o zcgetrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zcgeqrsv_gpu.cpp -o zcgeqrsv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cpotrf.cpp -o cpotrf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cpotrf_gpu.cpp -o cpotrf_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cpotrs_gpu.cpp -o cpotrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cposv_gpu.cpp -o cposv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cgetrf.cpp -o cgetrf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cgetrf_gpu.cpp -o cgetrf_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cgetrs_gpu.cpp -o cgetrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cgesv_gpu.cpp -o cgesv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cgeqrf.cpp -o cgeqrf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cgeqlf.cpp -o cgeqlf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cgelqf.cpp -o cgelqf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cgeqrf_gpu.cpp -o cgeqrf_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cgeqrf2_gpu.cpp -o cgeqrf2_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cgeqrs_gpu.cpp -o cgeqrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cgels_gpu.cpp -o cgels_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c clarfb_gpu.cpp -o clarfb_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cunmqr_gpu.cpp -o cunmqr_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dpotrf.cpp -o dpotrf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dpotrf_gpu.cpp -o dpotrf_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dpotrs_gpu.cpp -o dpotrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dposv_gpu.cpp -o dposv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dgetrf.cpp -o dgetrf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dgetrf_gpu.cpp -o dgetrf_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dgetrs_gpu.cpp -o dgetrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dgesv_gpu.cpp -o dgesv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dgeqrf.cpp -o dgeqrf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dgeqlf.cpp -o dgeqlf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dgelqf.cpp -o dgelqf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dgeqrf_gpu.cpp -o dgeqrf_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dgeqrf2_gpu.cpp -o dgeqrf2_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dgeqrs_gpu.cpp -o dgeqrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dgels_gpu.cpp -o dgels_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dlarfb_gpu.cpp -o dlarfb_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dormqr_gpu.cpp -o dormqr_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dsposv_gpu.cpp -o dsposv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dsgesv_gpu.cpp -o dsgesv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dsgetrs_gpu.cpp -o dsgetrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dsgeqrsv_gpu.cpp -o dsgeqrsv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c spotrf.cpp -o spotrf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c spotrf_gpu.cpp -o spotrf_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c spotrs_gpu.cpp -o spotrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sposv_gpu.cpp -o sposv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgetrf.cpp -o sgetrf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgetrf_gpu.cpp -o sgetrf_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgetrs_gpu.cpp -o sgetrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgesv_gpu.cpp -o sgesv_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgeqrf.cpp -o sgeqrf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgeqlf.cpp -o sgeqlf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgelqf.cpp -o sgelqf.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgeqrf_gpu.cpp -o sgeqrf_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgeqrf2_gpu.cpp -o sgeqrf2_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgeqrs_gpu.cpp -o sgeqrs_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgels_gpu.cpp -o sgels_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c slarfb_gpu.cpp -o slarfb_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sormqr_gpu.cpp -o sormqr_gpu.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c auxiliary.cpp -o auxiliary.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c xerbla.cpp -o xerbla.o
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c get_nb.cpp -o get_nb.o
ar cr ../lib/libmagma.a zpotrf.o zpotrf_gpu.o zpotrs_gpu.o zposv_gpu.o zgetrf.o zgetrf_gpu.o zgetrs_gpu.o zgesv_gpu.o zgeqrf.o zgeqlf.o zgelqf.o zgeqrf_gpu.o zgeqrf2_gpu.o zgeqrs_gpu.o zgels_gpu.o zlarfb_gpu.o zunmqr_gpu.o zcposv_gpu.o zcgesv_gpu.o zcgetrs_gpu.o zcgeqrsv_gpu.o cpotrf.o cpotrf_gpu.o cpotrs_gpu.o cposv_gpu.o cgetrf.o cgetrf_gpu.o cgetrs_gpu.o cgesv_gpu.o cgeqrf.o cgeqlf.o cgelqf.o cgeqrf_gpu.o cgeqrf2_gpu.o cgeqrs_gpu.o cgels_gpu.o clarfb_gpu.o cunmqr_gpu.o dpotrf.o dpotrf_gpu.o dpotrs_gpu.o dposv_gpu.o dgetrf.o dgetrf_gpu.o dgetrs_gpu.o dgesv_gpu.o dgeqrf.o dgeqlf.o dgelqf.o dgeqrf_gpu.o dgeqrf2_gpu.o dgeqrs_gpu.o dgels_gpu.o dlarfb_gpu.o dormqr_gpu.o dsposv_gpu.o dsgesv_gpu.o dsgetrs_gpu.o dsgeqrsv_gpu.o spotrf.o spotrf_gpu.o spotrs_gpu.o sposv_gpu.o sgetrf.o sgetrf_gpu.o sgetrs_gpu.o sgesv_gpu.o sgeqrf.o sgeqlf.o sgelqf.o sgeqrf_gpu.o sgeqrf2_gpu.o sgeqrs_gpu.o sgels_gpu.o slarfb_gpu.o sormqr_gpu.o auxiliary.o xerbla.o get_nb.o
ranlib ../lib/libmagma.a
( cd magmablas   && make )
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zauxiliary.cu -o zauxiliary.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zpermute.cu -o zpermute.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zpermute-v2.cu -o zpermute-v2.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zswap.cu -o zswap.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zswapblk.cu -o zswapblk.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zlacpy.cu -o zlacpy.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zlange.cu -o zlange.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zlanhe.cu -o zlanhe.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c ztranspose.cu -o ztranspose.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c ztranspose-v2.cu -o ztranspose-v2.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zinplace_transpose.cu -o zinplace_transpose.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zclaswp.cu -o zclaswp.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zcaxpycp.cu -o zcaxpycp.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zlag2c.cu -o zlag2c.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c clag2z.cu -o clag2z.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zlat2c.cu -o zlat2c.cu_o
./zlat2c.cu(46): Advisory: Loop was not unrolled, unexpected control flow construct
./zlat2c.cu(80): Advisory: Loop was not unrolled, unexpected control flow construct
./zlat2c.cu(140): Advisory: Loop was not unrolled, unexpected control flow construct
./zlat2c.cu(230): Advisory: Loop was not unrolled, unexpected control flow construct
./zlat2c.cu(278): Advisory: Loop was not unrolled, unexpected control flow construct
./zlat2c.cu(352): Advisory: Loop was not unrolled, unexpected control flow construct
./zlat2c.cu(447): Advisory: Loop was not unrolled, unexpected control flow construct
./zlat2c.cu(492): Advisory: Loop was not unrolled, unexpected control flow construct
./zlat2c.cu(551): Advisory: Loop was not unrolled, unexpected control flow construct
./zlat2c.cu(591): Advisory: Loop was not unrolled, unexpected control flow construct
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cauxiliary.cu -o cauxiliary.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cpermute.cu -o cpermute.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cpermute-v2.cu -o cpermute-v2.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cswap.cu -o cswap.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cswapblk.cu -o cswapblk.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c clacpy.cu -o clacpy.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c clange.cu -o clange.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c clanhe.cu -o clanhe.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c ctranspose.cu -o ctranspose.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c ctranspose-v2.cu -o ctranspose-v2.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c cinplace_transpose.cu -o cinplace_transpose.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dauxiliary.cu -o dauxiliary.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dpermute.cu -o dpermute.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dpermute-v2.cu -o dpermute-v2.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dswap.cu -o dswap.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dswapblk.cu -o dswapblk.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dlacpy.cu -o dlacpy.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dlange.cu -o dlange.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dlansy.cu -o dlansy.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dtranspose.cu -o dtranspose.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dtranspose-v2.cu -o dtranspose-v2.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dinplace_transpose.cu -o dinplace_transpose.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dslaswp.cu -o dslaswp.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dsaxpycp.cu -o dsaxpycp.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dlag2s.cu -o dlag2s.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c slag2d.cu -o slag2d.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dlat2s.cu -o dlat2s.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sauxiliary.cu -o sauxiliary.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c spermute.cu -o spermute.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c spermute-v2.cu -o spermute-v2.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sswap.cu -o sswap.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sswapblk.cu -o sswapblk.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c slacpy.cu -o slacpy.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c slange.cu -o slange.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c slansy.cu -o slansy.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c stranspose.cu -o stranspose.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c stranspose-v2.cu -o stranspose-v2.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sinplace_transpose.cu -o sinplace_transpose.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dgemv_MLU.cu -o dgemv_MLU.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgemm_fermi.cu -o sgemm_fermi.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgemm_fermi80.cu -o sgemm_fermi80.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c sgemm_fermi64.cu -o sgemm_fermi64.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dgemm_fermi.cu -o dgemm_fermi.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c ssymv_fermi.cu -o ssymv_fermi.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dsymv_fermi.cu -o dsymv_fermi.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zsymv_fermi.cu -o zsymv_fermi.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c csymv_fermi.cu -o csymv_fermi.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c chemv_fermi.cu -o chemv_fermi.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c zhemv_fermi.cu -o zhemv_fermi.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c strsm_tesla.cu -o strsm_tesla.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c dtrsm_tesla.cu -o dtrsm_tesla.cu_o
nvcc --compiler-options -DUNIX -O3 -m64 -DADD_ -arch sm_20 -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c ssyr2k.cu -o ssyr2k.cu_o
ar cr ../lib/libmagmablas.a zauxiliary.cu_o zpermute.cu_o zpermute-v2.cu_o zswap.cu_o zswapblk.cu_o zlacpy.cu_o zlange.cu_o zlanhe.cu_o ztranspose.cu_o ztranspose-v2.cu_o zinplace_transpose.cu_o zclaswp.cu_o zcaxpycp.cu_o zlag2c.cu_o clag2z.cu_o zlat2c.cu_o cauxiliary.cu_o cpermute.cu_o cpermute-v2.cu_o cswap.cu_o cswapblk.cu_o clacpy.cu_o clange.cu_o clanhe.cu_o ctranspose.cu_o ctranspose-v2.cu_o cinplace_transpose.cu_o dauxiliary.cu_o dpermute.cu_o dpermute-v2.cu_o dswap.cu_o dswapblk.cu_o dlacpy.cu_o dlange.cu_o dlansy.cu_o dtranspose.cu_o dtranspose-v2.cu_o dinplace_transpose.cu_o dslaswp.cu_o dsaxpycp.cu_o dlag2s.cu_o slag2d.cu_o dlat2s.cu_o sauxiliary.cu_o spermute.cu_o spermute-v2.cu_o sswap.cu_o sswapblk.cu_o slacpy.cu_o slange.cu_o slansy.cu_o stranspose.cu_o stranspose-v2.cu_o sinplace_transpose.cu_o dgemv_MLU.cu_o sgemm_fermi.cu_o sgemm_fermi80.cu_o sgemm_fermi64.cu_o dgemm_fermi.cu_o ssymv_fermi.cu_o dsymv_fermi.cu_o zsymv_fermi.cu_o csymv_fermi.cu_o chemv_fermi.cu_o zhemv_fermi.cu_o strsm_tesla.cu_o dtrsm_tesla.cu_o ssyr2k.cu_o
ranlib ../lib/libmagmablas.a
( cd testing/lin && make )
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c zbdt01.f -o zbdt01.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c zhet21.f -o zhet21.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c zhst01.f -o zhst01.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c zunt01.f -o zunt01.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c zqrt02.f -o zqrt02.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c zlarfy.f -o zlarfy.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c zstt21.f -o zstt21.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c cbdt01.f -o cbdt01.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c chet21.f -o chet21.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c chst01.f -o chst01.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c cunt01.f -o cunt01.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c cqrt02.f -o cqrt02.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c clarfy.f -o clarfy.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c cstt21.f -o cstt21.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c dbdt01.f -o dbdt01.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c dsyt21.f -o dsyt21.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c dhst01.f -o dhst01.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c dort01.f -o dort01.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c dqrt02.f -o dqrt02.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c dlarfy.f -o dlarfy.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c dstt21.f -o dstt21.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c sbdt01.f -o sbdt01.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c ssyt21.f -o ssyt21.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c shst01.f -o shst01.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c sort01.f -o sort01.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c sqrt02.f -o sqrt02.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c slarfy.f -o slarfy.o
gfortran -O3 -m64 -DADD_ -I/usr/local/cuda/include -c sstt21.f -o sstt21.o
ar cr liblapacktest.a zbdt01.o zhet21.o zhst01.o zunt01.o zqrt02.o zlarfy.o zstt21.o cbdt01.o chet21.o chst01.o cunt01.o cqrt02.o clarfy.o cstt21.o dbdt01.o dsyt21.o dhst01.o dort01.o dqrt02.o dlarfy.o dstt21.o sbdt01.o ssyt21.o shst01.o sort01.o sqrt02.o slarfy.o sstt21.o
ranlib liblapacktest.a
( cd testing     && make )
gcc -O3 -m64 -DADD_ -DGPUSHMEM=200 -I/usr/local/cuda/include -I../include  -c testing_zgemm.cpp -o testing_zgemm.o
gfortran -O3 -m64 -DADD_ -DGPUSHMEM=200 -fPIC -m64  -DGPUSHMEM=200  testing_zgemm.o  -o testing_zgemm lin/liblapacktest.a -L../lib \
          -lcuda -lmagma -lmagmablas -L/opt/intel/Compiler/11.1/089/Frameworks/mkl/Libraries/em64t -L/usr/local/cuda/lib  -lmkl_intel_lp64 -lmkl_intel_thread  -lpthread -lcublas 
Undefined symbols:
  "___gxx_personality_v0", referenced from:
      verifyResult(double2 const*, double2 const*)in testing_zgemm.o
      _main in testing_zgemm.o
      CIE in testing_zgemm.o
      _GetTimerValue in libmagma.a(auxiliary.o)
      _spanel_to_q in libmagma.a(auxiliary.o)
      _sq_to_panel in libmagma.a(auxiliary.o)
      _cpanel_to_q in libmagma.a(auxiliary.o)
      _cq_to_panel in libmagma.a(auxiliary.o)
      _dpanel_to_q in libmagma.a(auxiliary.o)
      _dq_to_panel in libmagma.a(auxiliary.o)
      _zpanel_to_q in libmagma.a(auxiliary.o)
      _zq_to_panel in libmagma.a(auxiliary.o)
      _swp2pswp in libmagma.a(auxiliary.o)
      _getv in libmagma.a(auxiliary.o)
      _printout_devices in libmagma.a(auxiliary.o)
      _get_current_time in libmagma.a(auxiliary.o)
      CIE in libmagma.a(auxiliary.o)
  "_cudaThreadSynchronize", referenced from:
      _get_current_time in libmagma.a(auxiliary.o)
ld: symbol(s) not found
collect2: ld returned 1 exit status
make[1]: *** [testing_zgemm] Error 1
rm testing_zgemm.o
make: *** [test] Error 2



Any ideas? Thanks in advance for any advice you can provide.

Michael Braun
MIT Sloan School of Management
braunm -at- mit.edu
braunm
 
Posts: 5
Joined: Thu Dec 23, 2010 6:55 pm

Re: Compiling Magma 1.0 for Mac OSX

Postby Boxed Cylon » Fri Dec 24, 2010 7:39 pm

It looks like you are missing a few libraries. Instead of:

Code: Select all
LIB       = -lmkl_intel_lp64 -lmkl_intel_thread  -lpthread -lcublas


try

Code: Select all
LIB       = -lmkl_intel_lp64 -lmkl_intel_thread  -lmkl_core -lpthread -lcuda -lcublas


That is, you seem to be missing the mkl_core and cuda libraries.
Boxed Cylon
 
Posts: 27
Joined: Sat Nov 21, 2009 6:03 pm

Re: Compiling Magma 1.0 for Mac OSX

Postby braunm » Fri Dec 24, 2010 11:02 pm

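Thanks. Actually, adding -lmkl_core and -lcuda wasn't enough. I also had to link to -lstdc++.6 (which resolves "___gxx_personality_v0", since the final link is done with gfortran rather than g++) and -lcudart (which resolves "_cudaThreadSynchronize"). Then the whole thing compiled.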
braunm
 
Posts: 5
Joined: Thu Dec 23, 2010 6:55 pm


Return to User discussion

Who is online

Users browsing this forum: Bing [Bot], Google [Bot] and 2 guests