Hi,

I have managed to solve some of the issues, but for function dgelsd_ in CLAPACK 22.0 I still cannot explain the differences with LAPACK_dgelsd (or LAPACKE_dgelsd_work with column major). The test case has been ove simplified and reduced to:

- Code: Select all
`#ifdef LP`

#define lp_int lapack_int

extern "C" lapack_int LAPACKE_dgelsd_work( int matrix_order, lapack_int m, lapack_int n,

lapack_int nrhs, double* a, lapack_int lda,

double* b, lapack_int ldb, double* s, double rcond,

lapack_int* rank, double* work, lapack_int lwork, lapack_int* iwork);

#else

extern "C" int dgelsd_(integer* m, integer* n, integer* nrhs, double* a,

integer* lda, double* b, integer* ldb, double* s,

double* rcond, integer* rank, double* work,

integer* lwork, integer* iwork, integer *info );

#define lp_int integer

#endif

void T1_dgelsd(lp_int m, lp_int n)

{

lp_int nhrs = 1;

lp_int lda = m;

lp_int ldb = m;

lp_int rank = 1;

lp_int lwork = 1024 * m;

lp_int* iwork = new lp_int[lwork];

lp_int info = 0;

double* a = new double[m*n];

double* b = new double[m];

double* s = new double[min(m, n)];

double rcond = -1.0;

double* work = new double[lwork];

//Initialize input parameters

int desp = 0;

for(int j = 1; j <= n; j++) {

desp = (j - 1)*m;

for(int i = 1; i <= m; i++) a[desp + i - 1] = i * j;

}

for(int i = 1; i <= m; i++) b[i-1] = i;

for(int i = 1; i <= m; i++) s[i-1] = 0;

for(int i = 1; i <= lwork; i++) iwork[i-1] = 0;

std::cout << "\n===========================" << std::endl;

std::cout << " T1_dgelsd - TEST1" << std::endl;

std::cout << "===========================" << std::endl;

std::cout << "INPUT DATA => " << std::endl;

report_dgelsd(&info, &m, &n, &nhrs, a, &lda, b, &ldb, s, &rcond, &rank, work, &lwork, iwork);

#ifdef LP

LAPACKE_dgelsd_work(LAPACK_COL_MAJOR, m, n, nhrs, a, lda, b, ldb, s, rcond, &rank, work, lwork, iwork);

#else

dgelsd_( &m, &n, &nhrs, a, &lda, b, &ldb, s, &rcond, &rank, work, &lwork, iwork, &info);

#endif

std::cout << "OUTPUT => " << std::endl;

report_dgelsd(&info, &m, &n, &nhrs, a, &lda, b, &ldb, s, &rcond, &rank, work, &lwork, iwork);

}

Test is called with inputs parametes:

- Code: Select all
`T1_dgelsd(4, 3); // A = [1 2 3 4 2 4 6 8 3 6 9 12] and B = [1 2 3 4]`

The input matrix clearly has rank 1, and I get this for dgelsd_ but for LAPACKE_dgelsd_work ir return a rank value of 2!! The output is for both functions:

CLAPACK 22.0.0 ----------- LAPACK-3.4.1

m = 4 ----------------------- m = 4

n = 3 ------------------------ n = 3

nhrs = 1 -------------------- nhrs = 1

A[0] = 5.47723 ------------ A[0] = -5.47723

A[1] = 0 -------------------- A[1] = 0

A[2] = 0 -------------------- A[2] = 0

A[3] = -0.89341 ----------- A[3] = 0.617548

A[4] = -19.7484 ----------- A[4] = 19.7484

A[5] = 2.79784e-15 ------- A[5] = 1.12886e-15

A[6] = 0.122366 ----------- A[6] = 0.331679

A[7] = 0.579796 ----------- A[7] = 0.579796

A[8] = 0.535184 ----------- A[8] = 0.535184

A[9] = 6.7235e-16 -------- A[9] = -2.84902e-15

A[10] = 6.30467e-16 ----- A[10] = -1.5626e-15

A[11] = 7.58139 ----------- A[11] = -0.131902

lda = 4 ---------------------- lda = 4

B[0] = 0.0714286 ---------- B[0] = 0.103441

B[1] = 0.142857 ------------ B[1] = 0.476801

B[2] = 0.214286 ------------ B[2] = -0.0190139

B[3] = -2.16038e-31 ------- B[3] = -2.16038e-31

ldb = 4 ----------------------- ldb = 4

S[0] = 20.4939 ------------- S[0] = 20.4939

S[1] = 1.10799e-15 -------- S[1] = 3.26017e-15

S[2] = 4.25486e-16 -------- S[2] = 1.44604e-16

rcond = -1 ------------------- rcond = -1

rank = 1 --------------------- rank = 2

The rest of the output file has been omitted.

My makefile is now as follows:

- Code: Select all
`CC=g++`

CFLAGS=-c -Wall -I./lapack-3.4.1 -I./lapack-3.4.1/lapacke/include -Wno-deprecated -DHAVE_LAPACK_CONFIG_H -DLAPACK_ILP64

LDFLAGS=-L./lapack-3.4.1 -llapacke -llapack -lrefblas -lgfortran

SOURCES=main.cpp

OBJECTS=$(SOURCES:.cpp=.o)

EXECUTABLE=test_lp

all: $(SOURCES) $(EXECUTABLE)

$(EXECUTABLE): $(OBJECTS)

$(CC) $(OBJECTS) -o $@ $(LDFLAGS)

.cpp.o:

$(CC) $(CFLAGS) $< -o $@

clean:

rm -f *.o test_lp

The test case has been executed in the same machine, bit not necessarly in the same core of the chip.

Thanks,

Sebastian.