I've finished writing a subroutine to solve the problem P^i.U^(i-!)+Q^i.U^(i)+S^i.U^(i+!)=0 (where P,Q and S are NxN Dcomplex arrays and U is an N Dcomplex vector and i refers to the current mesh position). My routine makes a lot of usage of ZGESV in order to solve this problem. In one limit P,Q and S are diagonal in which case one call to my subroutine takes approximately 1 minute (for N=101) however in most cases P,Q and S will be tridiagonal and in this case it seems that my subroutine now takes around 33 minutes instead. I was wondering if anyone knew if this is to be expected, and ways to get less of a performance hit in the tridiagonal case? I've included a sample portion of the subroutine below if it helps.

- Code: Select all
`!Initialise beta`

DO i=1,nrows

beta(i,i,istrt)=CMPLX(1.0d0,0.0d0)

END DO

!Iterate from istrt to max

DO i=istrt+1,nx

!Load matrices

CALL LOAD_MATS(i)

!Get alphas

!Form temporary using BLAS matrix multiplication

!CALL ZGEMM('N','N',nrows,nrows,nrows,CMPLX(1.d0,0.d0),p_mat,nrows,alpha(:,:,i-1),nrows,CMPLX(0.d0,0.d0),temp,nrows)

temp2=alpha(:,:,i-1)

temp=MATMUL(p_mat,temp2)

!Update temporary by adding q_mat

temp=q_mat+temp

!Update second temporary

temp2=-s_mat

!Solve for alpha(:,:,i) -> Stored in temp2

CALL ZGESV(nrows,nrows,temp,nrows,ipiv1,temp2,nrows,info)

!Fill alpha segment

alpha(:,:,i)=temp2

!Now get Betas

!Loop over initial conditions

DO j=1,nrows

!Form temporary using BLAS matrix multiplication

!CALL ZGEMV('N',nrows,nrows,1.0,-p_mat,nrows,beta(:,j,i-1),1,0,vec_temp,1)

!CALL ZGEMM('N','N',nrows,1,nrows,1.0,-p_mat,nrows,beta(:,j,i-1),nrows,0,vec_temp,nrows)

vec_temp(:,1)=MATMUL(-p_mat,beta(:,j,i-1))

!Solve for beta(:,j,i) -> Stored in vec_temp

CALL ZGESV(nrows,1,temp,nrows,ipiv1,vec_temp(:,1),nrows,info)

!Fill beta segment

beta(:,j,i)=vec_temp(:,1)

END DO

END DO