001:       SUBROUTINE CORE_STSQRT( M, N, IB, A1, LDA1, A2, LDA2, T, LDT,
002:      $                       TAU, WORK, INFO )
003: 
004: *********************************************************************
005: *     PLASMA core_blas routine (version 2.1.0)                      *
006: *     Author: Hatem Ltaief                                          *
007: *     Release Date: November, 15th 2009                             *
008: *     PLASMA is a software package provided by Univ. of Tennessee,  *
009: *     Univ. of California Berkeley and Univ. of Colorado Denver.    *
010: *********************************************************************
011: *
012: *     .. Scalar Arguments ..
013:       INTEGER            M, N, IB, LDA1, LDA2, LDT, INFO
014: *     ..
015: *     .. Array Arguments ..
016:       REAL            A1( LDA1, * ), A2( LDA2, * )
017:       REAL            T( LDT, * ), TAU( * ), WORK( * )
018: *     ..
019: *
020: *  Purpose
021: *  =======
022: *
023: *  CORE_STSQRT computes a QR factorization of a rectangular matrix
024: *  formed by coupling a real N-by-N upper triangular tile A1
025: *  on top of a real M-by-N tile A2:
026: *
027: *  | A1 | = Q * R
028: *  | A2 |
029: *
030: *  Arguments
031: *  =========
032: *
033: *  M       (input) INTEGER
034: *          The number of rows of the tile A2.  M >= 0.
035: *
036: *  N       (input) INTEGER
037: *          The number of columns of the tile A1 and A2.  N >= 0.
038: *
039: *  IB      (input) INTEGER
040: *          The inner-blocking size.  IB >= 0.
041: *
042: *  A1      (input/output) REAL array, dimension (LDA1,N)
043: *          On entry, the N-by-N tile A1.
044: *          On exit, the elements on and above the diagonal of the array
045: *          contain the N-by-N upper trapezoidal tile R;
046: *          the elements below the diagonal are not referenced.
047: *
048: *  LDA1    (input) INTEGER
049: *          The leading dimension of the array A1.  LDA1 >= max(1,N).
050: *
051: *  A2      (input/output) REAL array, dimension (LDA2,N)
052: *          On entry, the M-by-N tile A2.
053: *          On exit, all the elements with the array TAU, represent 
054: *          the orthogonal tile Q as a product of elementary reflectors 
055: *          (see Further Details).
056: *
057: *  LDA2    (input) INTEGER
058: *          The leading dimension of the array A2.  LDA2 >= max(1,M).
059: *
060: *  T       (output) REAL array, dimension (LDT,N)
061: *          The IB-by-N triangular factor T of the block reflector.
062: *          T is upper triangular by block (economic storage);
063: *          The rest of the array is not referenced.
064: *
065: *  LDT     (input) INTEGER
066: *          The leading dimension of the array T. LDT >= IB.
067: *
068: *  TAU     (output) REAL array, dimension (min(M,N))
069: *          The scalar factors of the elementary reflectors (see Further
070: *          Details).
071: *
072: *  WORK    (workspace) REAL array, dimension (N)
073: *
074: *  INFO    (output) INTEGER
075: *          = 0: successful exit
076: *          < 0: if INFO = -i, the i-th argument had an illegal value
077: *
078: *  Further Details
079: *  ===============
080: *
081: *  The tile Q is represented as a product of elementary reflectors
082: *
083: *     Q = H(1) H(2) . . . H(k), where k = min(M,N).
084: *
085: *  Each H(i) has the form
086: *
087: *     H(i) = I - tau * v * v'
088: *
089: *  where tau is a real scalar, and v is a real vector with
090: *  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A2(1:m,i),
091: *  and tau in TAU(i).
092: *
093: *  =====================================================================
094: *
095: *     .. Parameters ..
096:       REAL            SONE, SZERO
097:       INTEGER            IONE
098:       PARAMETER          ( SONE = 1.0E+0 )
099:       PARAMETER          ( SZERO = 0.0E+0 )
100:       PARAMETER          ( IONE = 1 )
101: *     ..
102: *     .. Local Scalars ..
103:       INTEGER            I, II, SB, IINFO
104: *     ..
105: *     .. External Subroutines ..
106:       EXTERNAL           SLARFG, CORE_SSSMQR, XERBLA
107:       EXTERNAL           SCOPY, SGEMV, STRMV, SAXPY, SGER
108: *     ..
109: *     .. Intrinsic Functions ..
110:       INTRINSIC           MAX
111: *     ..
112: *     Test the input arguments.
113:       INFO = 0
114:       IF( M.LT.0 ) THEN
115:          INFO = -1
116:       ELSE IF( N.LT.0 ) THEN
117:          INFO = -2
118:       ELSE IF( IB.LT.0 ) THEN
119:          INFO = -3
120:       ELSE IF( LDA2.LT.MAX( 1, M ) ) THEN
121:          INFO = -7
122:       END IF
123:       IF( INFO.NE.0 ) THEN
124:          CALL XERBLA( 'CORE_STSQRT', -INFO )
125:          RETURN
126:       END IF
127: *
128: *     Quick return if possible.
129: *
130:       IF( M.EQ.0 .OR. N.EQ.0 .OR. IB.EQ.0 )
131:      $   RETURN
132: *
133:       DO 10 II = 1, N, IB
134:          SB = MIN( N-II+1, IB )
135:          DO 20 I = 1, SB
136: *
137: *           Generate elementary reflector H( II*IB+I ) to annihilate
138: *           A( II*IB+I:M, II*IB+I ).
139: *
140:             CALL SLARFG( M+1, A1( II+I-1, II+I-1 ), A2( 1, II+I-1 ),
141:      $                  IONE, TAU( II+I-1 ) )
142: *
143:             IF( ( II+I-1 ).LT.N ) THEN
144: *
145: *              Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left.
146: *
147:                CALL SCOPY( SB-I, A1( II+I-1, II+I ), LDA1,
148:      $                    WORK, IONE )
149:                CALL SGEMV( 'Transpose', M, SB-I, SONE,
150:      $                    A2( 1, II+I ), LDA2, A2( 1, II+I-1 ),
151:      $                    IONE, SONE, WORK, IONE )
152:                CALL SAXPY( SB-I, -( TAU( II+I-1 ) ), WORK, IONE, 
153:      $                    A1( II+I-1, II+I ), LDA1 ) 
154:                CALL SGER( M, SB-I, -( TAU( II+I-1 ) ),
155:      $                    A2( 1, II+I-1 ), IONE, WORK, IONE,
156:      $                    A2( 1, II+I ), LDA2 )
157:             END IF
158: *
159: *           Calculate T.
160: *
161:             CALL SGEMV( 'Transpose', M, I-1, -TAU( II+I-1 ),
162:      $                 A2( 1, II ), LDA2, A2( 1, II+I-1 ), IONE,
163:      $                 SZERO, T( 1, II+I-1 ), IONE )
164: *
165:             CALL STRMV( 'Upper', 'Notranspose', 'Nonunit', I-1,
166:      $                 T( 1, II ), LDT, T( 1, II+I-1 ), IONE )
167:             T( I, II+I-1 ) = TAU( II+I-1 )
168:  20      CONTINUE
169:          IF ( N.GT.( II+IB-1 ) ) THEN   
170:              CALL CORE_SSSMQR( 'Left', 'Transpose',
171:      $                    SB, M, N-( II+SB-1 ), IB, IB,
172:      $                    A1( II, II+SB ), LDA1,
173:      $                    A2( 1, II+SB ), LDA2,
174:      $                    A2( 1, II ), LDA2,
175:      $                    T( 1, II ), LDT, WORK, SB, IINFO )
176:          ENDIF
177:  10   CONTINUE
178: *
179:       RETURN
180: *
181: *     End of CORE_STSQRT.
182: *
183:       END
184: