@@ -131,6 +131,8 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
131131 var sa1 ;
132132 var sb0 ;
133133 var sb1 ;
134+ var ia ;
135+ var ib ;
134136 var oa ;
135137 var ob ;
136138 var i ;
@@ -167,26 +169,31 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
167169 ( ! isrma && side === 'right' && uplo === 'lower' && transa === 'no-transpose' )
168170 ) {
169171 for ( k = N - 1 ; k >= 0 ; k -- ) {
170- if ( nonunit ) {
171- oa2 = offsetA + ( k * sa1 ) + ( k * sa0 ) ;
172- tmp = 1.0 / A [ oa2 ] ;
173- for ( i = 0 ; i < M ; i ++ ) {
174- ob2 = offsetB + ( i * sb0 ) + ( k * sb1 ) ;
175- B [ ob2 ] *= tmp ;
176- }
177- }
172+ oa = offsetA + ( k * sa0 ) ;
173+ ob = offsetB + ( k * sb1 ) ;
178174 for ( j = 0 ; j < k ; j ++ ) {
179- oa2 = offsetA + ( j * sa1 ) + ( k * sa0 ) ;
175+ oa2 = oa + ( j * sa1 ) ;
180176 if ( A [ oa2 ] !== 0.0 ) {
181177 for ( i = 0 ; i < M ; i ++ ) {
182- ob = offsetB + ( i * sb0 ) ;
183- B [ ob + ( j * sb1 ) ] -= A [ oa2 ] * B [ ob + ( k * sb1 ) ] ;
178+ ib = i * sb0 ;
179+ ob2 = ib + ( j * sb1 ) ;
180+ B [ ob2 ] -= A [ oa2 ] * B [ ib + ob ] ;
181+ }
182+ }
183+ if ( nonunit ) {
184+ for ( i = 0 ; i < M ; i ++ ) {
185+ ib = i * sb0 ;
186+ oa2 = oa + ( k * sa1 ) ;
187+ tmp = 1.0 / A [ oa2 ] ;
188+ ib += ob ;
189+ B [ ib ] *= tmp ;
184190 }
185191 }
186192 }
187193 if ( alpha !== 1.0 ) {
188194 for ( i = 0 ; i < M ; i ++ ) {
189- ob2 = offsetB + ( i * sb0 ) + ( k * sb1 ) ;
195+ ib = offsetB + ( i * sb0 ) ;
196+ ob2 = ib + ob ;
190197 B [ ob2 ] *= alpha ;
191198 }
192199 }
@@ -201,19 +208,22 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
201208 ob = offsetB + ( j * sb0 ) ;
202209 if ( alpha !== 1.0 ) {
203210 for ( i = 0 ; i < M ; i ++ ) {
204- B [ ob + ( i * sb1 ) ] *= alpha ;
211+ ib = ob + ( i * sb1 ) ;
212+ B [ ib ] *= alpha ;
205213 }
206214 }
207215 for ( k = 0 ; k < M ; k ++ ) {
208- oa2 = offsetA + ( k * sa1 ) + ( k * sa0 ) ;
216+ oa = offsetA + ( k * sa0 ) ;
217+ oa2 = oa + ( k * sa1 ) ;
209218 ob2 = ob + ( k * sb1 ) ;
210219 if ( B [ ob2 ] !== 0.0 ) {
211220 if ( nonunit ) {
212221 B [ ob2 ] /= A [ oa2 ] ;
213222 }
214223 for ( i = k + 1 ; i < M ; i ++ ) {
215- oa2 = offsetA + ( i * sa1 ) + ( k * sa0 ) ;
216- B [ ob + ( i * sb1 ) ] -= B [ ob2 ] * A [ oa2 ] ;
224+ oa2 = oa + ( i * sa1 ) ;
225+ ib = ob + ( i * sb1 ) ;
226+ B [ ib ] -= B [ ob2 ] * A [ oa2 ] ;
217227 }
218228 }
219229 }
@@ -225,29 +235,29 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
225235 ( ! isrma && side === 'right' && uplo === 'lower' && transa !== 'no-transpose' )
226236 ) {
227237 for ( j = 0 ; j < N ; j ++ ) {
238+ ob = offsetB + ( j * sb1 ) ;
239+ oa = offsetA + ( j * sa0 ) ;
228240 for ( i = 0 ; i < M ; i ++ ) {
229- ob2 = offsetB + ( i * sb0 ) + ( j * sb1 ) ;
241+ ob2 = ob + ( i * sb0 ) ;
230242 if ( alpha !== 1.0 ) {
231243 B [ ob2 ] *= alpha ;
232244 }
245+ if ( nonunit ) {
246+ oa2 = oa + ( j * sa1 ) ;
247+ tmp = 1.0 / A [ oa2 ] ;
248+ B [ ob2 ] *= tmp ;
249+ }
233250 }
234251 for ( k = 0 ; k < j ; k ++ ) {
235252 for ( i = 0 ; i < M ; i ++ ) {
236- ob = offsetB + ( i * sb0 ) ;
237- oa2 = offsetA + ( k * sa1 ) + ( j * sa0 ) ;
253+ ib = offsetB + ( i * sb0 ) ;
254+ oa2 = oa + ( k * sa1 ) ;
255+ ob2 = ib + ( k * sb1 ) ;
238256 if ( A [ oa2 ] !== 0.0 ) {
239- B [ ob + ( j * sb1 ) ] -= A [ oa2 ] * B [ ob + ( k * sb1 ) ] ;
257+ B [ ib + ob ] -= A [ oa2 ] * B [ ob2 ] ;
240258 }
241259 }
242260 }
243- if ( nonunit ) {
244- oa2 = offsetA + ( j * sa1 ) + ( j * sa0 ) ;
245- tmp = 1.0 / A [ oa2 ] ;
246- for ( i = 0 ; i < M ; i ++ ) {
247- ob2 = offsetB + ( i * sb0 ) + ( j * sb1 ) ;
248- B [ ob2 ] *= tmp ;
249- }
250- }
251261 }
252262 return B ;
253263 }
@@ -259,20 +269,22 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
259269 ob = offsetB + ( j * sb0 ) ;
260270 if ( alpha !== 1.0 ) {
261271 for ( i = 0 ; i < M ; i ++ ) {
262- B [ ob + ( i * sb1 ) ] *= alpha ;
272+ ib = ob + ( i * sb1 ) ;
273+ B [ ib ] *= alpha ;
263274 }
264275 }
265276 for ( i = M - 1 ; i >= 0 ; i -- ) {
277+ oa = offsetA + ( i * sa0 ) ;
266278 ob2 = ob + ( i * sb1 ) ;
267279 for ( k = i + 1 ; k < M ; k ++ ) {
268- oa2 = offsetA + ( i * sa0 ) + ( k * sa1 ) ;
269- B [ ob2 ] -= A [ oa2 ] * B [ ob + ( k * sb1 ) ] ;
280+ oa2 = oa + ( k * sa1 ) ;
281+ ib = ob + ( k * sb1 ) ;
282+ B [ ob2 ] -= A [ oa2 ] * B [ ib ] ;
270283 }
271284 if ( nonunit ) {
272- oa2 = offsetA + ( i * sa0 ) + ( i * sa1 ) ;
285+ oa2 = oa + ( i * sa1 ) ;
273286 B [ ob2 ] /= A [ oa2 ] ;
274287 }
275- B [ ob + ( i * sb1 ) ] = B [ ob2 ] ;
276288 }
277289 }
278290 return B ;
@@ -284,16 +296,19 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
284296 for ( j = 0 ; j < N ; j ++ ) {
285297 ob = offsetB + ( j * sb1 ) ;
286298 for ( i = 0 ; i < M ; i ++ ) {
287- oa2 = offsetA + ( i * sa1 ) + ( i * sa0 ) ;
288- tmp = B [ ob + ( i * sb0 ) ] * alpha ;
299+ oa = offsetA + ( i * sa0 ) ;
300+ oa2 = oa + ( i * sa1 ) ;
301+ ob2 = ob + ( i * sb0 ) ;
302+ tmp = B [ ob2 ] * alpha ;
289303 for ( k = 0 ; k < i ; k ++ ) {
290- oa = offsetA + ( k * sa1 ) ;
291- tmp -= A [ oa + ( i * sa0 ) ] * B [ ob + ( k * sb0 ) ] ;
304+ oa += k * sa1 ;
305+ ib = ob + ( k * sb0 ) ;
306+ tmp -= A [ oa ] * B [ ib ] ;
292307 }
293308 if ( nonunit ) {
294309 tmp /= A [ oa2 ] ;
295310 }
296- B [ ob + ( i * sb0 ) ] = tmp ;
311+ B [ ob2 ] = tmp ;
297312 }
298313 }
299314 return B ;
@@ -303,29 +318,29 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
303318 ( ! isrma && side === 'left' && uplo === 'upper' && transa === 'no-transpose' )
304319 ) {
305320 for ( j = N - 1 ; j >= 0 ; j -- ) {
321+ oa = offsetA + ( j * sa0 ) ;
322+ ob = offsetB + ( j * sb0 ) ;
306323 for ( i = 0 ; i < M ; i ++ ) {
307- ob2 = offsetB + ( i * sb1 ) + ( j * sb0 ) ;
324+ ob2 = ob + ( i * sb1 ) ;
308325 if ( alpha !== 1.0 ) {
309326 B [ ob2 ] *= alpha ;
310327 }
328+ if ( nonunit ) {
329+ oa2 = oa + ( j * sa1 ) ;
330+ tmp = 1.0 / A [ oa2 ] ;
331+ B [ ob2 ] *= tmp ;
332+ }
311333 }
312334 for ( k = j + 1 ; k < N ; k ++ ) {
335+ ia = k * sa1 ;
313336 for ( i = 0 ; i < M ; i ++ ) {
314- ob2 = offsetB + ( i * sb1 ) ;
315- oa2 = offsetA + ( k * sa1 ) ;
316- if ( A [ oa2 + ( j * sa0 ) ] !== 0.0 ) {
317- B [ ob2 + ( j * sb0 ) ] -= A [ oa2 + ( j * sa0 ) ] * B [ ob2 + ( k * sb0 ) ] ;
337+ ib = i * sb1 ;
338+ if ( A [ ia + oa ] !== 0.0 ) {
339+ ob2 = ib + ( k * sb0 ) ;
340+ B [ ib + ob ] -= A [ ia + oa ] * B [ ob2 ] ;
318341 }
319342 }
320343 }
321- if ( nonunit ) {
322- oa2 = offsetA + ( j * sa1 ) + ( j * sa0 ) ;
323- tmp = 1.0 / A [ oa2 ] ;
324- for ( i = 0 ; i < M ; i ++ ) {
325- ob2 = offsetB + ( i * sb1 ) + ( j * sb0 ) ;
326- B [ ob2 ] *= tmp ;
327- }
328- }
329344 }
330345 return B ;
331346 }
@@ -337,19 +352,22 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
337352 ob = offsetB + ( j * sb1 ) ;
338353 if ( alpha !== 1.0 ) {
339354 for ( i = 0 ; i < M ; i ++ ) {
340- B [ ob + ( i * sb0 ) ] = B [ ob + ( i * sb0 ) ] * alpha ;
355+ ob2 = ob + ( i * sb0 ) ;
356+ B [ ob2 ] *= alpha ;
341357 }
342358 }
343359 for ( k = M - 1 ; k >= 0 ; k -- ) {
344- oa2 = offsetA + ( k * sa1 ) + ( k * sa0 ) ;
360+ oa = offsetA + ( k * sa0 ) ;
361+ oa2 = oa + ( k * sa1 ) ;
345362 ob2 = ob + ( k * sb0 ) ;
346363 if ( B [ ob2 ] !== 0.0 ) {
347364 if ( nonunit ) {
348365 B [ ob2 ] /= A [ oa2 ] ;
349366 }
350367 for ( i = 0 ; i < k ; i ++ ) {
351- oa = offsetA + ( i * sa1 ) + ( k * sa0 ) ;
352- B [ ob + ( i * sb0 ) ] -= B [ ob2 ] * A [ oa ] ;
368+ oa += i * sa1 ;
369+ ib = ob + ( i * sb0 ) ;
370+ B [ ib ] -= B [ ob2 ] * A [ oa ] ;
353371 }
354372 }
355373 }
@@ -359,26 +377,25 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
359377 // ( isrma && side === 'right' && uplo === 'lower' && transa !== 'no-transpose' ) || ( !isrma && side === 'left' && uplo === 'upper' && transa !== 'no-transpose' )
360378 for ( k = 0 ; k < N ; k ++ ) {
361379 ob = offsetB + ( k * sb0 ) ;
362- oa = offsetA + ( k * sa1 ) ;
363- oa2 = oa + ( k * sa0 ) ;
364- if ( nonunit ) {
365- tmp = 1.0 / A [ oa2 ] ;
366- for ( i = 0 ; i < M ; i ++ ) {
367- B [ ob + ( i * sb1 ) ] *= tmp ;
368- }
369- }
380+ oa = offsetA + ( k * sa0 ) ;
370381 for ( j = k + 1 ; j < N ; j ++ ) {
371- ob2 = offsetB + ( j * sb0 ) ;
372- oa2 = offsetA + ( j * sa1 ) + ( k * sa0 ) ;
382+ ib = offsetB + ( j * sb0 ) ;
383+ oa2 = oa + ( j * sa1 ) ;
373384 if ( A [ oa2 ] !== 0.0 ) {
374385 for ( i = 0 ; i < M ; i ++ ) {
375- B [ ob2 + ( i * sb1 ) ] -= A [ oa2 ] * B [ ob + ( i * sb1 ) ] ;
386+ ob2 = ob + ( i * sb1 ) ;
387+ B [ ib + ( i * sb1 ) ] -= A [ oa2 ] * B [ ob2 ] ;
388+ if ( nonunit ) {
389+ tmp = 1.0 / A [ oa2 ] ;
390+ B [ ob2 ] *= tmp ;
391+ }
376392 }
377393 }
378394 }
379395 if ( alpha !== 1.0 ) {
380396 for ( i = 0 ; i < M ; i ++ ) {
381- B [ ob + ( i * sb1 ) ] *= alpha ;
397+ ob2 = ob + ( i * sb1 ) ;
398+ B [ ob2 ] *= alpha ;
382399 }
383400 }
384401 }
0 commit comments