Skip to content

Commit 3b94e1a

Browse files
Shabareesh Shetty authored and committed
refactor: reduce loops and arithmetic operations
---
type: pre_commit_static_analysis_report
description: Results of running static analysis checks when committing changes.
report:
  - task: lint_filenames
    status: passed
  - task: lint_editorconfig
    status: passed
  - task: lint_markdown
    status: na
  - task: lint_package_json
    status: na
  - task: lint_repl_help
    status: na
  - task: lint_javascript_src
    status: passed
  - task: lint_javascript_cli
    status: na
  - task: lint_javascript_examples
    status: na
  - task: lint_javascript_tests
    status: na
  - task: lint_javascript_benchmarks
    status: na
  - task: lint_python
    status: na
  - task: lint_r
    status: na
  - task: lint_c_src
    status: na
  - task: lint_c_examples
    status: na
  - task: lint_c_benchmarks
    status: na
  - task: lint_c_tests_fixtures
    status: na
  - task: lint_shell
    status: na
  - task: lint_typescript_declarations
    status: na
  - task: lint_typescript_tests
    status: na
  - task: lint_license_headers
    status: passed
---
1 parent 5f16c85 commit 3b94e1a

File tree

1 file changed

+84
-67
lines changed
  • lib/node_modules/@stdlib/blas/base/dtrsm/lib

1 file changed

+84
-67
lines changed

lib/node_modules/@stdlib/blas/base/dtrsm/lib/base.js

Lines changed: 84 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
131131
var sa1;
132132
var sb0;
133133
var sb1;
134+
var ia;
135+
var ib;
134136
var oa;
135137
var ob;
136138
var i;
@@ -167,26 +169,31 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
167169
( !isrma && side === 'right' && uplo === 'lower' && transa === 'no-transpose' )
168170
) {
169171
for ( k = N - 1; k >= 0; k-- ) {
170-
if ( nonunit ) {
171-
oa2 = offsetA + ( k * sa1 ) + ( k * sa0 );
172-
tmp = 1.0 / A[ oa2 ];
173-
for ( i = 0; i < M; i++ ) {
174-
ob2 = offsetB + ( i * sb0 ) + ( k * sb1 );
175-
B[ ob2 ] *= tmp;
176-
}
177-
}
172+
oa = offsetA + ( k * sa0 );
173+
ob = offsetB + ( k * sb1 );
178174
for ( j = 0; j < k; j++ ) {
179-
oa2 = offsetA + ( j * sa1 ) + ( k * sa0 );
175+
oa2 = oa + ( j * sa1 );
180176
if ( A[ oa2 ] !== 0.0 ) {
181177
for ( i = 0; i < M; i++ ) {
182-
ob = offsetB + ( i * sb0 );
183-
B[ ob + ( j * sb1 ) ] -= A[ oa2 ] * B[ ob + ( k * sb1 ) ];
178+
ib = i * sb0;
179+
ob2 = ib + ( j * sb1 );
180+
B[ ob2 ] -= A[ oa2 ] * B[ ib + ob ];
181+
}
182+
}
183+
if ( nonunit ) {
184+
for ( i = 0; i < M; i++ ) {
185+
ib = i * sb0;
186+
oa2 = oa + ( k * sa1 );
187+
tmp = 1.0 / A[ oa2 ];
188+
ib += ob;
189+
B[ ib ] *= tmp;
184190
}
185191
}
186192
}
187193
if ( alpha !== 1.0 ) {
188194
for ( i = 0; i < M; i++ ) {
189-
ob2 = offsetB + ( i * sb0 ) + ( k * sb1 );
195+
ib = offsetB + ( i * sb0 );
196+
ob2 = ib + ob;
190197
B[ ob2 ] *= alpha;
191198
}
192199
}
@@ -201,19 +208,22 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
201208
ob = offsetB + ( j * sb0 );
202209
if ( alpha !== 1.0 ) {
203210
for ( i = 0; i < M; i++ ) {
204-
B[ ob + ( i * sb1 ) ] *= alpha;
211+
ib = ob + ( i * sb1 );
212+
B[ ib ] *= alpha;
205213
}
206214
}
207215
for ( k = 0; k < M; k++ ) {
208-
oa2 = offsetA + ( k * sa1 ) + ( k * sa0 );
216+
oa = offsetA + ( k * sa0 );
217+
oa2 = oa + ( k * sa1 );
209218
ob2 = ob + ( k * sb1 );
210219
if ( B[ ob2 ] !== 0.0 ) {
211220
if ( nonunit ) {
212221
B[ ob2 ] /= A[ oa2 ];
213222
}
214223
for ( i = k + 1; i < M; i++ ) {
215-
oa2 = offsetA + ( i * sa1 ) + ( k * sa0 );
216-
B[ ob + ( i * sb1 ) ] -= B[ ob2 ] * A[ oa2 ];
224+
oa2 = oa + ( i * sa1 );
225+
ib = ob + ( i * sb1 );
226+
B[ ib ] -= B[ ob2 ] * A[ oa2 ];
217227
}
218228
}
219229
}
@@ -225,29 +235,29 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
225235
( !isrma && side === 'right' && uplo === 'lower' && transa !== 'no-transpose' )
226236
) {
227237
for ( j = 0; j < N; j++ ) {
238+
ob = offsetB + ( j * sb1 );
239+
oa = offsetA + ( j * sa0 );
228240
for ( i = 0; i < M; i++ ) {
229-
ob2 = offsetB + ( i * sb0 ) + ( j * sb1 );
241+
ob2 = ob + ( i * sb0 );
230242
if ( alpha !== 1.0 ) {
231243
B[ ob2 ] *= alpha;
232244
}
245+
if ( nonunit ) {
246+
oa2 = oa + ( j * sa1 );
247+
tmp = 1.0 / A[ oa2 ];
248+
B[ ob2 ] *= tmp;
249+
}
233250
}
234251
for ( k = 0; k < j; k++ ) {
235252
for ( i = 0; i < M; i++ ) {
236-
ob = offsetB + ( i * sb0 );
237-
oa2 = offsetA + ( k * sa1 ) + ( j * sa0 );
253+
ib = offsetB + ( i * sb0 );
254+
oa2 = oa + ( k * sa1 );
255+
ob2 = ib + ( k * sb1 );
238256
if ( A[ oa2 ] !== 0.0 ) {
239-
B[ ob + ( j * sb1 ) ] -= A[ oa2 ] * B[ ob + ( k * sb1 ) ];
257+
B[ ib + ob ] -= A[ oa2 ] * B[ ob2 ];
240258
}
241259
}
242260
}
243-
if ( nonunit ) {
244-
oa2 = offsetA + ( j * sa1 ) + (j * sa0 );
245-
tmp = 1.0 / A[ oa2 ];
246-
for ( i = 0; i < M; i++ ) {
247-
ob2 = offsetB + ( i * sb0 ) + ( j * sb1 );
248-
B[ ob2 ] *= tmp;
249-
}
250-
}
251261
}
252262
return B;
253263
}
@@ -259,20 +269,22 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
259269
ob = offsetB + ( j * sb0 );
260270
if ( alpha !== 1.0 ) {
261271
for ( i = 0; i < M; i++ ) {
262-
B[ ob + ( i * sb1 ) ] *= alpha;
272+
ib = ob + ( i * sb1 );
273+
B[ ib ] *= alpha;
263274
}
264275
}
265276
for ( i = M - 1; i >= 0; i-- ) {
277+
oa = offsetA + ( i * sa0 );
266278
ob2 = ob + ( i * sb1 );
267279
for ( k = i + 1; k < M; k++ ) {
268-
oa2 = offsetA + ( i * sa0 ) + ( k * sa1 );
269-
B[ ob2 ] -= A[ oa2 ] * B[ ob + ( k * sb1 ) ];
280+
oa2 = oa + ( k * sa1 );
281+
ib = ob + ( k * sb1 );
282+
B[ ob2 ] -= A[ oa2 ] * B[ ib ];
270283
}
271284
if ( nonunit ) {
272-
oa2 = offsetA + ( i * sa0 ) + ( i * sa1 );
285+
oa2 = oa + ( i * sa1 );
273286
B[ ob2 ] /= A[ oa2 ];
274287
}
275-
B[ ob + ( i * sb1 ) ] = B[ ob2 ];
276288
}
277289
}
278290
return B;
@@ -284,16 +296,19 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
284296
for ( j = 0; j < N; j++ ) {
285297
ob = offsetB + ( j * sb1 );
286298
for ( i = 0; i < M; i++ ) {
287-
oa2 = offsetA + ( i * sa1 ) + ( i * sa0 );
288-
tmp = B[ ob + ( i * sb0 ) ] * alpha;
299+
oa = offsetA + ( i * sa0 );
300+
oa2 = oa + ( i * sa1 );
301+
ob2 = ob + ( i * sb0 );
302+
tmp = B[ ob2 ] * alpha;
289303
for ( k = 0; k < i; k++ ) {
290-
oa = offsetA + ( k * sa1 );
291-
tmp -= A[ oa + ( i * sa0 ) ] * B[ ob + ( k * sb0 ) ];
304+
oa += k * sa1;
305+
ib = ob + ( k * sb0 );
306+
tmp -= A[ oa ] * B[ ib ];
292307
}
293308
if ( nonunit ) {
294309
tmp /= A[ oa2 ];
295310
}
296-
B[ ob + ( i * sb0 ) ] = tmp;
311+
B[ ob2 ] = tmp;
297312
}
298313
}
299314
return B;
@@ -303,29 +318,29 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
303318
( !isrma && side === 'left' && uplo === 'upper' && transa === 'no-transpose' )
304319
) {
305320
for ( j = N - 1; j >= 0; j-- ) {
321+
oa = offsetA + ( j * sa0 );
322+
ob = offsetB + ( j * sb0 );
306323
for ( i = 0; i < M; i++ ) {
307-
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
324+
ob2 = ob + ( i * sb1 );
308325
if ( alpha !== 1.0 ) {
309326
B[ ob2 ] *= alpha;
310327
}
328+
if ( nonunit ) {
329+
oa2 = oa + ( j * sa1 );
330+
tmp = 1.0 / A[ oa2 ];
331+
B[ ob2 ] *= tmp;
332+
}
311333
}
312334
for ( k = j + 1; k < N; k++ ) {
335+
ia = k * sa1;
313336
for ( i = 0; i < M; i++ ) {
314-
ob2 = offsetB + ( i * sb1 );
315-
oa2 = offsetA + ( k * sa1 );
316-
if ( A[ oa2 + ( j * sa0 ) ] !== 0.0 ) {
317-
B[ ob2 + ( j * sb0 ) ] -= A[ oa2 + ( j * sa0 ) ] * B[ ob2 + ( k * sb0 ) ];
337+
ib = i * sb1;
338+
if ( A[ ia + oa ] !== 0.0 ) {
339+
ob2 = ib + ( k * sb0 );
340+
B[ ib + ob ] -= A[ ia + oa ] * B[ ob2 ];
318341
}
319342
}
320343
}
321-
if ( nonunit ) {
322-
oa2 = offsetA + ( j * sa1 ) + ( j * sa0 );
323-
tmp = 1.0 / A[ oa2 ];
324-
for ( i = 0; i < M; i++ ) {
325-
ob2 = offsetB + ( i * sb1 ) + ( j * sb0 );
326-
B[ ob2 ] *= tmp;
327-
}
328-
}
329344
}
330345
return B;
331346
}
@@ -337,19 +352,22 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
337352
ob = offsetB + ( j * sb1 );
338353
if ( alpha !== 1.0 ) {
339354
for ( i = 0; i < M; i++ ) {
340-
B[ ob + ( i * sb0 ) ] = B[ ob + ( i * sb0 ) ] * alpha;
355+
ob2 = ob + ( i * sb0 );
356+
B[ ob2 ] *= alpha;
341357
}
342358
}
343359
for ( k = M - 1; k >= 0; k-- ) {
344-
oa2 = offsetA + ( k * sa1 ) + ( k * sa0 );
360+
oa = offsetA + ( k * sa0 );
361+
oa2 = oa + ( k * sa1 );
345362
ob2 = ob + ( k * sb0 );
346363
if ( B[ ob2 ] !== 0.0 ) {
347364
if ( nonunit ) {
348365
B[ ob2 ] /= A[ oa2 ];
349366
}
350367
for ( i= 0; i < k; i++ ) {
351-
oa = offsetA + ( i * sa1 ) + ( k * sa0 );
352-
B[ ob + ( i * sb0 ) ] -= B[ ob2 ] * A[ oa ];
368+
oa += i * sa1;
369+
ib = ob + ( i * sb0 );
370+
B[ ib ] -= B[ ob2 ] * A[ oa ];
353371
}
354372
}
355373
}
@@ -359,26 +377,25 @@ function dtrsm( side, uplo, transa, diag, M, N, alpha, A, strideA1, strideA2, of
359377
// ( isrma && side === 'right' && uplo === 'lower' && transa !== 'no-transpose' ) || ( !isrma && side === 'left' && uplo === 'upper' && transa !== 'no-transpose' )
360378
for ( k = 0; k < N; k++ ) {
361379
ob = offsetB + ( k * sb0 );
362-
oa = offsetA + ( k * sa1 );
363-
oa2 = oa + ( k * sa0 );
364-
if ( nonunit ) {
365-
tmp = 1.0 / A[ oa2 ];
366-
for ( i = 0; i < M; i++ ) {
367-
B[ ob + ( i * sb1 ) ] *= tmp;
368-
}
369-
}
380+
oa = offsetA + ( k * sa0 );
370381
for ( j = k + 1; j < N; j++ ) {
371-
ob2 = offsetB + ( j * sb0 );
372-
oa2 = offsetA + ( j * sa1 ) + ( k * sa0 );
382+
ib = offsetB + ( j * sb0 );
383+
oa2 = oa + ( j * sa1 );
373384
if ( A[ oa2 ] !== 0.0 ) {
374385
for ( i = 0; i < M; i++ ) {
375-
B[ ob2 + ( i * sb1 ) ] -= A[ oa2 ] * B[ ob + ( i * sb1 ) ];
386+
ob2 = ob + ( i * sb1 );
387+
B[ ib + ( i * sb1 ) ] -= A[ oa2 ] * B[ ob2 ];
388+
if ( nonunit ) {
389+
tmp = 1.0 / A[ oa2 ];
390+
B[ ob2 ] *= tmp;
391+
}
376392
}
377393
}
378394
}
379395
if ( alpha !== 1.0 ) {
380396
for ( i = 0; i < M; i++ ) {
381-
B[ ob + ( i * sb1 ) ] *= alpha;
397+
ob2 = ob + ( i * sb1 );
398+
B[ ob2 ] *= alpha;
382399
}
383400
}
384401
}

0 commit comments

Comments
 (0)