Skip to content

Commit ab54bb4

Browse files
committed
improve sieve:collect performance +++ minor improvements #38
1 parent 79dbb1c commit ab54bb4

File tree

2 files changed

+159
-36
lines changed

2 files changed

+159
-36
lines changed

src/main/java/de/tilman_neumann/jml/factor/siqs/sieve/Sieve03h.java

Lines changed: 147 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
import static de.tilman_neumann.jml.factor.base.GlobalFactoringOptions.*;
1818

1919
import java.math.BigInteger;
20+
import java.nio.ByteBuffer;
21+
import java.nio.ByteOrder;
2022

2123
import org.apache.logging.log4j.Logger;
2224
import org.apache.logging.log4j.LogManager;
@@ -65,13 +67,18 @@
6567
*
6668
* Version 03h:
6769
* -> unroll largest primes with the same logP value
70+
* -> using a ByteBuffer allows accessing the byte array as longs in the collect phase
6871
*
6972
* @author Tilman Neumann
7073
*/
7174
public class Sieve03h implements Sieve {
7275
private static final Logger LOG = LogManager.getLogger(Sieve03h.class);
7376
private static final boolean DEBUG = false;
7477

78+
private static final long LONG_MASK = 0x8080808080808080L;
79+
private static final long UPPER_MASK = 0x8080808000000000L;
80+
private static final long LOWER_MASK = 0x80808080L;
81+
7582
private static final double LN2 = Math.log(2.0);
7683

7784
private BigInteger daParam, bParam, cParam, kN;
@@ -106,12 +113,15 @@ public class Sieve03h implements Sieve {
106113

107114
// sieve
108115
private int sieveArraySize;
116+
private int sieveAllocationSize;
109117
/** the initializer value */
110118
private byte initializerValue;
111119
/** basic building block for fast initialization of sieve array */
112120
private byte[] initializerBlock;
113121
/** the array holding logP sums for all x */
114122
private byte[] sieveArray;
123+
/** a view on the sieveArray that allows reading it one long (8 bytes) at a time */
124+
ByteBuffer sieveBuffer;
115125

116126
/** buffers for trial division engine. */
117127
private UnsignedBigInt Q_rest_UBI = new UnsignedBigInt(new int[50]);
@@ -159,9 +169,11 @@ public void initializeForN(SieveParams sieveParams, BaseArrays baseArrays, int m
159169
// For primes p[i], i<p1Index, we need p[i]+sieveArraySize = 2*sieveArraySize entries.
160170
this.sieveArraySize = sieveParams.sieveArraySize;
161171
int pMax = sieveParams.pMax;
162-
int sieveAllocationSize = Math.max(pMax+1, 2*sieveArraySize);
172+
sieveAllocationSize = Math.max(pMax+1, 2*sieveArraySize);
163173
sieveArray = new byte[sieveAllocationSize];
164-
if (DEBUG) LOG.debug("pMax = " + pMax + ", sieveArraySize = " + sieveArraySize + " --> sieveAllocationSize = " + sieveAllocationSize);
174+
sieveBuffer = ByteBuffer.wrap(sieveArray).order(ByteOrder.LITTLE_ENDIAN);
175+
176+
if (DEBUG) LOG.debug("pMax = " + pMax + ", sieveArraySize = " + sieveArraySize + " --> sieveAllocationSize = " + sieveAllocationSize);
165177

166178
if (ANALYZE) {
167179
sieveHitCount = 0;
@@ -299,17 +311,56 @@ public SieveResult sieve() {
299311
} // end for (p)
300312
if (ANALYZE) sieveDuration += timer.capture();
301313

302-
// collect results
303-
// let the sieve entry counter x run down to 0 is much faster because of the simpler exit condition
304-
for (int x=sieveArraySize-1; x>=0; ) {
305-
// Unfortunately, in Java we can not cast byte[] to int[] or long[].
306-
// So we have to use 'or'. More than 4 'or's do not pay out.
307-
if (((sieveArray[x--] | sieveArray[x--] | sieveArray[x--] | sieveArray[x--]) & 0x80) != 0) {
308-
// at least one of the tested Q(x) is sufficiently smooth to be passed to trial division!
309-
if (sieveArray[x+1] < 0) addSmoothCandidate(x+1, sieveArray[x+1] & 0xFF);
310-
if (sieveArray[x+2] < 0) addSmoothCandidate(x+2, sieveArray[x+2] & 0xFF);
311-
if (sieveArray[x+3] < 0) addSmoothCandidate(x+3, sieveArray[x+3] & 0xFF);
312-
if (sieveArray[x+4] < 0) addSmoothCandidate(x+4, sieveArray[x+4] & 0xFF);
314+
sieveBuffer.rewind();
315+
316+
// collect results: we check 8 sieve locations in one long and 32 longs at once
317+
int x = 0;
318+
int sieveArraySizeLong = sieveArraySize >> 3;
319+
while (x < sieveArraySizeLong) {
320+
long t = sieveBuffer.getLong();
321+
t |= sieveBuffer.getLong();
322+
t |= sieveBuffer.getLong();
323+
t |= sieveBuffer.getLong();
324+
t |= sieveBuffer.getLong();
325+
t |= sieveBuffer.getLong();
326+
t |= sieveBuffer.getLong();
327+
t |= sieveBuffer.getLong();
328+
t |= sieveBuffer.getLong();
329+
t |= sieveBuffer.getLong();
330+
t |= sieveBuffer.getLong();
331+
t |= sieveBuffer.getLong();
332+
t |= sieveBuffer.getLong();
333+
t |= sieveBuffer.getLong();
334+
t |= sieveBuffer.getLong();
335+
t |= sieveBuffer.getLong();
336+
t |= sieveBuffer.getLong();
337+
t |= sieveBuffer.getLong();
338+
t |= sieveBuffer.getLong();
339+
t |= sieveBuffer.getLong();
340+
t |= sieveBuffer.getLong();
341+
t |= sieveBuffer.getLong();
342+
t |= sieveBuffer.getLong();
343+
t |= sieveBuffer.getLong();
344+
t |= sieveBuffer.getLong();
345+
t |= sieveBuffer.getLong();
346+
t |= sieveBuffer.getLong();
347+
t |= sieveBuffer.getLong();
348+
t |= sieveBuffer.getLong();
349+
t |= sieveBuffer.getLong();
350+
t |= sieveBuffer.getLong();
351+
t |= sieveBuffer.getLong();
352+
x += 32;
353+
if ((t & LONG_MASK) == 0) continue;
354+
355+
// go back and look in more detail
356+
x -= 32;
357+
sieveBuffer.position(x<<3);
358+
359+
for (int l=0; l<32; l++, x++) {
360+
final long y = sieveBuffer.getLong();
361+
if ((y & LONG_MASK) != 0) {
362+
testLongPositive(y, x<<3);
363+
}
313364
}
314365
}
315366
if (ANALYZE) collectDuration += timer.capture();
@@ -373,17 +424,55 @@ public SieveResult sieve() {
373424
} // end for (p)
374425
if (ANALYZE) sieveDuration += timer.capture();
375426

376-
// collect results
377-
// let the sieve entry counter x run down to 0 is much faster because of the simpler exit condition
378-
for (int x=sieveArraySize-1; x>=0; ) {
379-
// Unfortunately, in Java we can not cast byte[] to int[] or long[].
380-
// So we have to use 'or'. More than 4 'or's do not pay out.
381-
if (((sieveArray[x--] | sieveArray[x--] | sieveArray[x--] | sieveArray[x--]) & 0x80) != 0) {
382-
// at least one of the tested Q(-x) is sufficiently smooth to be passed to trial division!
383-
if (sieveArray[x+1] < 0) addSmoothCandidate(-(x+1), sieveArray[x+1] & 0xFF);
384-
if (sieveArray[x+2] < 0) addSmoothCandidate(-(x+2), sieveArray[x+2] & 0xFF);
385-
if (sieveArray[x+3] < 0) addSmoothCandidate(-(x+3), sieveArray[x+3] & 0xFF);
386-
if (sieveArray[x+4] < 0) addSmoothCandidate(-(x+4), sieveArray[x+4] & 0xFF);
427+
sieveBuffer.rewind();
428+
429+
// collect results: we check 8 sieve locations in one long and 32 longs at once
430+
x = 0;
431+
while (x < sieveArraySizeLong) {
432+
long t = sieveBuffer.getLong();
433+
t |= sieveBuffer.getLong();
434+
t |= sieveBuffer.getLong();
435+
t |= sieveBuffer.getLong();
436+
t |= sieveBuffer.getLong();
437+
t |= sieveBuffer.getLong();
438+
t |= sieveBuffer.getLong();
439+
t |= sieveBuffer.getLong();
440+
t |= sieveBuffer.getLong();
441+
t |= sieveBuffer.getLong();
442+
t |= sieveBuffer.getLong();
443+
t |= sieveBuffer.getLong();
444+
t |= sieveBuffer.getLong();
445+
t |= sieveBuffer.getLong();
446+
t |= sieveBuffer.getLong();
447+
t |= sieveBuffer.getLong();
448+
t |= sieveBuffer.getLong();
449+
t |= sieveBuffer.getLong();
450+
t |= sieveBuffer.getLong();
451+
t |= sieveBuffer.getLong();
452+
t |= sieveBuffer.getLong();
453+
t |= sieveBuffer.getLong();
454+
t |= sieveBuffer.getLong();
455+
t |= sieveBuffer.getLong();
456+
t |= sieveBuffer.getLong();
457+
t |= sieveBuffer.getLong();
458+
t |= sieveBuffer.getLong();
459+
t |= sieveBuffer.getLong();
460+
t |= sieveBuffer.getLong();
461+
t |= sieveBuffer.getLong();
462+
t |= sieveBuffer.getLong();
463+
t |= sieveBuffer.getLong();
464+
x += 32;
465+
if ((t & LONG_MASK) == 0) continue;
466+
467+
// go back and look in more detail
468+
x -= 32;
469+
sieveBuffer.position(x<<3);
470+
471+
for (int l=0; l<32; l++, x++) {
472+
final long y = sieveBuffer.getLong();
473+
if ((y & LONG_MASK) != 0) {
474+
testLongNegative(y, x<<3);
475+
}
387476
}
388477
}
389478
if (ANALYZE) collectDuration += timer.capture();
@@ -407,6 +496,40 @@ private void initializeSieveArray(int sieveArraySize) {
407496
}
408497
}
409498

499+
/**
 * Examines the 8 sieve bytes packed into one long and reports every byte whose top bit (0x80) is set
 * via addSmoothCandidate(position, score), where score is the unsigned value of that byte.
 * The least significant byte of y is reported as position x, the most significant byte as position x+7;
 * this matches the collect loop of this class, which reads y from a little-endian ByteBuffer view of the sieve array.
 *
 * @param y 8 consecutive sieve bytes packed into a long
 * @param x the sieve position reported for the least significant byte of y
 */
private void testLongPositive(long y, int x) {
500+
// lower 4 bytes: skipped as a whole if none of them has its top bit set
if ((y & LOWER_MASK) != 0) {
501+
// the narrowing cast keeps the low 32 bits of y
final int y0 = (int) y;
502+
if ((y0 & 0x80) != 0) addSmoothCandidate(x , y0 & 0xFF);
503+
if ((y0 & 0x8000) != 0) addSmoothCandidate(x+1, (y0>> 8) & 0xFF);
504+
if ((y0 & 0x800000) != 0) addSmoothCandidate(x+2, (y0>>16) & 0xFF);
505+
// the signed shift sign-extends here, but the & 0xFF mask removes the extension again
if ((y0 & 0x80000000) != 0) addSmoothCandidate(x+3, (y0>>24) & 0xFF);
506+
}
507+
// upper 4 bytes: skipped as a whole if none of them has its top bit set
if((y & UPPER_MASK) != 0) {
508+
// shift the high 32 bits of y down so that they can be tested like the lower half
final int y1 = (int) (y >> 32);
509+
if ((y1 & 0x80) != 0) addSmoothCandidate(x+4, y1 & 0xFF);
510+
if ((y1 & 0x8000) != 0) addSmoothCandidate(x+5, (y1>> 8) & 0xFF);
511+
if ((y1 & 0x800000) != 0) addSmoothCandidate(x+6, (y1>>16) & 0xFF);
512+
if ((y1 & 0x80000000) != 0) addSmoothCandidate(x+7, (y1>>24) & 0xFF);
513+
}
514+
}
515+
516+
/**
 * Examines the 8 sieve bytes packed into one long and reports every byte whose top bit (0x80) is set
 * via addSmoothCandidate(position, score), where score is the unsigned value of that byte.
 * Positions are reported negated: the least significant byte of y is reported as -x,
 * the most significant byte as -(x+7).
 *
 * @param y 8 consecutive sieve bytes packed into a long
 * @param x the (non-negated) sieve position belonging to the least significant byte of y
 */
private void testLongNegative(long y, int x) {
517+
// lower 4 bytes: skipped as a whole if none of them has its top bit set
if ((y & LOWER_MASK) != 0) {
518+
// the narrowing cast keeps the low 32 bits of y
final int y0 = (int) y;
519+
if ((y0 & 0x80) != 0) addSmoothCandidate(- x , y0 & 0xFF);
520+
if ((y0 & 0x8000) != 0) addSmoothCandidate(-(x+1), (y0>> 8) & 0xFF);
521+
if ((y0 & 0x800000) != 0) addSmoothCandidate(-(x+2), (y0>>16) & 0xFF);
522+
// the signed shift sign-extends here, but the & 0xFF mask removes the extension again
if ((y0 & 0x80000000) != 0) addSmoothCandidate(-(x+3), (y0>>24) & 0xFF);
523+
}
524+
// upper 4 bytes: skipped as a whole if none of them has its top bit set
if((y & UPPER_MASK) != 0) {
525+
// shift the high 32 bits of y down so that they can be tested like the lower half
final int y1 = (int) (y >> 32);
526+
if ((y1 & 0x80) != 0) addSmoothCandidate(-(x+4), y1 & 0xFF);
527+
if ((y1 & 0x8000) != 0) addSmoothCandidate(-(x+5), (y1>> 8) & 0xFF);
528+
if ((y1 & 0x800000) != 0) addSmoothCandidate(-(x+6), (y1>>16) & 0xFF);
529+
if ((y1 & 0x80000000) != 0) addSmoothCandidate(-(x+7), (y1>>24) & 0xFF);
530+
}
531+
}
532+
410533
private void addSmoothCandidate(int x, int score) {
411534
if (ANALYZE) sieveHitCount++;
412535

src/main/java/de/tilman_neumann/jml/factor/siqs/sieve/Sieve03hU.java

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -308,9 +308,9 @@ public SieveResult sieve() {
308308
} // end for (p)
309309
if (ANALYZE) sieveDuration += timer.capture();
310310

311-
// collect results: we check 8 sieve locations in one long
311+
// collect results: we check 8 sieve locations in one long and 32 longs at once
312312
long x = sieveArrayAddress-8;
313-
while (x<sieveArrayAddress+sieveArraySize-8) {
313+
while (x < sieveArrayAddress+sieveArraySize-8) {
314314
long t = UNSAFE.getLong(x+=8);
315315
t |= UNSAFE.getLong(x+=8);
316316
t |= UNSAFE.getLong(x+=8);
@@ -343,14 +343,14 @@ public SieveResult sieve() {
343343
t |= UNSAFE.getLong(x+=8);
344344
t |= UNSAFE.getLong(x+=8);
345345
t |= UNSAFE.getLong(x+=8);
346-
if((t & LONG_MASK) == 0) continue;
346+
if ((t & LONG_MASK) == 0) continue;
347347

348-
// back up to get the last 8 and look in more detail
348+
// go back and look in more detail
349349
x -= 256;
350350

351-
for(int l=0; l<32; l++) {
351+
for (int l=0; l<32; l++) {
352352
final long y = UNSAFE.getLong(x+=8);
353-
if((y & LONG_MASK) != 0) {
353+
if ((y & LONG_MASK) != 0) {
354354
testLongPositive(y, (int) (x-sieveArrayAddress));
355355
}
356356
}
@@ -424,9 +424,9 @@ public SieveResult sieve() {
424424
} // end for (p)
425425
if (ANALYZE) sieveDuration += timer.capture();
426426

427-
// collect results
427+
// collect results: we check 8 sieve locations in one long and 32 longs at once
428428
x = sieveArrayAddress-8;
429-
while (x<sieveArrayAddress+sieveArraySize-8) {
429+
while (x < sieveArrayAddress+sieveArraySize-8) {
430430
long t = UNSAFE.getLong(x+=8);
431431
t |= UNSAFE.getLong(x+=8);
432432
t |= UNSAFE.getLong(x+=8);
@@ -459,14 +459,14 @@ public SieveResult sieve() {
459459
t |= UNSAFE.getLong(x+=8);
460460
t |= UNSAFE.getLong(x+=8);
461461
t |= UNSAFE.getLong(x+=8);
462-
if((t & LONG_MASK) == 0) continue;
462+
if ((t & LONG_MASK) == 0) continue;
463463

464-
// back up to get the last 8 and look in more detail
464+
// go back and look in more detail
465465
x -= 256;
466466

467-
for(int l=0; l<32; l++) {
467+
for (int l=0; l<32; l++) {
468468
final long y = UNSAFE.getLong(x+=8);
469-
if((y & LONG_MASK) != 0) {
469+
if ((y & LONG_MASK) != 0) {
470470
testLongNegative(y, (int) (x-sieveArrayAddress));
471471
}
472472
}

0 commit comments

Comments
 (0)