1717import static de .tilman_neumann .jml .factor .base .GlobalFactoringOptions .*;
1818
1919import java .math .BigInteger ;
20+ import java .nio .ByteBuffer ;
21+ import java .nio .ByteOrder ;
2022
2123import org .apache .logging .log4j .Logger ;
2224import org .apache .logging .log4j .LogManager ;
6567 *
6668 * Version 03h:
6769 * -> unroll largest primes with the same logP value
70+ * -> using a ByteBuffer allows accessing the byte array as longs in the collect phase
6871 *
6972 * @author Tilman Neumann
7073 */
7174public class Sieve03h implements Sieve {
7275 private static final Logger LOG = LogManager .getLogger (Sieve03h .class );
7376 private static final boolean DEBUG = false ;
7477
78+ private static final long LONG_MASK = 0x8080808080808080L ;
79+ private static final long UPPER_MASK = 0x8080808000000000L ;
80+ private static final long LOWER_MASK = 0x80808080L ;
81+
7582 private static final double LN2 = Math .log (2.0 );
7683
7784 private BigInteger daParam , bParam , cParam , kN ;
@@ -106,12 +113,15 @@ public class Sieve03h implements Sieve {
106113
107114 // sieve
108115 private int sieveArraySize ;
116+ private int sieveAllocationSize ;
109117 /** the initializer value */
110118 private byte initializerValue ;
111119 /** basic building block for fast initialization of sieve array */
112120 private byte [] initializerBlock ;
113121 /** the array holding logP sums for all x */
114122 private byte [] sieveArray ;
123+ /** a view on the sieveArray that allows reading it in chunks of one long (8 bytes) at a time */
124+ ByteBuffer sieveBuffer ;
115125
116126 /** buffers for trial division engine. */
117127 private UnsignedBigInt Q_rest_UBI = new UnsignedBigInt (new int [50 ]);
@@ -159,9 +169,11 @@ public void initializeForN(SieveParams sieveParams, BaseArrays baseArrays, int m
159169 // For primes p[i], i<p1Index, we need p[i]+sieveArraySize = 2*sieveArraySize entries.
160170 this .sieveArraySize = sieveParams .sieveArraySize ;
161171 int pMax = sieveParams .pMax ;
162- int sieveAllocationSize = Math .max (pMax +1 , 2 *sieveArraySize );
172+ sieveAllocationSize = Math .max (pMax +1 , 2 *sieveArraySize );
163173 sieveArray = new byte [sieveAllocationSize ];
164- if (DEBUG ) LOG .debug ("pMax = " + pMax + ", sieveArraySize = " + sieveArraySize + " --> sieveAllocationSize = " + sieveAllocationSize );
174+ sieveBuffer = ByteBuffer .wrap (sieveArray ).order (ByteOrder .LITTLE_ENDIAN );
175+
176+ if (DEBUG ) LOG .debug ("pMax = " + pMax + ", sieveArraySize = " + sieveArraySize + " --> sieveAllocationSize = " + sieveAllocationSize );
165177
166178 if (ANALYZE ) {
167179 sieveHitCount = 0 ;
@@ -299,17 +311,56 @@ public SieveResult sieve() {
299311 } // end for (p)
300312 if (ANALYZE ) sieveDuration += timer .capture ();
301313
302- // collect results
303- // let the sieve entry counter x run down to 0 is much faster because of the simpler exit condition
304- for (int x =sieveArraySize -1 ; x >=0 ; ) {
305- // Unfortunately, in Java we can not cast byte[] to int[] or long[].
306- // So we have to use 'or'. More than 4 'or's do not pay out.
307- if (((sieveArray [x --] | sieveArray [x --] | sieveArray [x --] | sieveArray [x --]) & 0x80 ) != 0 ) {
308- // at least one of the tested Q(x) is sufficiently smooth to be passed to trial division!
309- if (sieveArray [x +1 ] < 0 ) addSmoothCandidate (x +1 , sieveArray [x +1 ] & 0xFF );
310- if (sieveArray [x +2 ] < 0 ) addSmoothCandidate (x +2 , sieveArray [x +2 ] & 0xFF );
311- if (sieveArray [x +3 ] < 0 ) addSmoothCandidate (x +3 , sieveArray [x +3 ] & 0xFF );
312- if (sieveArray [x +4 ] < 0 ) addSmoothCandidate (x +4 , sieveArray [x +4 ] & 0xFF );
314+ sieveBuffer .rewind ();
315+
316+ // collect results: we check 8 sieve locations in one long and 32 longs at once
317+ int x = 0 ;
318+ int sieveArraySizeLong = sieveArraySize >> 3 ;
319+ while (x < sieveArraySizeLong ) {
320+ long t = sieveBuffer .getLong ();
321+ t |= sieveBuffer .getLong ();
322+ t |= sieveBuffer .getLong ();
323+ t |= sieveBuffer .getLong ();
324+ t |= sieveBuffer .getLong ();
325+ t |= sieveBuffer .getLong ();
326+ t |= sieveBuffer .getLong ();
327+ t |= sieveBuffer .getLong ();
328+ t |= sieveBuffer .getLong ();
329+ t |= sieveBuffer .getLong ();
330+ t |= sieveBuffer .getLong ();
331+ t |= sieveBuffer .getLong ();
332+ t |= sieveBuffer .getLong ();
333+ t |= sieveBuffer .getLong ();
334+ t |= sieveBuffer .getLong ();
335+ t |= sieveBuffer .getLong ();
336+ t |= sieveBuffer .getLong ();
337+ t |= sieveBuffer .getLong ();
338+ t |= sieveBuffer .getLong ();
339+ t |= sieveBuffer .getLong ();
340+ t |= sieveBuffer .getLong ();
341+ t |= sieveBuffer .getLong ();
342+ t |= sieveBuffer .getLong ();
343+ t |= sieveBuffer .getLong ();
344+ t |= sieveBuffer .getLong ();
345+ t |= sieveBuffer .getLong ();
346+ t |= sieveBuffer .getLong ();
347+ t |= sieveBuffer .getLong ();
348+ t |= sieveBuffer .getLong ();
349+ t |= sieveBuffer .getLong ();
350+ t |= sieveBuffer .getLong ();
351+ t |= sieveBuffer .getLong ();
352+ x += 32 ;
353+ if ((t & LONG_MASK ) == 0 ) continue ;
354+
355+ // go back and look in more detail
356+ x -= 32 ;
357+ sieveBuffer .position (x <<3 );
358+
359+ for (int l =0 ; l <32 ; l ++, x ++) {
360+ final long y = sieveBuffer .getLong ();
361+ if ((y & LONG_MASK ) != 0 ) {
362+ testLongPositive (y , x <<3 );
363+ }
313364 }
314365 }
315366 if (ANALYZE ) collectDuration += timer .capture ();
@@ -373,17 +424,55 @@ public SieveResult sieve() {
373424 } // end for (p)
374425 if (ANALYZE ) sieveDuration += timer .capture ();
375426
376- // collect results
377- // let the sieve entry counter x run down to 0 is much faster because of the simpler exit condition
378- for (int x =sieveArraySize -1 ; x >=0 ; ) {
379- // Unfortunately, in Java we can not cast byte[] to int[] or long[].
380- // So we have to use 'or'. More than 4 'or's do not pay out.
381- if (((sieveArray [x --] | sieveArray [x --] | sieveArray [x --] | sieveArray [x --]) & 0x80 ) != 0 ) {
382- // at least one of the tested Q(-x) is sufficiently smooth to be passed to trial division!
383- if (sieveArray [x +1 ] < 0 ) addSmoothCandidate (-(x +1 ), sieveArray [x +1 ] & 0xFF );
384- if (sieveArray [x +2 ] < 0 ) addSmoothCandidate (-(x +2 ), sieveArray [x +2 ] & 0xFF );
385- if (sieveArray [x +3 ] < 0 ) addSmoothCandidate (-(x +3 ), sieveArray [x +3 ] & 0xFF );
386- if (sieveArray [x +4 ] < 0 ) addSmoothCandidate (-(x +4 ), sieveArray [x +4 ] & 0xFF );
427+ sieveBuffer .rewind ();
428+
429+ // collect results: we check 8 sieve locations in one long and 32 longs at once
430+ x = 0 ;
431+ while (x < sieveArraySizeLong ) {
432+ long t = sieveBuffer .getLong ();
433+ t |= sieveBuffer .getLong ();
434+ t |= sieveBuffer .getLong ();
435+ t |= sieveBuffer .getLong ();
436+ t |= sieveBuffer .getLong ();
437+ t |= sieveBuffer .getLong ();
438+ t |= sieveBuffer .getLong ();
439+ t |= sieveBuffer .getLong ();
440+ t |= sieveBuffer .getLong ();
441+ t |= sieveBuffer .getLong ();
442+ t |= sieveBuffer .getLong ();
443+ t |= sieveBuffer .getLong ();
444+ t |= sieveBuffer .getLong ();
445+ t |= sieveBuffer .getLong ();
446+ t |= sieveBuffer .getLong ();
447+ t |= sieveBuffer .getLong ();
448+ t |= sieveBuffer .getLong ();
449+ t |= sieveBuffer .getLong ();
450+ t |= sieveBuffer .getLong ();
451+ t |= sieveBuffer .getLong ();
452+ t |= sieveBuffer .getLong ();
453+ t |= sieveBuffer .getLong ();
454+ t |= sieveBuffer .getLong ();
455+ t |= sieveBuffer .getLong ();
456+ t |= sieveBuffer .getLong ();
457+ t |= sieveBuffer .getLong ();
458+ t |= sieveBuffer .getLong ();
459+ t |= sieveBuffer .getLong ();
460+ t |= sieveBuffer .getLong ();
461+ t |= sieveBuffer .getLong ();
462+ t |= sieveBuffer .getLong ();
463+ t |= sieveBuffer .getLong ();
464+ x += 32 ;
465+ if ((t & LONG_MASK ) == 0 ) continue ;
466+
467+ // go back and look in more detail
468+ x -= 32 ;
469+ sieveBuffer .position (x <<3 );
470+
471+ for (int l =0 ; l <32 ; l ++, x ++) {
472+ final long y = sieveBuffer .getLong ();
473+ if ((y & LONG_MASK ) != 0 ) {
474+ testLongNegative (y , x <<3 );
475+ }
387476 }
388477 }
389478 if (ANALYZE ) collectDuration += timer .capture ();
@@ -407,6 +496,40 @@ private void initializeSieveArray(int sieveArraySize) {
407496 }
408497 }
409498
499+ private void testLongPositive (long y , int x ) {
500+ if ((y & LOWER_MASK ) != 0 ) {
501+ final int y0 = (int ) y ;
502+ if ((y0 & 0x80 ) != 0 ) addSmoothCandidate (x , y0 & 0xFF );
503+ if ((y0 & 0x8000 ) != 0 ) addSmoothCandidate (x +1 , (y0 >> 8 ) & 0xFF );
504+ if ((y0 & 0x800000 ) != 0 ) addSmoothCandidate (x +2 , (y0 >>16 ) & 0xFF );
505+ if ((y0 & 0x80000000 ) != 0 ) addSmoothCandidate (x +3 , (y0 >>24 ) & 0xFF );
506+ }
507+ if ((y & UPPER_MASK ) != 0 ) {
508+ final int y1 = (int ) (y >> 32 );
509+ if ((y1 & 0x80 ) != 0 ) addSmoothCandidate (x +4 , y1 & 0xFF );
510+ if ((y1 & 0x8000 ) != 0 ) addSmoothCandidate (x +5 , (y1 >> 8 ) & 0xFF );
511+ if ((y1 & 0x800000 ) != 0 ) addSmoothCandidate (x +6 , (y1 >>16 ) & 0xFF );
512+ if ((y1 & 0x80000000 ) != 0 ) addSmoothCandidate (x +7 , (y1 >>24 ) & 0xFF );
513+ }
514+ }
515+
516+ private void testLongNegative (long y , int x ) {
517+ if ((y & LOWER_MASK ) != 0 ) {
518+ final int y0 = (int ) y ;
519+ if ((y0 & 0x80 ) != 0 ) addSmoothCandidate (- x , y0 & 0xFF );
520+ if ((y0 & 0x8000 ) != 0 ) addSmoothCandidate (-(x +1 ), (y0 >> 8 ) & 0xFF );
521+ if ((y0 & 0x800000 ) != 0 ) addSmoothCandidate (-(x +2 ), (y0 >>16 ) & 0xFF );
522+ if ((y0 & 0x80000000 ) != 0 ) addSmoothCandidate (-(x +3 ), (y0 >>24 ) & 0xFF );
523+ }
524+ if ((y & UPPER_MASK ) != 0 ) {
525+ final int y1 = (int ) (y >> 32 );
526+ if ((y1 & 0x80 ) != 0 ) addSmoothCandidate (-(x +4 ), y1 & 0xFF );
527+ if ((y1 & 0x8000 ) != 0 ) addSmoothCandidate (-(x +5 ), (y1 >> 8 ) & 0xFF );
528+ if ((y1 & 0x800000 ) != 0 ) addSmoothCandidate (-(x +6 ), (y1 >>16 ) & 0xFF );
529+ if ((y1 & 0x80000000 ) != 0 ) addSmoothCandidate (-(x +7 ), (y1 >>24 ) & 0xFF );
530+ }
531+ }
532+
410533 private void addSmoothCandidate (int x , int score ) {
411534 if (ANALYZE ) sieveHitCount ++;
412535
0 commit comments