66import static org .junit .jupiter .api .Assertions .assertEquals ;
77import static org .junit .jupiter .api .Assertions .assertTrue ;
88
9+ import crawlercommons .urlfrontier .Urlfrontier .AckMessage ;
10+ import crawlercommons .urlfrontier .Urlfrontier .DiscoveredURLItem ;
911import crawlercommons .urlfrontier .Urlfrontier .ListUrlParams ;
12+ import crawlercommons .urlfrontier .Urlfrontier .StringList ;
13+ import crawlercommons .urlfrontier .Urlfrontier .URLInfo ;
1014import crawlercommons .urlfrontier .Urlfrontier .URLItem ;
1115import crawlercommons .urlfrontier .Urlfrontier .URLStatusRequest ;
1216import crawlercommons .urlfrontier .service .memory .MemoryFrontierService ;
1317import io .grpc .stub .StreamObserver ;
1418import java .util .Iterator ;
1519import java .util .Map .Entry ;
20+ import java .util .concurrent .atomic .AtomicBoolean ;
1621import java .util .concurrent .atomic .AtomicInteger ;
1722import org .junit .jupiter .api .BeforeAll ;
23+ import org .junit .jupiter .api .MethodOrderer ;
24+ import org .junit .jupiter .api .Order ;
1825import org .junit .jupiter .api .Test ;
26+ import org .junit .jupiter .api .TestMethodOrder ;
1927import org .slf4j .LoggerFactory ;
2028
29+ @ TestMethodOrder (MethodOrderer .OrderAnnotation .class )
2130class MemoryFrontierServiceTest {
2231
2332 private static final org .slf4j .Logger LOG =
@@ -32,6 +41,7 @@ static void setup() {
3241 }
3342
3443 @ Test
44+ @ Order (1 )
3545 void testGetStatusDiscovered () {
3646
3747 String crawlId = "crawl_id" ;
@@ -77,6 +87,7 @@ public void onCompleted() {
7787 }
7888
7989 @ Test
90+ @ Order (2 )
8091 void testGetStatusCompleted () {
8192
8293 String crawlId = "crawl_id" ;
@@ -110,7 +121,7 @@ public void onError(Throwable t) {
110121
111122 @ Override
112123 public void onCompleted () {
113- LOG .info ("completed testGetStatusKnown " );
124+ LOG .info ("completed testGetStatusCompleted " );
114125 }
115126 };
116127
@@ -121,6 +132,7 @@ public void onCompleted() {
121132 }
122133
123134 @ Test
135+ @ Order (3 )
124136 void testNotFound () {
125137 String crawlId = "crawl_id" ;
126138 String url = "https://www.example3.com" ;
@@ -169,6 +181,7 @@ private void logURLItem(URLItem item) {
169181 }
170182
171183 @ Test
184+ @ Order (4 )
172185 void testGetStatusToRefetch () {
173186
174187 String crawlId = "crawl_id" ;
@@ -205,7 +218,7 @@ public void onError(Throwable t) {
205218
206219 @ Override
207220 public void onCompleted () {
208- LOG .info ("completed testGetStatusKnown " );
221+ LOG .info ("completed testGetStatusToRefetch " );
209222 }
210223 };
211224
@@ -216,6 +229,7 @@ public void onCompleted() {
216229 }
217230
218231 @ Test
232+ @ Order (5 )
219233 void testListAllURLs () {
220234
221235 ListUrlParams params =
@@ -254,6 +268,7 @@ public void onCompleted() {
254268 }
255269
256270 @ Test
271+ @ Order (6 )
257272 void testListURLsinglequeue () {
258273
259274 ListUrlParams params =
@@ -296,6 +311,7 @@ public void onCompleted() {
296311 }
297312
298313 @ Test
314+ @ Order (7 )
299315 void testMemoryIterator () {
300316 int nbQueues = 0 ;
301317 int nbUrls = 0 ;
@@ -317,6 +333,7 @@ void testMemoryIterator() {
317333 }
318334
319335 @ Test
336+ @ Order (8 )
320337 void testMemoryIteratorSingleQueue () {
321338 int nbQueues = 0 ;
322339 int nbUrls = 0 ;
@@ -340,4 +357,133 @@ void testMemoryIteratorSingleQueue() {
340357 assertEquals (1 , nbQueues );
341358 assertEquals (3 , nbUrls );
342359 }
360+
361+ @ Test
362+ @ Order (99 )
363+ void testNoRescheduleCompleted () {
364+
365+ String crawlId = "crawl_id" ;
366+ String url2 = "https://www.mysite.com/completed" ;
367+ String key2 = "queue_mysite" ;
368+ StringList sl2 = StringList .newBuilder ().addValues ("md2" ).build ();
369+
370+ crawlercommons .urlfrontier .Urlfrontier .URLItem .Builder builder1 = URLItem .newBuilder ();
371+
372+ StreamObserver <URLItem > statusObserver =
373+ new StreamObserver <>() {
374+
375+ @ Override
376+ public void onNext (URLItem value ) {
377+ // receives confirmation that the value has been received
378+ logURLItem (value );
379+
380+ // Internally, MemoryFrontierService does not make a distinction
381+ // between discovered and known which have to be re-fetched
382+ if (value .hasKnown ()) {
383+ assertEquals (0 , value .getKnown ().getRefetchableFromDate ());
384+ }
385+ }
386+
387+ @ Override
388+ public void onError (Throwable t ) {
389+ t .printStackTrace ();
390+ }
391+
392+ @ Override
393+ public void onCompleted () {
394+ LOG .info ("completed testNoRescheduleCompleted 1/2" );
395+ }
396+ };
397+
398+ // First check that we have the URL as Known URL with a refetch date of 0
399+ URLStatusRequest request =
400+ URLStatusRequest .newBuilder ().setCrawlID (crawlId ).setUrl (url2 ).setKey (key2 ).build ();
401+
402+ memoryFrontierService .getURLStatus (request , statusObserver );
403+
404+ // PutURL for the same URL with Discovered status
405+ URLInfo info2 =
406+ URLInfo .newBuilder ()
407+ .setUrl (url2 )
408+ .setCrawlID (crawlId )
409+ .setKey (key2 )
410+ .putMetadata ("meta1" , sl2 )
411+ .build ();
412+
413+ DiscoveredURLItem disco2 = DiscoveredURLItem .newBuilder ().setInfo (info2 ).build ();
414+ builder1 .clear ();
415+ builder1 .setDiscovered (disco2 );
416+ builder1 .setID (crawlId + "_" + url2 );
417+
418+ final AtomicBoolean completed = new AtomicBoolean (false );
419+ final AtomicInteger acked = new AtomicInteger (0 );
420+ final AtomicInteger failed = new AtomicInteger (0 );
421+ final AtomicInteger skipped = new AtomicInteger (0 );
422+ final AtomicInteger ok = new AtomicInteger (0 );
423+ StreamObserver <crawlercommons .urlfrontier .Urlfrontier .AckMessage > responseObserver =
424+ new StreamObserver <>() {
425+
426+ @ Override
427+ public void onNext (crawlercommons .urlfrontier .Urlfrontier .AckMessage value ) {
428+ // receives confirmation that the value has been received
429+ acked .addAndGet (1 );
430+ if (value .getStatus ().equals (AckMessage .Status .SKIPPED )) {
431+ skipped .getAndIncrement ();
432+ LOG .info ("PutURL skipped" );
433+ } else if (value .getStatus ().equals (AckMessage .Status .FAIL )) {
434+ failed .getAndIncrement ();
435+ LOG .info ("PutURL failed" );
436+ } else if (value .getStatus ().equals (AckMessage .Status .OK )) {
437+ ok .getAndIncrement ();
438+ LOG .info ("PutURL OK" );
439+ }
440+ }
441+
442+ @ Override
443+ public void onError (Throwable t ) {
444+ completed .set (true );
445+ t .printStackTrace ();
446+ }
447+
448+ @ Override
449+ public void onCompleted () {
450+ completed .set (true );
451+ LOG .info ("Completed putURL" );
452+ }
453+ };
454+
455+ StreamObserver <URLItem > streamObserver = memoryFrontierService .putURLs (responseObserver );
456+ streamObserver .onNext (builder1 .build ());
457+ streamObserver .onCompleted ();
458+
459+ assertEquals (1 , skipped .get ());
460+
461+ StreamObserver <URLItem > statusObserver2 =
462+ new StreamObserver <>() {
463+
464+ @ Override
465+ public void onNext (URLItem value ) {
466+ // receives confirmation that the value has been received
467+ logURLItem (value );
468+
469+ // Internally, MemoryFrontierService does not make a distinction
470+ // between discovered and known which have to be re-fetched
471+ if (value .hasKnown ()) {
472+ assertEquals (0 , value .getKnown ().getRefetchableFromDate ());
473+ }
474+ }
475+
476+ @ Override
477+ public void onError (Throwable t ) {
478+ t .printStackTrace ();
479+ }
480+
481+ @ Override
482+ public void onCompleted () {
483+ LOG .info ("completed testNoRescheduleCompleted 2/2" );
484+ }
485+ };
486+
487+ memoryFrontierService .getURLStatus (request , statusObserver2 );
488+ }
343489}
0 commit comments