Skip to content

Commit 2be58f8

Browse files
authored
Merge pull request #101 from klockla/testcase_94
Added test case for discussion #94
2 parents 350bd1b + 054bd6b commit 2be58f8

File tree

3 files changed

+299
-3
lines changed

3 files changed

+299
-3
lines changed

service/src/test/java/crawlercommons/urlfrontier/service/MemoryFrontierServiceTest.java

Lines changed: 148 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,27 @@
66
import static org.junit.jupiter.api.Assertions.assertEquals;
77
import static org.junit.jupiter.api.Assertions.assertTrue;
88

9+
import crawlercommons.urlfrontier.Urlfrontier.AckMessage;
10+
import crawlercommons.urlfrontier.Urlfrontier.DiscoveredURLItem;
911
import crawlercommons.urlfrontier.Urlfrontier.ListUrlParams;
12+
import crawlercommons.urlfrontier.Urlfrontier.StringList;
13+
import crawlercommons.urlfrontier.Urlfrontier.URLInfo;
1014
import crawlercommons.urlfrontier.Urlfrontier.URLItem;
1115
import crawlercommons.urlfrontier.Urlfrontier.URLStatusRequest;
1216
import crawlercommons.urlfrontier.service.memory.MemoryFrontierService;
1317
import io.grpc.stub.StreamObserver;
1418
import java.util.Iterator;
1519
import java.util.Map.Entry;
20+
import java.util.concurrent.atomic.AtomicBoolean;
1621
import java.util.concurrent.atomic.AtomicInteger;
1722
import org.junit.jupiter.api.BeforeAll;
23+
import org.junit.jupiter.api.MethodOrderer;
24+
import org.junit.jupiter.api.Order;
1825
import org.junit.jupiter.api.Test;
26+
import org.junit.jupiter.api.TestMethodOrder;
1927
import org.slf4j.LoggerFactory;
2028

29+
@TestMethodOrder(MethodOrderer.OrderAnnotation.class)
2130
class MemoryFrontierServiceTest {
2231

2332
private static final org.slf4j.Logger LOG =
@@ -32,6 +41,7 @@ static void setup() {
3241
}
3342

3443
@Test
44+
@Order(1)
3545
void testGetStatusDiscovered() {
3646

3747
String crawlId = "crawl_id";
@@ -77,6 +87,7 @@ public void onCompleted() {
7787
}
7888

7989
@Test
90+
@Order(2)
8091
void testGetStatusCompleted() {
8192

8293
String crawlId = "crawl_id";
@@ -110,7 +121,7 @@ public void onError(Throwable t) {
110121

111122
@Override
112123
public void onCompleted() {
113-
LOG.info("completed testGetStatusKnown");
124+
LOG.info("completed testGetStatusCompleted");
114125
}
115126
};
116127

@@ -121,6 +132,7 @@ public void onCompleted() {
121132
}
122133

123134
@Test
135+
@Order(3)
124136
void testNotFound() {
125137
String crawlId = "crawl_id";
126138
String url = "https://www.example3.com";
@@ -169,6 +181,7 @@ private void logURLItem(URLItem item) {
169181
}
170182

171183
@Test
184+
@Order(4)
172185
void testGetStatusToRefetch() {
173186

174187
String crawlId = "crawl_id";
@@ -205,7 +218,7 @@ public void onError(Throwable t) {
205218

206219
@Override
207220
public void onCompleted() {
208-
LOG.info("completed testGetStatusKnown");
221+
LOG.info("completed testGetStatusToRefetch");
209222
}
210223
};
211224

@@ -216,6 +229,7 @@ public void onCompleted() {
216229
}
217230

218231
@Test
232+
@Order(5)
219233
void testListAllURLs() {
220234

221235
ListUrlParams params =
@@ -254,6 +268,7 @@ public void onCompleted() {
254268
}
255269

256270
@Test
271+
@Order(6)
257272
void testListURLsinglequeue() {
258273

259274
ListUrlParams params =
@@ -296,6 +311,7 @@ public void onCompleted() {
296311
}
297312

298313
@Test
314+
@Order(7)
299315
void testMemoryIterator() {
300316
int nbQueues = 0;
301317
int nbUrls = 0;
@@ -317,6 +333,7 @@ void testMemoryIterator() {
317333
}
318334

319335
@Test
336+
@Order(8)
320337
void testMemoryIteratorSingleQueue() {
321338
int nbQueues = 0;
322339
int nbUrls = 0;
@@ -340,4 +357,133 @@ void testMemoryIteratorSingleQueue() {
340357
assertEquals(1, nbQueues);
341358
assertEquals(3, nbUrls);
342359
}
360+
361+
/**
 * Test case for discussion #94: re-submitting a URL as "discovered" must not reschedule it.
 *
 * <p>Steps, as performed below:
 *
 * <ol>
 *   <li>query the status of the URL and, if a known status is returned, check that its
 *       refetchable-from date is 0;
 *   <li>send the same URL through putURLs as a DiscoveredURLItem and expect exactly one
 *       acknowledgement with status SKIPPED;
 *   <li>query the status again and repeat the check from step 1.
 * </ol>
 *
 * <p>Runs with {@code @Order(99)}, i.e. after the {@code @Order(1)}..{@code @Order(8)} tests of
 * this class. NOTE(review): the URL is never inserted by this method, so it presumably comes from
 * the setup() fixture or an earlier test (not visible here) -- confirm.
 */
@Test
362+
@Order(99)
363+
void testNoRescheduleCompleted() {
364+
365+
String crawlId = "crawl_id";
366+
String url2 = "https://www.mysite.com/completed";
367+
String key2 = "queue_mysite";
368+
StringList sl2 = StringList.newBuilder().addValues("md2").build();
369+
370+
// Builder for the URLItem that is sent through putURLs further down
crawlercommons.urlfrontier.Urlfrontier.URLItem.Builder builder1 = URLItem.newBuilder();
371+
372+
// Observer for the first getURLStatus call (status before the put)
StreamObserver<URLItem> statusObserver =
373+
new StreamObserver<>() {
374+
375+
@Override
376+
public void onNext(URLItem value) {
377+
// receives the status item returned by getURLStatus for the requested URL
378+
logURLItem(value);
379+
380+
// Internally, MemoryFrontierService does not make a distinction
381+
// between discovered and known which have to be re-fetched
// NOTE(review): the assertion is only evaluated when the item carries a
// known status; for any other item this callback checks nothing.
382+
if (value.hasKnown()) {
383+
assertEquals(0, value.getKnown().getRefetchableFromDate());
384+
}
385+
}
386+
387+
@Override
388+
public void onError(Throwable t) {
// NOTE(review): the error is only printed here; nothing in this method
// records it or fails the test because of it.
389+
t.printStackTrace();
390+
}
391+
392+
@Override
393+
public void onCompleted() {
394+
LOG.info("completed testNoRescheduleCompleted 1/2");
395+
}
396+
};
397+
398+
// First check that we have the URL as Known URL with a refetch date of 0
399+
URLStatusRequest request =
400+
URLStatusRequest.newBuilder().setCrawlID(crawlId).setUrl(url2).setKey(key2).build();
401+
402+
memoryFrontierService.getURLStatus(request, statusObserver);
403+
404+
// PutURL for the same URL with Discovered status
405+
URLInfo info2 =
406+
URLInfo.newBuilder()
407+
.setUrl(url2)
408+
.setCrawlID(crawlId)
409+
.setKey(key2)
410+
.putMetadata("meta1", sl2)
411+
.build();
412+
413+
DiscoveredURLItem disco2 = DiscoveredURLItem.newBuilder().setInfo(info2).build();
414+
// builder1 has not been populated since its creation above, so clear() resets an empty builder
builder1.clear();
415+
builder1.setDiscovered(disco2);
416+
builder1.setID(crawlId + "_" + url2);
417+
418+
// Flag and counters updated by the putURLs response observer below.
// Only `skipped` is asserted in this method; `completed`, `acked`, `failed`
// and `ok` are written but never read here.
final AtomicBoolean completed = new AtomicBoolean(false);
419+
final AtomicInteger acked = new AtomicInteger(0);
420+
final AtomicInteger failed = new AtomicInteger(0);
421+
final AtomicInteger skipped = new AtomicInteger(0);
422+
final AtomicInteger ok = new AtomicInteger(0);
423+
StreamObserver<crawlercommons.urlfrontier.Urlfrontier.AckMessage> responseObserver =
424+
new StreamObserver<>() {
425+
426+
@Override
427+
public void onNext(crawlercommons.urlfrontier.Urlfrontier.AckMessage value) {
428+
// receives confirmation that the value has been received
429+
acked.addAndGet(1);
// Tally the acknowledgement by status: SKIPPED, FAIL or OK
430+
if (value.getStatus().equals(AckMessage.Status.SKIPPED)) {
431+
skipped.getAndIncrement();
432+
LOG.info("PutURL skipped");
433+
} else if (value.getStatus().equals(AckMessage.Status.FAIL)) {
434+
failed.getAndIncrement();
435+
LOG.info("PutURL failed");
436+
} else if (value.getStatus().equals(AckMessage.Status.OK)) {
437+
ok.getAndIncrement();
438+
LOG.info("PutURL OK");
439+
}
440+
}
441+
442+
@Override
443+
public void onError(Throwable t) {
// An error also marks the stream as finished
444+
completed.set(true);
445+
t.printStackTrace();
446+
}
447+
448+
@Override
449+
public void onCompleted() {
450+
completed.set(true);
451+
LOG.info("Completed putURL");
452+
}
453+
};
454+
455+
// Send the single discovered item, then close the request stream
StreamObserver<URLItem> streamObserver = memoryFrontierService.putURLs(responseObserver);
456+
streamObserver.onNext(builder1.build());
457+
streamObserver.onCompleted();
458+
459+
// The put is expected to be acknowledged as SKIPPED exactly once.
// NOTE(review): this is asserted immediately after onCompleted(), which assumes the
// acknowledgement has already been delivered to responseObserver by then -- confirm.
assertEquals(1, skipped.get());
460+
461+
// Observer for the second getURLStatus call (status after the put); same checks as the first
StreamObserver<URLItem> statusObserver2 =
462+
new StreamObserver<>() {
463+
464+
@Override
465+
public void onNext(URLItem value) {
466+
// receives the status item returned by getURLStatus for the requested URL
467+
logURLItem(value);
468+
469+
// Internally, MemoryFrontierService does not make a distinction
470+
// between discovered and known which have to be re-fetched
// NOTE(review): as above, nothing is asserted unless the item has a known status.
471+
if (value.hasKnown()) {
472+
assertEquals(0, value.getKnown().getRefetchableFromDate());
473+
}
474+
}
475+
476+
@Override
477+
public void onError(Throwable t) {
// NOTE(review): printed only; does not fail the test from here.
478+
t.printStackTrace();
479+
}
480+
481+
@Override
482+
public void onCompleted() {
483+
LOG.info("completed testNoRescheduleCompleted 2/2");
484+
}
485+
};
486+
487+
// Re-query with the same request to check the status after the skipped put
memoryFrontierService.getURLStatus(request, statusObserver2);
488+
}
343489
}

0 commit comments

Comments
 (0)