Use of us.codecraft.webmagic.scheduler.component.DuplicateRemover in project webmagic by code4craft.
The class BloomFilterDuplicateRemoverTest, method testMemory.
/**
 * Manual benchmark: feeds 5,000,000 distinct requests first through a
 * {@code BloomFilterDuplicateRemover} and then through a
 * {@code HashSetDuplicateRemover}, printing the elapsed milliseconds and the
 * growth in used heap for each run. Nothing is asserted; the numbers are
 * meant to be read by a human, which is why the test is ignored by default.
 *
 * <p>Used heap is computed as {@code totalMemory() - freeMemory()} rather than
 * as a difference of {@code freeMemory()} readings, because the latter is
 * distorted (and may even go negative) when the JVM grows the heap during the
 * run. Each baseline is taken after a {@code System.gc()} request and before
 * the remover under test is constructed, so the remover's own storage is
 * included in the reported figure. {@code System.gc()} is only a hint, so the
 * printed values remain approximate.
 *
 * @throws Exception declared for parity with the other tests in this class
 */
@Ignore("long time")
@Test
public void testMemory() throws Exception {
    int times = 5000000;
    Runtime runtime = Runtime.getRuntime();

    // --- bloom filter run ---
    System.gc();
    long usedBefore = runtime.totalMemory() - runtime.freeMemory();
    // Constructed after the baseline so the filter's own allocation is measured.
    DuplicateRemover duplicateRemover = new BloomFilterDuplicateRemover(times, 0.005);
    long time = System.currentTimeMillis();
    for (int i = 0; i < times; i++) {
        duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
    }
    System.out.println("Time used by bloomfilter:" + (System.currentTimeMillis() - time));
    System.out.println("Memory used by bloomfilter:"
            + (runtime.totalMemory() - runtime.freeMemory() - usedBefore));

    // --- hash set run ---
    // Drop the bloom filter first so the collector can reclaim it before the
    // second baseline is taken.
    duplicateRemover = null;
    System.gc();
    usedBefore = runtime.totalMemory() - runtime.freeMemory();
    duplicateRemover = new HashSetDuplicateRemover();
    time = System.currentTimeMillis();
    for (int i = 0; i < times; i++) {
        duplicateRemover.isDuplicate(new Request(String.valueOf(i)), null);
    }
    System.out.println("Time used by hashset:" + (System.currentTimeMillis() - time));
    System.out.println("Memory used by hashset:"
            + (runtime.totalMemory() - runtime.freeMemory() - usedBefore));
}
Use of us.codecraft.webmagic.scheduler.component.DuplicateRemover in project webmagic by code4craft.
The class BloomFilterDuplicateRemoverTest, method testMissHit.
/**
 * Manual check of a {@code BloomFilterDuplicateRemover} built for 5,000,000
 * entries with a 0.01 second constructor argument. Every number in
 * {@code [0, 5000000)} is submitted twice as a request URL:
 * <ul>
 *   <li>first submission reported as duplicate -> counted as "Wrong",
 *       otherwise as "Right";</li>
 *   <li>second submission NOT reported as duplicate -> counted as
 *       "Miss check".</li>
 * </ul>
 * The three counters are printed; nothing is asserted. Ignored by default
 * because of its running time.
 *
 * @throws Exception declared for parity with the other tests in this class
 */
@Ignore("long time")
@Test
public void testMissHit() throws Exception {
    final int total = 5000000;
    final DuplicateRemover remover = new BloomFilterDuplicateRemover(total, 0.01);

    int flaggedOnFirstSight = 0;
    int unflaggedOnSecondSight = 0;

    for (int n = 0; n < total; n++) {
        final String url = String.valueOf(n);

        // First sighting: a "duplicate" answer here is a false alarm.
        if (remover.isDuplicate(new Request(url), null)) {
            flaggedOnFirstSight++;
        }

        // Second sighting of the same url: it should now be reported as seen.
        if (!remover.isDuplicate(new Request(url), null)) {
            unflaggedOnSecondSight++;
        }
    }

    // Each iteration lands in exactly one of right/wrong, so right is the remainder.
    final int acceptedOnFirstSight = total - flaggedOnFirstSight;
    System.out.println("Right count: " + acceptedOnFirstSight
            + " Wrong count: " + flaggedOnFirstSight
            + " Miss check: " + unflaggedOnSecondSight);
}
Use of us.codecraft.webmagic.scheduler.component.DuplicateRemover in project webmagic by code4craft.
The class DuplicateRemovedSchedulerTest, method test_no_duplicate_removed_for_post_request.
/**
 * Pushing a POST request must not consult the duplicate remover: after one
 * {@code push}, the mocked {@code DuplicateRemover} is verified to have
 * received zero {@code isDuplicate} calls.
 *
 * @throws Exception declared for parity with the other tests in this class
 */
@Test
public void test_no_duplicate_removed_for_post_request() throws Exception {
    // Request under test: same URL as the GET case, but sent as POST.
    Request postRequest = new Request("https://www.google.com/");
    postRequest.setMethod(HttpConstant.Method.POST);

    DuplicateRemover removerMock = Mockito.mock(DuplicateRemover.class);
    duplicateRemovedScheduler.setDuplicateRemover(removerMock);

    duplicateRemovedScheduler.push(postRequest, null);

    // NOTE(review): the task passed to push() is null; whether any(Task.class)
    // matches a null argument depends on the Mockito version - confirm, since a
    // non-matching matcher would let this zero-calls check pass vacuously.
    verify(removerMock, times(0)).isDuplicate(any(Request.class), any(Task.class));
}
Use of us.codecraft.webmagic.scheduler.component.DuplicateRemover in project webmagic by code4craft.
The class DuplicateRemovedSchedulerTest, method test_duplicate_removed_for_get_request.
/**
 * Pushing a GET request must consult the duplicate remover exactly once:
 * after one {@code push}, the mocked {@code DuplicateRemover} is verified to
 * have received a single {@code isDuplicate} call.
 *
 * @throws Exception declared for parity with the other tests in this class
 */
@Test
public void test_duplicate_removed_for_get_request() throws Exception {
    // Request under test: explicitly marked as GET.
    Request getRequest = new Request("https://www.google.com/");
    getRequest.setMethod(HttpConstant.Method.GET);

    DuplicateRemover removerMock = Mockito.mock(DuplicateRemover.class);
    duplicateRemovedScheduler.setDuplicateRemover(removerMock);

    duplicateRemovedScheduler.push(getRequest, null);

    // NOTE(review): the task passed to push() is null; whether any(Task.class)
    // matches a null argument depends on the Mockito version - confirm.
    verify(removerMock, times(1)).isDuplicate(any(Request.class), any(Task.class));
}
Aggregations