Example use of us.codecraft.webmagic.Request in the project webmagic by code4craft.
From the class BloomFilterDuplicateRemoverTest, method testMissHit.
/**
 * Runs a large number of distinct URLs through a {@link BloomFilterDuplicateRemover},
 * asking about each URL twice in a row, and prints three tallies:
 * first checks answered "not duplicate" (right), first checks answered
 * "duplicate" (wrong), and second checks answered "not duplicate" (miss check).
 *
 * <p>Nothing is asserted; the counts are only written to standard output.
 * The test is ignored by default because of its running time.
 */
@Ignore("long time")
@Test
public void testMissHit() throws Exception {
    final int total = 5000000;
    // NOTE(review): 0.01 is presumably the expected false-positive rate - confirm against the constructor.
    DuplicateRemover remover = new BloomFilterDuplicateRemover(total, 0.01);

    int firstSeenCount = 0;
    int falseDuplicateCount = 0;
    int missedDuplicateCount = 0;

    for (int n = 0; n < total; n++) {
        String url = String.valueOf(n);

        // First query for this URL: it has never been offered before.
        if (duplicateRemoverSaysDuplicate(remover, url)) {
            falseDuplicateCount++;
        } else {
            firstSeenCount++;
        }

        // Second query for the very same URL: a "not duplicate" answer is a miss.
        if (!duplicateRemoverSaysDuplicate(remover, url)) {
            missedDuplicateCount++;
        }
    }

    System.out.println("Right count: " + firstSeenCount + " Wrong count: " + falseDuplicateCount + " Miss check: " + missedDuplicateCount);
}

/** Wraps {@code url} in a fresh {@link Request} and queries the remover with a null task. */
private static boolean duplicateRemoverSaysDuplicate(DuplicateRemover remover, String url) {
    return remover.isDuplicate(new Request(url), null);
}
Example use of us.codecraft.webmagic.Request in the project webmagic by code4craft.
From the class RedisPrioritySchedulerTest, method test.
/**
 * Pushes three requests with priorities 1, 0 and -1 into the priority scheduler and
 * checks that they are polled back in that order (highest priority first), each with
 * its own URL and its own {@code "name"} extra.
 *
 * <p>Ignored by default because it depends on the external environment.
 */
@Ignore("environment depended")
@Test
public void test() {
    Task task = new Task() {
        @Override
        public String getUUID() {
            return "TestTask";
        }

        @Override
        public Site getSite() {
            return null;
        }
    };
    // Start from a clean duplicate-check state so earlier runs cannot filter these URLs.
    scheduler.resetDuplicateCheck(task);

    Request request = new Request("https://www.google.com");
    Request request1 = new Request("https://www.facebook.com/");
    Request request2 = new Request("https://twitter.com");
    request.setPriority(1).putExtra("name", "google");
    request1.setPriority(0).putExtra("name", "facebook");
    request2.setPriority(-1).putExtra("name", "twitter");

    scheduler.push(request, task);
    scheduler.push(request1, task);
    scheduler.push(request2, task);

    Request googlePolled = scheduler.poll(task);
    Request facebookPolled = scheduler.poll(task);
    Request twitterPolled = scheduler.poll(task);

    // Each polled request is compared against the request pushed with the matching
    // priority; arguments follow JUnit's (expected, actual) order.
    Assert.assertEquals(request.getUrl(), googlePolled.getUrl());
    Assert.assertEquals(request.getExtra("name"), googlePolled.getExtra("name"));
    Assert.assertEquals(request1.getUrl(), facebookPolled.getUrl());
    Assert.assertEquals(request1.getExtra("name"), facebookPolled.getExtra("name"));
    Assert.assertEquals(request2.getUrl(), twitterPolled.getUrl());
    Assert.assertEquals(request2.getExtra("name"), twitterPolled.getExtra("name"));
}
Example use of us.codecraft.webmagic.Request in the project webmagic by code4craft.
From the class RedisSchedulerTest, method test.
/**
 * Pushes a single request carrying one extra into the Redis scheduler, polls it
 * straight back for the same task, and prints whatever was polled.
 *
 * <p>No assertions are made. Ignored by default because it depends on the
 * external environment.
 */
@Ignore("environment depended")
@Test
public void test() {
    Request pushed = new Request("http://www.ibm.com/developerworks/cn/java/j-javadev2-22/");
    pushed.putExtra("1", "2");

    // Minimal task: fixed UUID, no site configuration.
    Task owner = new Task() {
        @Override
        public Site getSite() {
            return null;
        }

        @Override
        public String getUUID() {
            return "1";
        }
    };

    redisScheduler.push(pushed, owner);
    System.out.println(redisScheduler.poll(owner));
}
Example use of us.codecraft.webmagic.Request in the project webmagic by code4craft.
From the class FilePipelineTest, method before.
/**
 * One-time fixture setup: builds the shared {@code resultItems} (one "content" entry
 * plus the request it is attached to) and the shared {@code task}, whose UUID is a
 * new random value on every call and whose site is {@code null}.
 */
@BeforeClass
public static void before() {
    task = new Task() {
        @Override
        public Site getSite() {
            return null;
        }

        @Override
        public String getUUID() {
            return UUID.randomUUID().toString();
        }
    };

    Request sourceRequest = new Request("http://www.baidu.com");
    resultItems = new ResultItems();
    resultItems.put("content", "webmagic 爬虫工具");
    resultItems.setRequest(sourceRequest);
}
Example use of us.codecraft.webmagic.Request in the project webmagic by code4craft.
From the class DuplicateRemovedSchedulerTest, method test_no_duplicate_removed_for_post_request.
/**
 * Verifies that pushing a POST request never consults the duplicate remover:
 * after the push, the mocked remover must have received zero
 * {@code isDuplicate} calls.
 */
@Test
public void test_no_duplicate_removed_for_post_request() throws Exception {
    Request postRequest = new Request("https://www.google.com/");
    postRequest.setMethod(HttpConstant.Method.POST);

    DuplicateRemover removerMock = Mockito.mock(DuplicateRemover.class);
    duplicateRemovedScheduler.setDuplicateRemover(removerMock);

    duplicateRemovedScheduler.push(postRequest, null);

    verify(removerMock, times(0)).isDuplicate(any(Request.class), any(Task.class));
}
Aggregations