use of us.codecraft.webmagic.Request in project webmagic by code4craft.
the class FileCacheQueueScheduler method readUrlFile.
/**
 * Reloads the URL cache file into memory.
 *
 * <p>Every line of the file named by {@code getFileName(fileUrlAllName)} is trimmed and
 * added to {@code urls}. Lines whose 1-based line number is greater than the current
 * {@code cursor} value are additionally wrapped in a {@link Request} and appended to
 * {@code queue}.
 *
 * @throws IOException if the file cannot be opened or read
 */
private void readUrlFile() throws IOException {
    BufferedReader fileUrlReader = null;
    try {
        fileUrlReader = new BufferedReader(new FileReader(getFileName(fileUrlAllName)));
        String line;
        int lineReaded = 0;
        while ((line = fileUrlReader.readLine()) != null) {
            // Trim once and use the same value for both collections, so the URL stored in
            // `urls` is exactly the URL carried by the queued Request.
            String url = line.trim();
            urls.add(url);
            lineReaded++;
            // Only lines past the cursor position are re-queued.
            if (lineReaded > cursor.get()) {
                queue.add(new Request(url));
            }
        }
    } finally {
        // The reader is null when opening the file failed; nothing to close in that case.
        if (fileUrlReader != null) {
            IOUtils.closeQuietly(fileUrlReader);
        }
    }
}
use of us.codecraft.webmagic.Request in project webmagic by code4craft.
the class ModelPageProcessorTest method getMockPage.
/**
 * Builds a {@link Page} fixture from the classpath resource
 * {@code html/mock-webmagic.html}, with its request and URL both set to
 * {@code http://webmagic.io/list/0}.
 *
 * @return the populated mock page
 * @throws IOException if the resource is missing from the classpath or cannot be read
 */
private Page getMockPage() throws IOException {
    final String resource = "html/mock-webmagic.html";
    final String url = "http://webmagic.io/list/0";
    Page page = new Page();
    java.io.InputStream html = getClass().getClassLoader().getResourceAsStream(resource);
    if (html == null) {
        // getResourceAsStream signals "not found" with null; fail with a clear message
        // instead of an NPE from inside the read call.
        throw new IOException("Test resource not found on classpath: " + resource);
    }
    try {
        page.setRawText(IOUtils.toString(html));
    } finally {
        // Release the resource stream even when reading fails.
        IOUtils.closeQuietly(html);
    }
    page.setRequest(new Request(url));
    page.setUrl(new PlainText(url));
    return page;
}
use of us.codecraft.webmagic.Request in project webmagic by code4craft.
the class BloomFilterDuplicateRemoverTest method testMemory.
/**
 * Manual benchmark (ignored by default): feeds the same sequence of numeric-string
 * requests through a {@code BloomFilterDuplicateRemover} and then a
 * {@code HashSetDuplicateRemover}, printing elapsed milliseconds and the change in
 * JVM free memory for each.
 */
@Ignore("long time")
@Test
public void testMemory() throws Exception {
    final int times = 5000000;
    final Runtime runtime = Runtime.getRuntime();

    // Phase 1: bloom filter.
    DuplicateRemover duplicateRemover = new BloomFilterDuplicateRemover(times, 0.005);
    long freeBefore = runtime.freeMemory();
    long startedAt = System.currentTimeMillis();
    for (int n = 0; n < times; n++) {
        duplicateRemover.isDuplicate(new Request(String.valueOf(n)), null);
    }
    long bloomElapsed = System.currentTimeMillis() - startedAt;
    System.out.println("Time used by bloomfilter:" + bloomElapsed);
    long bloomMemory = freeBefore - runtime.freeMemory();
    System.out.println("Memory used by bloomfilter:" + bloomMemory);

    // Phase 2: hash set. The same variable is reassigned so the bloom filter is no
    // longer referenced from this method when System.gc() is requested.
    duplicateRemover = new HashSetDuplicateRemover();
    System.gc();
    freeBefore = runtime.freeMemory();
    startedAt = System.currentTimeMillis();
    for (int n = 0; n < times; n++) {
        duplicateRemover.isDuplicate(new Request(String.valueOf(n)), null);
    }
    long hashSetElapsed = System.currentTimeMillis() - startedAt;
    System.out.println("Time used by hashset:" + hashSetElapsed);
    long hashSetMemory = freeBefore - runtime.freeMemory();
    System.out.println("Memory used by hashset:" + hashSetMemory);
}
use of us.codecraft.webmagic.Request in project webmagic by code4craft.
the class BloomFilterDuplicateRemoverTest method testRemove.
/**
 * Checks that the first sighting of a URL is reported as new and the second as a
 * duplicate, for two distinct URLs.
 */
@Test
public void testRemove() throws Exception {
    BloomFilterDuplicateRemover remover = new BloomFilterDuplicateRemover(10);

    // "a": first seen -> not a duplicate, second time -> duplicate.
    assertThat(remover.isDuplicate(new Request("a"), null)).isFalse();
    assertThat(remover.isDuplicate(new Request("a"), null)).isTrue();

    // "b": same expectations for a different URL.
    assertThat(remover.isDuplicate(new Request("b"), null)).isFalse();
    assertThat(remover.isDuplicate(new Request("b"), null)).isTrue();
}
use of us.codecraft.webmagic.Request in project webmagic by code4craft.
the class DelayQueueSchedulerTest method test.
/**
 * Manual check (ignored by default because it never terminates): pushes one request
 * into a {@code DelayQueueScheduler} built with a 1-second setting, then polls
 * forever, printing the current time and the polled request on each iteration.
 */
@Ignore("infinite")
@Test
public void test() {
    DelayQueueScheduler scheduler = new DelayQueueScheduler(1, TimeUnit.SECONDS);
    scheduler.push(new Request("1"), null);
    for (;;) {
        Request next = scheduler.poll(null);
        // Timestamp is taken after poll returns, as in the printed line's original order.
        long now = System.currentTimeMillis();
        System.out.println(now + "\t" + next);
    }
}
Aggregations