use of org.datavec.api.util.ClassPathResource in project deeplearning4j by deeplearning4j.
the class BasicLineIteratorTest method testHasMoreLinesStream.
/**
 * Checks that a {@code BasicLineIterator} built on a {@link FileInputStream}
 * reports 97162 sentences on a first pass and again after {@code reset()}.
 *
 * @throws Exception if the classpath resource cannot be resolved or read
 */
@Test
public void testHasMoreLinesStream() throws Exception {
    ClassPathResource resource = new ClassPathResource("/big/raw_sentences.txt");
    File file = resource.getFile();

    // The test opens the stream itself, so it is also responsible for closing it;
    // try-with-resources guarantees that even when an assertion fails.
    try (FileInputStream stream = new FileInputStream(file)) {
        BasicLineIterator iterator = new BasicLineIterator(stream);

        int cnt = 0;
        while (iterator.hasNext()) {
            iterator.nextSentence();
            cnt++;
        }
        assertEquals(97162, cnt);

        // A second pass after reset() is expected to see the same number of sentences.
        iterator.reset();
        cnt = 0;
        while (iterator.hasNext()) {
            iterator.nextSentence();
            cnt++;
        }
        assertEquals(97162, cnt);
    }
}
use of org.datavec.api.util.ClassPathResource in project deeplearning4j by deeplearning4j.
the class BasicLineIteratorTest method testHasMoreLinesFile.
/**
 * Walks a file-backed {@code BasicLineIterator} twice, with a {@code reset()}
 * in between, and expects 97162 sentences on each pass.
 *
 * @throws Exception if the classpath resource cannot be resolved or read
 */
@Test
public void testHasMoreLinesFile() throws Exception {
    File sentencesFile = new ClassPathResource("/big/raw_sentences.txt").getFile();
    BasicLineIterator lines = new BasicLineIterator(sentencesFile);

    // First pass: drain the iterator and count what it produced.
    int firstPass = 0;
    for (; lines.hasNext(); firstPass++) {
        lines.nextSentence();
    }
    assertEquals(97162, firstPass);

    lines.reset();

    // Second pass: the count after reset() must match the first one.
    int secondPass = 0;
    for (; lines.hasNext(); secondPass++) {
        lines.nextSentence();
    }
    assertEquals(97162, secondPass);
}
use of org.datavec.api.util.ClassPathResource in project deeplearning4j by deeplearning4j.
the class MutipleEpochsSentenceIteratorTest method hasNext.
@Test
public void hasNext() throws Exception {
SentenceIterator iterator = new MutipleEpochsSentenceIterator(new BasicLineIterator(new ClassPathResource("/big/raw_sentences.txt").getFile()), 100);
int cnt = 0;
while (iterator.hasNext()) {
iterator.nextSentence();
cnt++;
}
assertEquals(9716200, cnt);
}
use of org.datavec.api.util.ClassPathResource in project deeplearning4j by deeplearning4j.
the class PrefetchingSentenceIteratorTest method testPerformance1.
@Test
public void testPerformance1() throws Exception {
ClassPathResource resource = new ClassPathResource("/big/raw_sentences.txt");
File file = resource.getFile();
BasicLineIterator iterator = new BasicLineIterator(file);
PrefetchingSentenceIterator fetcher = new PrefetchingSentenceIterator.Builder(new BasicLineIterator(file)).setFetchSize(500000).build();
long time01 = System.currentTimeMillis();
int cnt0 = 0;
while (iterator.hasNext()) {
iterator.nextSentence();
cnt0++;
}
long time02 = System.currentTimeMillis();
long time11 = System.currentTimeMillis();
int cnt1 = 0;
while (fetcher.hasNext()) {
fetcher.nextSentence();
cnt1++;
}
long time12 = System.currentTimeMillis();
log.info("Basic iterator: " + (time02 - time01));
log.info("Prefetched iterator: " + (time12 - time11));
long difference = (time12 - time11) - (time02 - time01);
log.info("Difference: " + difference);
// on small corpus time difference can fluctuate a lot
// but it's still can be used as effectiveness measurement
assertTrue(difference < 150);
}
use of org.datavec.api.util.ClassPathResource in project deeplearning4j by deeplearning4j.
the class DefaulTokenizerTests method testDefaultTokenizer2.
@Test
public void testDefaultTokenizer2() throws Exception {
String toTokenize = "Mary had a little lamb.";
TokenizerFactory t = new DefaultTokenizerFactory();
Tokenizer tokenizer = t.create(toTokenize);
Tokenizer tokenizer2 = t.create(new ByteArrayInputStream(toTokenize.getBytes()));
tokenizer2.countTokens();
while (tokenizer.hasMoreTokens()) {
String tok1 = tokenizer.nextToken();
String tok2 = tokenizer2.nextToken();
assertEquals(tok1, tok2);
}
System.out.println("-----------------------------------------------");
ClassPathResource resource = new ClassPathResource("reuters/5250");
String str = FileUtils.readFileToString(resource.getFile());
int stringCount = t.create(str).countTokens();
int stringCount2 = t.create(resource.getInputStream()).countTokens();
log.info("String tok: [" + stringCount + "], Stream tok: [" + stringCount2 + "], Difference: " + Math.abs(stringCount - stringCount2));
assertTrue(Math.abs(stringCount - stringCount2) < 2);
}
Aggregations