Use of org.datavec.api.util.ClassPathResource in the deeplearning4j project by deeplearning4j.
From the class PrefetchingSentenceIteratorTest, method testLoadedIterator1:
@Test
public void testLoadedIterator1() throws Exception {
    // Resolve the large sentence corpus shipped on the test classpath.
    File corpus = new ClassPathResource("/big/raw_sentences.txt").getFile();
    BasicLineIterator lineIterator = new BasicLineIterator(corpus);
    // Wrap the plain line iterator in a background-prefetching iterator with a 1000-line buffer.
    PrefetchingSentenceIterator prefetcher =
                    new PrefetchingSentenceIterator.Builder(lineIterator).setFetchSize(1000).build();

    log.info("Phase 1 starting");

    int processed = 0;
    while (prefetcher.hasNext()) {
        String sentence = prefetcher.nextSentence();
        // We could imitate per-line workload in the consumer thread via Thread.sleep;
        // it is left disabled so the test stays fast. Uncomment the next line only
        // when manually exercising the prefetcher under load.
        // Thread.sleep(0, 10);
        processed++;
        if (processed % 10000 == 0) {
            log.info("Line processed: " + processed);
        }
    }
}
Use of org.datavec.api.util.ClassPathResource in the deeplearning4j project by deeplearning4j.
From the class PrefetchingSentenceIteratorTest, method testHasMoreLinesFile:
@Test
public void testHasMoreLinesFile() throws Exception {
    // Locate the big corpus on the classpath and drive it through the prefetcher.
    File corpus = new ClassPathResource("/big/raw_sentences.txt").getFile();
    BasicLineIterator lineIterator = new BasicLineIterator(corpus);
    PrefetchingSentenceIterator prefetcher =
                    new PrefetchingSentenceIterator.Builder(lineIterator).setFetchSize(1000).build();

    log.info("Phase 1 starting");

    // First pass: count every sentence the prefetcher yields.
    int count = 0;
    while (prefetcher.hasNext()) {
        String sentence = prefetcher.nextSentence();
        // log.info(sentence);
        count++;
    }
    // The corpus is known to contain exactly 97162 lines.
    assertEquals(97162, count);

    log.info("Phase 2 starting");

    // Second pass after reset() must yield the exact same number of sentences.
    prefetcher.reset();
    count = 0;
    while (prefetcher.hasNext()) {
        String sentence = prefetcher.nextSentence();
        count++;
    }
    assertEquals(97162, count);
}
Use of org.datavec.api.util.ClassPathResource in the deeplearning4j project by deeplearning4j.
From the class StreamLineIteratorTest, method testHasNext:
@Test
public void testHasNext() throws Exception {
    // Resolve the 24-line Reuters sample document from the test classpath.
    ClassPathResource reuters5250 = new ClassPathResource("/reuters/5250");
    File f = reuters5250.getFile();
    // try-with-resources fixes a leak in the original: the FileInputStream was
    // opened but never closed, so the descriptor lingered if the iterator threw
    // or did not take ownership of the stream.
    try (FileInputStream stream = new FileInputStream(f)) {
        StreamLineIterator iterator = new StreamLineIterator.Builder(stream).setFetchSize(100).build();
        int cnt = 0;
        while (iterator.hasNext()) {
            String line = iterator.nextSentence();
            // Every yielded sentence must be non-null.
            assertNotEquals(null, line);
            logger.info("Line: " + line);
            cnt++;
        }
        // The sample file is known to contain exactly 24 lines.
        assertEquals(24, cnt);
    }
}
Use of org.datavec.api.util.ClassPathResource in the deeplearning4j project by deeplearning4j.
From the class GloveTest, method before:
@Before
public void before() throws Exception {
    // Build the shared sentence iterator over the classpath corpus for every test.
    File corpusFile = new ClassPathResource("/raw_sentences.txt").getFile();
    iter = new LineSentenceIterator(corpusFile);
    // Lower-case each sentence before it reaches the vectorizer.
    iter.setPreProcessor(new SentencePreProcessor() {
        @Override
        public String preProcess(String sentence) {
            return sentence.toLowerCase();
        }
    });
}
Use of org.datavec.api.util.ClassPathResource in the deeplearning4j project by deeplearning4j.
From the class ParagraphVectorsTest, method testParagraphVectorsDM:
@Test
public void testParagraphVectorsDM() throws Exception {
    // Train PV-DM paragraph vectors on the big sentence corpus and sanity-check the result.
    File corpus = new ClassPathResource("/big/raw_sentences.txt").getFile();
    SentenceIterator sentences = new BasicLineIterator(corpus);

    AbstractCache<VocabWord> vocab = new AbstractCache.Builder<VocabWord>().build();

    TokenizerFactory tokenizer = new DefaultTokenizerFactory();
    tokenizer.setTokenPreProcessor(new CommonPreprocessor());

    // Documents are labeled DOC_0, DOC_1, ... in corpus order.
    LabelsSource labels = new LabelsSource("DOC_");

    ParagraphVectors paragraphVectors = new ParagraphVectors.Builder()
                    .minWordFrequency(1)
                    .iterations(2)
                    .seed(119)
                    .epochs(3)
                    .layerSize(100)
                    .learningRate(0.025)
                    .labelsSource(labels)
                    .windowSize(5)
                    .iterate(sentences)
                    .trainWordVectors(true)
                    .vocabCache(vocab)
                    .tokenizerFactory(tokenizer)
                    .negativeSample(0)
                    .useHierarchicSoftmax(true)
                    .sampling(0)
                    .workers(1)
                    .usePreciseWeightInit(true)
                    .sequenceLearningAlgorithm(new DM<VocabWord>())
                    .build();

    paragraphVectors.fit();

    // Frequent words must appear more than once, and with distinct counts.
    int dayFrequency = vocab.wordFrequency("day");
    int meFrequency = vocab.wordFrequency("me");
    assertNotEquals(1, dayFrequency);
    assertNotEquals(1, meFrequency);
    assertNotEquals(dayFrequency, meFrequency);

    double simDN = paragraphVectors.similarity("day", "night");
    log.info("day/night similariry: {}", simDN);

    double similarity1 = paragraphVectors.similarity("DOC_9835", "DOC_12492");
    log.info("9835/12492 similarity: " + similarity1);
    // assertTrue(similarity1 > 0.2d);

    double similarity2 = paragraphVectors.similarity("DOC_3720", "DOC_16392");
    log.info("3720/16392 similarity: " + similarity2);
    // assertTrue(similarity2 > 0.2d);

    double similarity3 = paragraphVectors.similarity("DOC_6347", "DOC_3720");
    log.info("6347/3720 similarity: " + similarity3);
    // assertTrue(similarity3 > 0.6d);

    // Unrelated documents should stay clearly dissimilar.
    double similarityX = paragraphVectors.similarity("DOC_3720", "DOC_9852");
    log.info("3720/9852 similarity: " + similarityX);
    assertTrue(similarityX < 0.5d);

    // DM inference: two near-identical sentences should infer near-identical vectors.
    INDArray original = paragraphVectors.getWordVectorMatrix("DOC_16392").dup();
    INDArray inferredA1 = paragraphVectors.inferVector("This is my work");
    INDArray inferredB1 = paragraphVectors.inferVector("This is my work .");
    double cosAO1 = Transforms.cosineSim(inferredA1.dup(), original.dup());
    double cosAB1 = Transforms.cosineSim(inferredA1.dup(), inferredB1.dup());
    log.info("Cos O/A: {}", cosAO1);
    log.info("Cos A/B: {}", cosAB1);
}
Aggregations