Use of org.deeplearning4j.models.sequencevectors.sequence.Sequence in project deeplearning4j by deeplearning4j.
From the class ParallelTransformerIteratorTest, method hasNext.
/**
 * Drains the iterator of a multithreading-enabled SentenceTransformer built over the
 * "/big/raw_sentences.txt" classpath resource. Every produced sequence must be non-null and
 * non-empty, and exactly 97162 sequences are expected in total.
 */
@Test
public void hasNext() throws Exception {
    SentenceIterator iterator = new BasicLineIterator(new ClassPathResource("/big/raw_sentences.txt").getFile());
    SentenceTransformer transformer = new SentenceTransformer.Builder()
                    .iterator(iterator)
                    .allowMultithreading(true)
                    .tokenizerFactory(factory)
                    .build();

    int cnt = 0;
    for (Iterator<Sequence<VocabWord>> iter = transformer.iterator(); iter.hasNext(); cnt++) {
        Sequence<VocabWord> sequence = iter.next();
        // the counter in the message identifies the offending sequence on failure
        String message = "Failed on [" + cnt + "] iteration";
        assertNotEquals(message, null, sequence);
        assertNotEquals(message, 0, sequence.size());
    }

    assertEquals(97162, cnt);
}
Use of org.deeplearning4j.models.sequencevectors.sequence.Sequence in project deeplearning4j by deeplearning4j.
From the class VocabConstructorTest, method testCounter1.
/**
 * Feeds a single sequence of three distinct words ("word", "test", "here") through a
 * VocabConstructor and checks that the target cache ends up with 3 words and reports a
 * frequency of 1 for "test", although that VocabWord was constructed with 2 as its first argument.
 */
@Test
public void testCounter1() throws Exception {
    VocabCache<VocabWord> vocabCache = new AbstractCache.Builder<VocabWord>().build();

    final List<VocabWord> words = new ArrayList<>();
    words.add(new VocabWord(1, "word"));
    words.add(new VocabWord(2, "test"));
    words.add(new VocabWord(1, "here"));

    // Source whose iterators each announce one element and then report exhaustion.
    Iterable<Sequence<VocabWord>> singleSequenceSource = new Iterable<Sequence<VocabWord>>() {
        @Override
        public Iterator<Sequence<VocabWord>> iterator() {
            return new Iterator<Sequence<VocabWord>>() {
                private final AtomicBoolean pending = new AtomicBoolean(true);

                @Override
                public boolean hasNext() {
                    // consuming check: true on the first call only, false on every later call
                    return pending.getAndSet(false);
                }

                @Override
                public Sequence<VocabWord> next() {
                    return new Sequence<>(words);
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    };

    SequenceIterator<VocabWord> sequenceIterator =
                    new AbstractSequenceIterator.Builder<>(singleSequenceSource).build();
    VocabConstructor<VocabWord> constructor = new VocabConstructor.Builder<VocabWord>()
                    .addSource(sequenceIterator, 0)
                    .useAdaGrad(false)
                    .setTargetVocabCache(vocabCache)
                    .build();
    constructor.buildJointVocabulary(false, true);

    assertEquals(3, vocabCache.numWords());
    assertEquals(1, vocabCache.wordFrequency("test"));
}
Use of org.deeplearning4j.models.sequencevectors.sequence.Sequence in project deeplearning4j by deeplearning4j.
From the class VocabConstructorTest, method testCounter2.
/**
 * Same scenario as a single three-word sequence run through a VocabConstructor, but here the
 * "test" VocabWord is constructed with 0 as its first argument. The target cache is still
 * expected to hold 3 words and to report a frequency of 1 for "test".
 */
@Test
public void testCounter2() throws Exception {
    VocabCache<VocabWord> vocabCache = new AbstractCache.Builder<VocabWord>().build();

    final List<VocabWord> words = new ArrayList<>();
    words.add(new VocabWord(1, "word"));
    words.add(new VocabWord(0, "test"));
    words.add(new VocabWord(1, "here"));

    // Source whose iterators each announce one element and then report exhaustion.
    Iterable<Sequence<VocabWord>> singleSequenceSource = new Iterable<Sequence<VocabWord>>() {
        @Override
        public Iterator<Sequence<VocabWord>> iterator() {
            return new Iterator<Sequence<VocabWord>>() {
                private final AtomicBoolean pending = new AtomicBoolean(true);

                @Override
                public boolean hasNext() {
                    // consuming check: true on the first call only, false on every later call
                    return pending.getAndSet(false);
                }

                @Override
                public Sequence<VocabWord> next() {
                    return new Sequence<>(words);
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    };

    SequenceIterator<VocabWord> sequenceIterator =
                    new AbstractSequenceIterator.Builder<>(singleSequenceSource).build();
    VocabConstructor<VocabWord> constructor = new VocabConstructor.Builder<VocabWord>()
                    .addSource(sequenceIterator, 0)
                    .useAdaGrad(false)
                    .setTargetVocabCache(vocabCache)
                    .build();
    constructor.buildJointVocabulary(false, true);

    assertEquals(3, vocabCache.numWords());
    assertEquals(1, vocabCache.wordFrequency("test"));
}
Use of org.deeplearning4j.models.sequencevectors.sequence.Sequence in project deeplearning4j by deeplearning4j.
From the class SparkParagraphVectors, method fitMultipleFiles.
/**
 * Builds the ParagraphVectors model from a JavaPairRDD in which each key is a label and each
 * value is a whole document held in a single string.
 *
 * @param documentsRdd pair RDD of (label, document-as-string) entries to fit the model on
 */
public void fitMultipleFiles(JavaPairRDD<String, String> documentsRdd) {
    validateConfiguration();

    JavaSparkContext sparkContext = new JavaSparkContext(documentsRdd.context());
    broadcastEnvironment(sparkContext);

    // The pair RDD only needs to be turned into JavaRDD<Sequence<VocabWord>>;
    // the actual fitting is delegated to the sequence-based implementation in the superclass.
    KeySequenceConvertFunction converter = new KeySequenceConvertFunction(configurationBroadcast);
    JavaRDD<Sequence<VocabWord>> sequenceRdd = documentsRdd.map(converter);
    super.fitSequences(sequenceRdd);
}
Use of org.deeplearning4j.models.sequencevectors.sequence.Sequence in project deeplearning4j by deeplearning4j.
From the class DocumentSequenceConvertFunction, method call.
/**
 * Converts a labelled document into a Sequence of VocabWord elements.
 *
 * Elements come from the document's referenced content when that is present and non-empty;
 * otherwise the document content is tokenized (creating the tokenizer factory on first use) and
 * every non-null, non-empty token is added as a VocabWord. Each non-null, non-empty document
 * label is then attached to the sequence as a VocabWord marked as a label.
 *
 * @param document the labelled document to convert
 * @return the sequence holding the document's elements and labels
 */
@Override
public Sequence<VocabWord> call(LabelledDocument document) throws Exception {
    Sequence<VocabWord> result = new Sequence<>();

    // elements
    if (document.getReferencedContent() == null || document.getReferencedContent().isEmpty()) {
        // nothing pre-built on the document, so tokenize its raw content
        if (tokenizerFactory == null) {
            instantiateTokenizerFactory();
        }
        for (String token : tokenizerFactory.create(document.getContent()).getTokens()) {
            if (token != null && !token.isEmpty()) {
                result.addElement(new VocabWord(1.0, token));
            }
        }
    } else {
        result.addElements(document.getReferencedContent());
    }

    // labels
    for (String label : document.getLabels()) {
        if (label != null && !label.isEmpty()) {
            VocabWord labelWord = new VocabWord(1.0, label);
            labelWord.markAsLabel(true);
            result.addSequenceLabel(labelWord);
        }
    }

    return result;
}
Aggregations