Search in sources :

Example 11 with Sequence

use of org.deeplearning4j.models.sequencevectors.sequence.Sequence in project deeplearning4j by deeplearning4j.

the class ParallelTransformerIteratorTest method hasNext.

/**
 * Walks every sequence produced by a multithreading-enabled {@code SentenceTransformer} over
 * {@code /big/raw_sentences.txt} and checks that each one is non-null and non-empty, and that
 * exactly 97162 sequences are produced in total.
 */
@Test
public void hasNext() throws Exception {
    SentenceIterator lineIterator = new BasicLineIterator(new ClassPathResource("/big/raw_sentences.txt").getFile());
    SentenceTransformer sentenceTransformer = new SentenceTransformer.Builder()
            .iterator(lineIterator)
            .allowMultithreading(true)
            .tokenizerFactory(factory)
            .build();
    Iterator<Sequence<VocabWord>> sequences = sentenceTransformer.iterator();

    int processed = 0;
    for (; sequences.hasNext(); processed++) {
        Sequence<VocabWord> current = sequences.next();
        // The zero-based iteration index is included so a failure can be located in the corpus.
        String failure = "Failed on [" + processed + "] iteration";
        assertNotEquals(failure, null, current);
        assertNotEquals(failure, 0, current.size());
    }

    assertEquals(97162, processed);
}
Also used : BasicLineIterator(org.deeplearning4j.text.sentenceiterator.BasicLineIterator) VocabWord(org.deeplearning4j.models.word2vec.VocabWord) SentenceTransformer(org.deeplearning4j.models.sequencevectors.transformers.impl.SentenceTransformer) Sequence(org.deeplearning4j.models.sequencevectors.sequence.Sequence) PrefetchingSentenceIterator(org.deeplearning4j.text.sentenceiterator.PrefetchingSentenceIterator) SentenceIterator(org.deeplearning4j.text.sentenceiterator.SentenceIterator) MutipleEpochsSentenceIterator(org.deeplearning4j.text.sentenceiterator.MutipleEpochsSentenceIterator) ClassPathResource(org.datavec.api.util.ClassPathResource) Test(org.junit.Test)

Example 12 with Sequence

use of org.deeplearning4j.models.sequencevectors.sequence.Sequence in project deeplearning4j by deeplearning4j.

the class VocabConstructorTest method testCounter1.

/**
 * Feeds a single three-element sequence ("word", "test", "here") through a
 * {@code VocabConstructor} and checks that the target cache ends up with three words and a
 * frequency of 1 for "test".
 *
 * NOTE(review): "test" is created with 2 as its first constructor argument, yet the expected
 * frequency is 1 - presumably the constructor counts occurrences in the sequence rather than
 * trusting that value; confirm against VocabConstructor.
 */
@Test
public void testCounter1() throws Exception {
    VocabCache<VocabWord> vocabCache = new AbstractCache.Builder<VocabWord>().build();
    final List<VocabWord> words = new ArrayList<>();
    words.add(new VocabWord(1, "word"));
    words.add(new VocabWord(2, "test"));
    words.add(new VocabWord(1, "here"));
    // One-shot source: every iterator() call yields a fresh iterator holding exactly one sequence.
    Iterable<Sequence<VocabWord>> iterable = new Iterable<Sequence<VocabWord>>() {

        @Override
        public Iterator<Sequence<VocabWord>> iterator() {
            return new Iterator<Sequence<VocabWord>>() {

                // true while the single sequence has not been handed out yet
                private final AtomicBoolean switcher = new AtomicBoolean(true);

                @Override
                public boolean hasNext() {
                    // Pure read: repeated hasNext() calls must not consume the element.
                    return switcher.get();
                }

                @Override
                public Sequence<VocabWord> next() {
                    // Consumption happens here, and only once, as the Iterator contract requires.
                    if (!switcher.compareAndSet(true, false)) {
                        throw new java.util.NoSuchElementException();
                    }
                    return new Sequence<>(words);
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    };
    SequenceIterator<VocabWord> sequenceIterator = new AbstractSequenceIterator.Builder<>(iterable).build();
    VocabConstructor<VocabWord> constructor = new VocabConstructor.Builder<VocabWord>().addSource(sequenceIterator, 0).useAdaGrad(false).setTargetVocabCache(vocabCache).build();
    constructor.buildJointVocabulary(false, true);
    assertEquals(3, vocabCache.numWords());
    assertEquals(1, vocabCache.wordFrequency("test"));
}
Also used : VocabWord(org.deeplearning4j.models.word2vec.VocabWord) Sequence(org.deeplearning4j.models.sequencevectors.sequence.Sequence) AbstractCache(org.deeplearning4j.models.word2vec.wordstore.inmemory.AbstractCache) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AbstractSequenceIterator(org.deeplearning4j.models.sequencevectors.iterators.AbstractSequenceIterator) FileLabelAwareIterator(org.deeplearning4j.text.documentiterator.FileLabelAwareIterator) SentenceIterator(org.deeplearning4j.text.sentenceiterator.SentenceIterator) AbstractSequenceIterator(org.deeplearning4j.models.sequencevectors.iterators.AbstractSequenceIterator) BasicLineIterator(org.deeplearning4j.text.sentenceiterator.BasicLineIterator) SequenceIterator(org.deeplearning4j.models.sequencevectors.interfaces.SequenceIterator) Test(org.junit.Test)

Example 13 with Sequence

use of org.deeplearning4j.models.sequencevectors.sequence.Sequence in project deeplearning4j by deeplearning4j.

the class VocabConstructorTest method testCounter2.

/**
 * Feeds a single three-element sequence ("word", "test", "here") through a
 * {@code VocabConstructor} and checks that the target cache ends up with three words and a
 * frequency of 1 for "test".
 *
 * NOTE(review): "test" is created with 0 as its first constructor argument, yet the expected
 * frequency is 1 - presumably the constructor counts occurrences in the sequence rather than
 * trusting that value; confirm against VocabConstructor.
 */
@Test
public void testCounter2() throws Exception {
    VocabCache<VocabWord> vocabCache = new AbstractCache.Builder<VocabWord>().build();
    final List<VocabWord> words = new ArrayList<>();
    words.add(new VocabWord(1, "word"));
    words.add(new VocabWord(0, "test"));
    words.add(new VocabWord(1, "here"));
    // One-shot source: every iterator() call yields a fresh iterator holding exactly one sequence.
    Iterable<Sequence<VocabWord>> iterable = new Iterable<Sequence<VocabWord>>() {

        @Override
        public Iterator<Sequence<VocabWord>> iterator() {
            return new Iterator<Sequence<VocabWord>>() {

                // true while the single sequence has not been handed out yet
                private final AtomicBoolean switcher = new AtomicBoolean(true);

                @Override
                public boolean hasNext() {
                    // Pure read: repeated hasNext() calls must not consume the element.
                    return switcher.get();
                }

                @Override
                public Sequence<VocabWord> next() {
                    // Consumption happens here, and only once, as the Iterator contract requires.
                    if (!switcher.compareAndSet(true, false)) {
                        throw new java.util.NoSuchElementException();
                    }
                    return new Sequence<>(words);
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }
            };
        }
    };
    SequenceIterator<VocabWord> sequenceIterator = new AbstractSequenceIterator.Builder<>(iterable).build();
    VocabConstructor<VocabWord> constructor = new VocabConstructor.Builder<VocabWord>().addSource(sequenceIterator, 0).useAdaGrad(false).setTargetVocabCache(vocabCache).build();
    constructor.buildJointVocabulary(false, true);
    assertEquals(3, vocabCache.numWords());
    assertEquals(1, vocabCache.wordFrequency("test"));
}
Also used : VocabWord(org.deeplearning4j.models.word2vec.VocabWord) Sequence(org.deeplearning4j.models.sequencevectors.sequence.Sequence) AbstractCache(org.deeplearning4j.models.word2vec.wordstore.inmemory.AbstractCache) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) AbstractSequenceIterator(org.deeplearning4j.models.sequencevectors.iterators.AbstractSequenceIterator) FileLabelAwareIterator(org.deeplearning4j.text.documentiterator.FileLabelAwareIterator) SentenceIterator(org.deeplearning4j.text.sentenceiterator.SentenceIterator) AbstractSequenceIterator(org.deeplearning4j.models.sequencevectors.iterators.AbstractSequenceIterator) BasicLineIterator(org.deeplearning4j.text.sentenceiterator.BasicLineIterator) SequenceIterator(org.deeplearning4j.models.sequencevectors.interfaces.SequenceIterator) Test(org.junit.Test)

Example 14 with Sequence

use of org.deeplearning4j.models.sequencevectors.sequence.Sequence in project deeplearning4j by deeplearning4j.

the class SparkParagraphVectors method fitMultipleFiles.

/**
 * Builds the ParagraphVectors model from a pair RDD in which each key is a label and each
 * value is the document held in a single string.
 *
 * <p>The configuration is validated and the environment is broadcast first; the pairs are then
 * mapped through a {@code KeySequenceConvertFunction} and the resulting sequences are passed
 * to {@code super.fitSequences}.
 *
 * @param documentsRdd label/document pairs to fit the model on
 */
public void fitMultipleFiles(JavaPairRDD<String, String> documentsRdd) {
    validateConfiguration();

    JavaSparkContext sparkContext = new JavaSparkContext(documentsRdd.context());
    broadcastEnvironment(sparkContext);

    // Only a shape change is needed here: JavaPairRDD -> JavaRDD<Sequence<VocabWord>>.
    // The converter is created after broadcastEnvironment(), as in the original ordering.
    KeySequenceConvertFunction converter = new KeySequenceConvertFunction(configurationBroadcast);
    JavaRDD<Sequence<VocabWord>> sequences = documentsRdd.map(converter);
    super.fitSequences(sequences);
}
Also used : KeySequenceConvertFunction(org.deeplearning4j.spark.models.paragraphvectors.functions.KeySequenceConvertFunction) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Sequence(org.deeplearning4j.models.sequencevectors.sequence.Sequence)

Example 15 with Sequence

use of org.deeplearning4j.models.sequencevectors.sequence.Sequence in project deeplearning4j by deeplearning4j.

the class DocumentSequenceConvertFunction method call.

/**
 * Converts a labelled document into a {@code Sequence<VocabWord>}.
 *
 * <p>Elements come from the document's referenced content when that is non-null and non-empty.
 * Otherwise the document content is tokenized with {@code tokenizerFactory} (instantiated on
 * demand when it is still {@code null}) and every non-null, non-empty token becomes a
 * {@code VocabWord}. Each non-null, non-empty label is then attached as a sequence label
 * after being marked as a label.
 *
 * @param document the document to convert
 * @return the sequence holding the document's elements and labels
 * @throws Exception declared by the overridden method
 */
@Override
public Sequence<VocabWord> call(LabelledDocument document) throws Exception {
    Sequence<VocabWord> result = new Sequence<>();

    // Elements: tokenize only when no usable referenced content is available.
    if (document.getReferencedContent() == null || document.getReferencedContent().isEmpty()) {
        if (tokenizerFactory == null) {
            instantiateTokenizerFactory();
        }
        List<String> rawTokens = tokenizerFactory.create(document.getContent()).getTokens();
        for (String rawToken : rawTokens) {
            if (rawToken != null && !rawToken.isEmpty()) {
                result.addElement(new VocabWord(1.0, rawToken));
            }
        }
    } else {
        result.addElements(document.getReferencedContent());
    }

    // Labels: each usable label is wrapped in a VocabWord flagged as a label.
    for (String labelText : document.getLabels()) {
        if (labelText != null && !labelText.isEmpty()) {
            VocabWord labelWord = new VocabWord(1.0, labelText);
            labelWord.markAsLabel(true);
            result.addSequenceLabel(labelWord);
        }
    }

    return result;
}
Also used : VocabWord(org.deeplearning4j.models.word2vec.VocabWord) Sequence(org.deeplearning4j.models.sequencevectors.sequence.Sequence)

Aggregations

Sequence (org.deeplearning4j.models.sequencevectors.sequence.Sequence)18 VocabWord (org.deeplearning4j.models.word2vec.VocabWord)11 Test (org.junit.Test)5 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)4 ShallowSequenceElement (org.deeplearning4j.models.sequencevectors.sequence.ShallowSequenceElement)4 BasicLineIterator (org.deeplearning4j.text.sentenceiterator.BasicLineIterator)4 SentenceIterator (org.deeplearning4j.text.sentenceiterator.SentenceIterator)4 ND4JIllegalStateException (org.nd4j.linalg.exception.ND4JIllegalStateException)3 ArrayList (java.util.ArrayList)2 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)2 ClassPathResource (org.datavec.api.util.ClassPathResource)2 SequenceIterator (org.deeplearning4j.models.sequencevectors.interfaces.SequenceIterator)2 AbstractSequenceIterator (org.deeplearning4j.models.sequencevectors.iterators.AbstractSequenceIterator)2 SentenceTransformer (org.deeplearning4j.models.sequencevectors.transformers.impl.SentenceTransformer)2 AbstractCache (org.deeplearning4j.models.word2vec.wordstore.inmemory.AbstractCache)2 FileLabelAwareIterator (org.deeplearning4j.text.documentiterator.FileLabelAwareIterator)2 MutipleEpochsSentenceIterator (org.deeplearning4j.text.sentenceiterator.MutipleEpochsSentenceIterator)2 PrefetchingSentenceIterator (org.deeplearning4j.text.sentenceiterator.PrefetchingSentenceIterator)2 RoutedTransport (org.nd4j.parameterserver.distributed.transport.RoutedTransport)2 List (java.util.List)1