Search in sources :

Example 61 with INDArray

use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.

the class ParagraphVectors method predictSeveral.

/**
     * Predict several labels based on the document.
     * Computes a similarity wrt the mean of the
     * representation of words in the document
     * @param document the document
     * @return possible labels in descending order
     */
@Deprecated
public Collection<String> predictSeveral(List<VocabWord> document, int limit) {
    /*
            This code was transferred from original ParagraphVectors DL4j implementation, and yet to be tested
         */
    if (document.isEmpty())
        throw new IllegalStateException("Document has no words inside");
    INDArray arr = Nd4j.create(document.size(), this.layerSize);
    for (int i = 0; i < document.size(); i++) {
        arr.putRow(i, getWordVectorMatrix(document.get(i).getWord()));
    }
    INDArray docMean = arr.mean(0);
    Counter<String> distances = new Counter<>();
    for (String s : labelsSource.getLabels()) {
        INDArray otherVec = getWordVectorMatrix(s);
        double sim = Transforms.cosineSim(docMean, otherVec);
        log.debug("Similarity inside: [" + s + "] -> " + sim);
        distances.incrementCount(s, sim);
    }
    return distances.getSortedKeys().subList(0, limit);
}
Also used : ND4JIllegalStateException(org.nd4j.linalg.exception.ND4JIllegalStateException) Counter(org.deeplearning4j.berkeley.Counter) INDArray(org.nd4j.linalg.api.ndarray.INDArray)

Example 62 with INDArray

use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.

the class WordVectorSerializerTest method testIndexPersistence.

@Test
public void testIndexPersistence() throws Exception {
    File inputFile = new ClassPathResource("/big/raw_sentences.txt").getFile();
    SentenceIterator iter = UimaSentenceIterator.createWithPath(inputFile.getAbsolutePath());
    // Split on white spaces in the line to get words
    TokenizerFactory t = new DefaultTokenizerFactory();
    t.setTokenPreProcessor(new CommonPreprocessor());
    Word2Vec vec = new Word2Vec.Builder().minWordFrequency(5).iterations(1).epochs(1).layerSize(100).stopWords(new ArrayList<String>()).useAdaGrad(false).negativeSample(5).seed(42).windowSize(5).iterate(iter).tokenizerFactory(t).build();
    vec.fit();
    VocabCache orig = vec.getVocab();
    File tempFile = File.createTempFile("temp", "w2v");
    tempFile.deleteOnExit();
    WordVectorSerializer.writeWordVectors(vec, tempFile);
    WordVectors vec2 = WordVectorSerializer.loadTxtVectors(tempFile);
    VocabCache rest = vec2.vocab();
    assertEquals(orig.totalNumberOfDocs(), rest.totalNumberOfDocs());
    for (VocabWord word : vec.getVocab().vocabWords()) {
        INDArray array1 = vec.getWordVectorMatrix(word.getLabel());
        INDArray array2 = vec2.getWordVectorMatrix(word.getLabel());
        assertEquals(array1, array2);
    }
}
Also used : TokenizerFactory(org.deeplearning4j.text.tokenization.tokenizerfactory.TokenizerFactory) DefaultTokenizerFactory(org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory) ArrayList(java.util.ArrayList) VocabWord(org.deeplearning4j.models.word2vec.VocabWord) ClassPathResource(org.datavec.api.util.ClassPathResource) SentenceIterator(org.deeplearning4j.text.sentenceiterator.SentenceIterator) UimaSentenceIterator(org.deeplearning4j.text.sentenceiterator.UimaSentenceIterator) DefaultTokenizerFactory(org.deeplearning4j.text.tokenization.tokenizerfactory.DefaultTokenizerFactory) CommonPreprocessor(org.deeplearning4j.text.tokenization.tokenizer.preprocessor.CommonPreprocessor) INDArray(org.nd4j.linalg.api.ndarray.INDArray) VocabCache(org.deeplearning4j.models.word2vec.wordstore.VocabCache) Word2Vec(org.deeplearning4j.models.word2vec.Word2Vec) WordVectors(org.deeplearning4j.models.embeddings.wordvectors.WordVectors) File(java.io.File) Test(org.junit.Test)

Example 63 with INDArray

use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.

the class WordVectorSerializerTest method testUnifiedLoaderArchive2.

@Test
public void testUnifiedLoaderArchive2() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());
    File w2v = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();
    WordVectors vectorsLive = WordVectorSerializer.readWord2Vec(w2v);
    WordVectors vectorsUnified = WordVectorSerializer.readWord2VecModel(w2v, true);
    INDArray arrayLive = vectorsLive.getWordVectorMatrix("night");
    INDArray arrayStatic = vectorsUnified.getWordVectorMatrix("night");
    assertNotEquals(null, arrayLive);
    assertEquals(arrayLive, arrayStatic);
    assertNotEquals(null, ((InMemoryLookupTable) vectorsUnified.lookupTable()).getSyn1());
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) WordVectors(org.deeplearning4j.models.embeddings.wordvectors.WordVectors) File(java.io.File) ClassPathResource(org.datavec.api.util.ClassPathResource) Test(org.junit.Test)

Example 64 with INDArray

use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.

the class WordVectorSerializerTest method testUnifiedLoaderText.

/**
     * This method tests CSV file loading via unified loader
     *
     * @throws Exception
     */
@Test
public void testUnifiedLoaderText() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());
    WordVectors vectorsLive = WordVectorSerializer.loadTxtVectors(textFile);
    WordVectors vectorsUnified = WordVectorSerializer.readWord2VecModel(textFile, true);
    INDArray arrayLive = vectorsLive.getWordVectorMatrix("Morgan_Freeman");
    INDArray arrayStatic = vectorsUnified.getWordVectorMatrix("Morgan_Freeman");
    assertNotEquals(null, arrayLive);
    assertEquals(arrayLive, arrayStatic);
    // we're trying EXTENDED model, but file doesn't have syn1/huffman info, so it should be silently degraded to simplified model
    assertEquals(null, ((InMemoryLookupTable) vectorsUnified.lookupTable()).getSyn1());
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) WordVectors(org.deeplearning4j.models.embeddings.wordvectors.WordVectors) Test(org.junit.Test)

Example 65 with INDArray

use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.

the class WordVectorSerializerTest method testStaticLoaderText.

/**
     * This method tests CSV file loading as static model
     *
     * @throws Exception
     */
@Test
public void testStaticLoaderText() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());
    WordVectors vectorsLive = WordVectorSerializer.loadTxtVectors(textFile);
    WordVectors vectorsStatic = WordVectorSerializer.loadStaticModel(textFile);
    INDArray arrayLive = vectorsLive.getWordVectorMatrix("Morgan_Freeman");
    INDArray arrayStatic = vectorsStatic.getWordVectorMatrix("Morgan_Freeman");
    assertNotEquals(null, arrayLive);
    assertEquals(arrayLive, arrayStatic);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) WordVectors(org.deeplearning4j.models.embeddings.wordvectors.WordVectors) Test(org.junit.Test)

Aggregations

INDArray (org.nd4j.linalg.api.ndarray.INDArray)1034 Test (org.junit.Test)453 NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration)173 DataSet (org.nd4j.linalg.dataset.DataSet)171 MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork)166 MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration)143 Gradient (org.deeplearning4j.nn.gradient.Gradient)100 Layer (org.deeplearning4j.nn.api.Layer)82 NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution)77 OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer)69 DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient)68 File (java.io.File)67 DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer)66 ArrayList (java.util.ArrayList)65 ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph)62 DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator)62 Pair (org.deeplearning4j.berkeley.Pair)56 Random (java.util.Random)54 ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration)53 IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator)44