Search in sources :

Example 66 with INDArray

use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.

the class WordVectorSerializerTest method testLoaderTextSmall.

/**
 * Loads the same sample model once from its text file and once from its binary
 * file via {@code WordVectorSerializer.loadGoogleModel} and checks that both
 * loaded models report approximately the same similarity (tolerance 1e-1) for
 * one word pair.
 *
 * <p>The test is currently disabled through {@code @Ignore}.
 */
@Test
@Ignore
public void testLoaderTextSmall() throws Exception {
    String w1 = "database";
    String w2 = "DBMS";
    // Second argument is false for the .txt resource and true for the .bin resource.
    WordVectors vecModel = WordVectorSerializer.loadGoogleModel(new ClassPathResource("word2vec/googleload/sample_vec.txt").getFile(), false, true);
    WordVectors vectorsBinary = WordVectorSerializer.loadGoogleModel(new ClassPathResource("word2vec/googleload/sample_vec.bin").getFile(), true, true);
    // The text model's result is not inspected; the call is kept so the lookup is still exercised.
    vecModel.wordsNearest(w1, 10);
    Collection<String> nearestBinary = vectorsBinary.wordsNearest(w1, 10);
    System.out.println(nearestBinary);
    assertEquals(vecModel.similarity(w2, "DBMS's"), vectorsBinary.similarity(w2, "DBMS's"), 1e-1);
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) WordVectors(org.deeplearning4j.models.embeddings.wordvectors.WordVectors) ClassPathResource(org.datavec.api.util.ClassPathResource) Ignore(org.junit.Ignore) Test(org.junit.Test)

Example 67 with INDArray

use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.

the class WordVectorSerializerTest method testUnifiedLoaderArchive1.

/**
 * Reads one archive through both {@code readWord2Vec} and
 * {@code readWord2VecModel(file, false)}, checks that the two models return
 * equal vectors for the word "night", and checks that the lookup table of the
 * second model exposes neither syn1 nor syn1Neg.
 */
@Test
public void testUnifiedLoaderArchive1() throws Exception {
    logger.info("Executor name: {}", Nd4j.getExecutioner().getClass().getSimpleName());

    File archive = new ClassPathResource("word2vec.dl4j/file.w2v").getFile();
    WordVectors liveModel = WordVectorSerializer.readWord2Vec(archive);
    WordVectors unifiedModel = WordVectorSerializer.readWord2VecModel(archive, false);

    // Both loaders must yield the same vector for the probe word.
    INDArray liveVector = liveModel.getWordVectorMatrix("night");
    INDArray unifiedVector = unifiedModel.getWordVectorMatrix("night");
    assertNotEquals(null, liveVector);
    assertEquals(liveVector, unifiedVector);

    // The model read with the 'false' flag is expected to carry no syn1 / syn1Neg arrays.
    InMemoryLookupTable unifiedTable = (InMemoryLookupTable) unifiedModel.lookupTable();
    assertEquals(null, unifiedTable.getSyn1());
    assertEquals(null, unifiedTable.getSyn1Neg());
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) WordVectors(org.deeplearning4j.models.embeddings.wordvectors.WordVectors) File(java.io.File) ClassPathResource(org.datavec.api.util.ClassPathResource) Test(org.junit.Test)

Example 68 with INDArray

use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.

the class CnnSentenceDataSetIterator method loadSingleSentence.

/**
 * Generally used post training time to load a single sentence for predictions.
 *
 * <p>The sentence is tokenized with {@code tokenizeSentence}, at most
 * {@code maxSentenceLength} tokens are kept, and the vector returned by
 * {@code getVector} for each kept token is written into a 4d array of shape
 * {@code [1, 1, sentenceLength, wordVectorSize]} when {@code sentencesAlongHeight}
 * is set, or {@code [1, 1, wordVectorSize, sentenceLength]} otherwise.
 *
 * @param sentence the sentence to convert
 * @return the feature array for the sentence
 * @throws IllegalStateException if tokenization yields no tokens
 */
public INDArray loadSingleSentence(String sentence) {
    List<String> tokens = tokenizeSentence(sentence);
    if (tokens.isEmpty()) {
        // Fail fast with a descriptive message instead of passing a zero-length
        // dimension on to Nd4j.create.
        throw new IllegalStateException("No tokens available for input sentence - empty string or no usable words? Sentence = \"" + sentence + "\"");
    }
    // Number of tokens actually written; longer sentences are truncated.
    int length = Math.min(maxSentenceLength, tokens.size());
    int[] featuresShape = sentencesAlongHeight
            ? new int[] { 1, 1, length, wordVectorSize }
            : new int[] { 1, 1, wordVectorSize, length };
    INDArray features = Nd4j.create(featuresShape);
    for (int i = 0; i < length; i++) {
        INDArray vector = getVector(tokens.get(i));
        INDArrayIndex[] indices = new INDArrayIndex[4];
        indices[0] = NDArrayIndex.point(0);
        indices[1] = NDArrayIndex.point(0);
        if (sentencesAlongHeight) {
            // Token i occupies position i along dimension 2.
            indices[2] = NDArrayIndex.point(i);
            indices[3] = NDArrayIndex.all();
        } else {
            // Token i occupies position i along dimension 3.
            indices[2] = NDArrayIndex.all();
            indices[3] = NDArrayIndex.point(i);
        }
        features.put(indices, vector);
    }
    return features;
}
Also used : INDArray(org.nd4j.linalg.api.ndarray.INDArray) INDArrayIndex(org.nd4j.linalg.indexing.INDArrayIndex)

Example 69 with INDArray

use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.

the class AddTest method testAdd.

/**
 * Folds five length-5 arrays of ones with {@code Add}, starting from a
 * length-5 array of zeros, and checks that the elements of the folded
 * array sum to 25.
 */
@Test
public void testAdd() {
    List<INDArray> ones = new ArrayList<>();
    int remaining = 5;
    while (remaining-- > 0) {
        ones.add(Nd4j.ones(5));
    }

    JavaRDD<INDArray> distributed = sc.parallelize(ones);
    INDArray folded = distributed.fold(Nd4j.zeros(5), new Add());

    // Reduce the folded array to a single number before comparing.
    double total = folded.sum(Integer.MAX_VALUE).getDouble(0);
    assertEquals(25, total, 1e-1);
}
Also used : Add(org.deeplearning4j.spark.impl.common.Add) INDArray(org.nd4j.linalg.api.ndarray.INDArray) ArrayList(java.util.ArrayList) Test(org.junit.Test) BaseSparkTest(org.deeplearning4j.spark.BaseSparkTest)

Example 70 with INDArray

use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.

the class TestDataVecDataSetFunctions method testDataVecSequencePairDataSetFunctionVariableLength.

/**
 * Same sort of test as testDataVecSequencePairDataSetFunction() but with variable length
 * time series (labels shorter than features).
 *
 * <p>Feature and label CSV sequences are written to a sequence file, read back through Spark,
 * mapped to {@code DataSet}s with {@code DataVecSequencePairDataSetFunction}, and compared with
 * the output of a local {@code SequenceRecordReaderDataSetIterator}. The comparison is run
 * once for ALIGN_END and once for ALIGN_START.
 */
@Test
public void testDataVecSequencePairDataSetFunctionVariableLength() throws Exception {
    //Convert data to a SequenceFile:
    File f = new File("src/test/resources/csvsequence/csvsequence_0.txt");
    String pathFeatures = f.getAbsolutePath();
    //17 == "csvsequence_0.txt".length(): strip the file name to get the containing folder
    String folderFeatures = pathFeatures.substring(0, pathFeatures.length() - 17);
    pathFeatures = folderFeatures + "*";
    File f2 = new File("src/test/resources/csvsequencelabels/csvsequencelabelsShort_0.txt");
    String pathLabels = f2.getPath();
    //28 == "csvsequencelabelsShort_0.txt".length(): strip the file name to get the containing folder
    String folderLabels = pathLabels.substring(0, pathLabels.length() - 28);
    pathLabels = folderLabels + "*";
    //Extract a number from the file name
    PathToKeyConverter pathConverter = new PathToKeyConverterNumber();
    JavaPairRDD<Text, BytesPairWritable> toWrite = DataVecSparkUtil.combineFilesForSequenceFile(sc, pathFeatures, pathLabels, pathConverter);
    Path p = Files.createTempDirectory("dl4j_testSeqPairFnVarLength");
    p.toFile().deleteOnExit();
    String outPath = p.toString() + "/out";
    new File(outPath).deleteOnExit();
    toWrite.saveAsNewAPIHadoopFile(outPath, Text.class, BytesPairWritable.class, SequenceFileOutputFormat.class);
    //Load from sequence file:
    JavaPairRDD<Text, BytesPairWritable> fromSeq = sc.sequenceFile(outPath, Text.class, BytesPairWritable.class);
    SequenceRecordReader srr1 = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader srr2 = new CSVSequenceRecordReader(1, ",");
    PairSequenceRecordReaderBytesFunction psrbf = new PairSequenceRecordReaderBytesFunction(srr1, srr2);
    JavaRDD<Tuple2<List<List<Writable>>, List<List<Writable>>>> writables = fromSeq.map(psrbf);
    //Map to DataSet:
    DataVecSequencePairDataSetFunction pairFn = new DataVecSequencePairDataSetFunction(4, false, DataVecSequencePairDataSetFunction.AlignmentMode.ALIGN_END);
    JavaRDD<DataSet> data = writables.map(pairFn);
    List<DataSet> sparkData = data.collect();
    //Now: do the same thing locally (SequenceRecordReaderDataSetIterator) and compare
    String featuresPath = f.getPath().replaceAll("0", "%d");
    String labelsPath = f2.getPath().replaceAll("0", "%d");
    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, 4, false, SequenceRecordReaderDataSetIterator.AlignmentMode.ALIGN_END);
    List<DataSet> localData = new ArrayList<>(3);
    while (iter.hasNext()) {
        localData.add(iter.next());
    }
    assertEquals(3, sparkData.size());
    assertEquals(3, localData.size());
    //1 example, 3 values, 4 time steps
    int[] fShapeExp = new int[] { 1, 3, 4 };
    //1 example, 4 values/classes, 4 time steps (after padding)
    int[] lShapeExp = new int[] { 1, 4, 4 };
    assertFeatureAndLabelShapes(sparkData, localData, fShapeExp, lShapeExp);
    //Check that results are the same (order notwithstanding)
    assertOneToOneMatch(sparkData, localData);
    //-------------------------------------------------
    //NOW: test same thing, but for align start...
    DataVecSequencePairDataSetFunction pairFnAlignStart = new DataVecSequencePairDataSetFunction(4, false, DataVecSequencePairDataSetFunction.AlignmentMode.ALIGN_START);
    JavaRDD<DataSet> rddDataAlignStart = writables.map(pairFnAlignStart);
    List<DataSet> sparkDataAlignStart = rddDataAlignStart.collect();
    //re-initialize to reset
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(labelsPath, 0, 2));
    SequenceRecordReaderDataSetIterator iterAlignStart = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, 4, false, SequenceRecordReaderDataSetIterator.AlignmentMode.ALIGN_START);
    List<DataSet> localDataAlignStart = new ArrayList<>(3);
    while (iterAlignStart.hasNext()) {
        localDataAlignStart.add(iterAlignStart.next());
    }
    assertEquals(3, sparkDataAlignStart.size());
    assertEquals(3, localDataAlignStart.size());
    assertFeatureAndLabelShapes(sparkDataAlignStart, localDataAlignStart, fShapeExp, lShapeExp);
    //Check that results are the same (order notwithstanding).
    //This must compare the ALIGN_START lists, not the ALIGN_END lists checked above.
    assertOneToOneMatch(sparkDataAlignStart, localDataAlignStart);
}

/**
 * Checks, index by index, that every Spark data set has a labels mask array and that the
 * feature and label arrays of both the Spark and the local data sets have the expected shapes.
 * Both lists are expected to have the same size (the caller asserts this beforehand).
 */
private static void assertFeatureAndLabelShapes(List<DataSet> sparkSets, List<DataSet> localSets, int[] featureShape, int[] labelShape) {
    for (int i = 0; i < sparkSets.size(); i++) {
        //Check shapes etc. data sets order may differ for spark vs. local
        DataSet dsSpark = sparkSets.get(i);
        DataSet dsLocal = localSets.get(i);
        //Expect mask array for labels
        assertNotNull(dsSpark.getLabelsMaskArray());
        INDArray fSpark = dsSpark.getFeatureMatrix();
        INDArray fLocal = dsLocal.getFeatureMatrix();
        INDArray lSpark = dsSpark.getLabels();
        INDArray lLocal = dsLocal.getLabels();
        assertArrayEquals(featureShape, fSpark.shape());
        assertArrayEquals(featureShape, fLocal.shape());
        assertArrayEquals(labelShape, lSpark.shape());
        assertArrayEquals(labelShape, lLocal.shape());
    }
}

/**
 * Checks that the Spark and local data sets match one-to-one regardless of order: each Spark
 * data set equals at most one local data set, no local data set is matched twice, and every
 * local data set is matched.
 */
private static void assertOneToOneMatch(List<DataSet> sparkSets, List<DataSet> localSets) {
    boolean[] found = new boolean[localSets.size()];
    for (int i = 0; i < sparkSets.size(); i++) {
        int foundIndex = -1;
        DataSet ds = sparkSets.get(i);
        for (int j = 0; j < localSets.size(); j++) {
            if (ds.equals(localSets.get(j))) {
                if (foundIndex != -1) {
                    //Already found this value -> suggests this spark value equals two or more of local version? (Shouldn't happen)
                    fail();
                }
                foundIndex = j;
                if (found[foundIndex]) {
                    //One of the other spark values was equal to this one -> suggests duplicates in Spark list
                    fail();
                }
                //mark this one as seen before
                found[foundIndex] = true;
            }
        }
    }
    int count = 0;
    for (boolean b : found) {
        if (b) {
            count++;
        }
    }
    //Expect every local data set to be matched exactly once by a spark data set
    assertEquals(localSets.size(), count);
}
Also used : DataSet(org.nd4j.linalg.dataset.DataSet) SequenceRecordReaderDataSetIterator(org.deeplearning4j.datasets.datavec.SequenceRecordReaderDataSetIterator) ArrayList(java.util.ArrayList) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) ArrayList(java.util.ArrayList) List(java.util.List) Path(java.nio.file.Path) CSVSequenceRecordReader(org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader) SequenceRecordReader(org.datavec.api.records.reader.SequenceRecordReader) Text(org.apache.hadoop.io.Text) NumberedFileInputSplit(org.datavec.api.split.NumberedFileInputSplit) INDArray(org.nd4j.linalg.api.ndarray.INDArray) Tuple2(scala.Tuple2) File(java.io.File) BaseSparkTest(org.deeplearning4j.spark.BaseSparkTest) Test(org.junit.Test)

Aggregations

INDArray (org.nd4j.linalg.api.ndarray.INDArray): 1034, Test (org.junit.Test): 453, NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 173, DataSet (org.nd4j.linalg.dataset.DataSet): 171, MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 166, MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 143, Gradient (org.deeplearning4j.nn.gradient.Gradient): 100, Layer (org.deeplearning4j.nn.api.Layer): 82, NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution): 77, OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 69, DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 68, File (java.io.File): 67, DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 66, ArrayList (java.util.ArrayList): 65, ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 62, DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 62, Pair (org.deeplearning4j.berkeley.Pair): 56, Random (java.util.Random): 54, ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 53, IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 44