Example 71 with INDArray

Use of org.nd4j.linalg.api.ndarray.INDArray in the deeplearning4j project.

From the class TestDataVecDataSetFunctions, method testDataVecSequencePairDataSetFunction.

@Test
public void testDataVecSequencePairDataSetFunction() throws Exception {
    JavaSparkContext sc = getContext();
    //Convert data to a SequenceFile:
    File f = new File("src/test/resources/csvsequence/csvsequence_0.txt");
    String path = f.getPath();
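    // Strip the 17-character filename ("csvsequence_0.txt") to get the containing folder, then glob it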
    String folder = path.substring(0, path.length() - 17);
    path = folder + "*";
    PathToKeyConverter pathConverter = new PathToKeyConverterFilename();
    JavaPairRDD<Text, BytesPairWritable> toWrite = DataVecSparkUtil.combineFilesForSequenceFile(sc, path, path, pathConverter);
    Path p = Files.createTempDirectory("dl4j_testSeqPairFn");
    p.toFile().deleteOnExit();
    String outPath = p.toString() + "/out";
    new File(outPath).deleteOnExit();
    toWrite.saveAsNewAPIHadoopFile(outPath, Text.class, BytesPairWritable.class, SequenceFileOutputFormat.class);
    //Load from sequence file:
    JavaPairRDD<Text, BytesPairWritable> fromSeq = sc.sequenceFile(outPath, Text.class, BytesPairWritable.class);
    SequenceRecordReader srr1 = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader srr2 = new CSVSequenceRecordReader(1, ",");
    PairSequenceRecordReaderBytesFunction psrbf = new PairSequenceRecordReaderBytesFunction(srr1, srr2);
    JavaRDD<Tuple2<List<List<Writable>>, List<List<Writable>>>> writables = fromSeq.map(psrbf);
    //Map to DataSet:
    DataVecSequencePairDataSetFunction pairFn = new DataVecSequencePairDataSetFunction();
    JavaRDD<DataSet> data = writables.map(pairFn);
    List<DataSet> sparkData = data.collect();
    //Now: do the same thing locally (SequenceRecordReaderDataSetIterator) and compare
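    // Replace the "0" in the file name with "%d" so NumberedFileInputSplit can address files 0..2
    // (note: replaceAll substitutes every "0" in the absolute path)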
    String featuresPath = f.getAbsolutePath().replaceAll("0", "%d");
    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, -1, true);
    List<DataSet> localData = new ArrayList<>(3);
    while (iter.hasNext()) localData.add(iter.next());
    assertEquals(3, sparkData.size());
    assertEquals(3, localData.size());
    for (int i = 0; i < 3; i++) {
        //Check shapes etc.; data set order may differ between Spark and local
        DataSet dsSpark = sparkData.get(i);
        DataSet dsLocal = localData.get(i);
        assertNull(dsSpark.getFeaturesMaskArray());
        assertNull(dsSpark.getLabelsMaskArray());
        INDArray fSpark = dsSpark.getFeatureMatrix();
        INDArray fLocal = dsLocal.getFeatureMatrix();
        INDArray lSpark = dsSpark.getLabels();
        INDArray lLocal = dsLocal.getLabels();
        //1 example, 3 values, 4 time steps
        int[] s = new int[] { 1, 3, 4 };
        assertArrayEquals(s, fSpark.shape());
        assertArrayEquals(s, fLocal.shape());
        assertArrayEquals(s, lSpark.shape());
        assertArrayEquals(s, lLocal.shape());
    }
    //Check that the results are the same (ordering notwithstanding)
    boolean[] found = new boolean[3];
    for (int i = 0; i < 3; i++) {
        int foundIndex = -1;
        DataSet ds = sparkData.get(i);
        for (int j = 0; j < 3; j++) {
            if (ds.equals(localData.get(j))) {
                if (foundIndex != -1)
                    //Already found a match for this Spark value -> it equals two or more local values (shouldn't happen)
                    fail();
                foundIndex = j;
                if (found[foundIndex])
                    //Another Spark value already matched this local value -> suggests duplicates in the Spark list
                    fail();
                //Mark this local value as matched
                found[foundIndex] = true;
            }
        }
    }
    int count = 0;
    for (boolean b : found) {
        if (b)
            count++;
    }
    //Expect exactly 3 pairwise matches between the Spark and local versions
    assertEquals(3, count);
}
Also used: DataSet (org.nd4j.linalg.dataset.DataSet), SequenceRecordReaderDataSetIterator (org.deeplearning4j.datasets.datavec.SequenceRecordReaderDataSetIterator), ArrayList (java.util.ArrayList), List (java.util.List), CSVSequenceRecordReader (org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader), SequenceRecordReader (org.datavec.api.records.reader.SequenceRecordReader), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), Path (java.nio.file.Path), Text (org.apache.hadoop.io.Text), NumberedFileInputSplit (org.datavec.api.split.NumberedFileInputSplit), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Tuple2 (scala.Tuple2), File (java.io.File), BaseSparkTest (org.deeplearning4j.spark.BaseSparkTest), Test (org.junit.Test)
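
A note on the NumberedFileInputSplit pattern used in the test above: the "%d" placeholder is expanded with each index in the given range, so indices 0..2 address csvsequence_0.txt through csvsequence_2.txt. A minimal local sketch of the same reader setup (the /data path is a placeholder, not from the test):

SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
// Indices 0..2 expand "%d" to csvsequence_0.txt, csvsequence_1.txt, csvsequence_2.txt
featureReader.initialize(new NumberedFileInputSplit("/data/csvsequence/csvsequence_%d.txt", 0, 2));
labelReader.initialize(new NumberedFileInputSplit("/data/csvsequence/csvsequence_%d.txt", 0, 2));
// miniBatchSize=1, numPossibleLabels=-1 (unused for regression), regression=true, as in the test above
SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, -1, true);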

Example 72 with INDArray

Use of org.nd4j.linalg.api.ndarray.INDArray in the deeplearning4j project.

From the class SparkUtils, method checkKryoConfiguration.

/**
     * Check the Spark configuration for an incorrect Kryo setup
     *
     * @param javaSparkContext Spark context
     * @param log              Logger to log messages to
     * @return True if OK (no Kryo, or a correct Kryo setup); a RuntimeException is thrown if Kryo is
     *         enabled but a test INDArray cannot be serialized and deserialized correctly
     */
public static boolean checkKryoConfiguration(JavaSparkContext javaSparkContext, Logger log) {
    //Check if kryo configuration is correct:
    String serializer = javaSparkContext.getConf().get("spark.serializer", null);
    if (serializer != null && serializer.equals("org.apache.spark.serializer.KryoSerializer")) {
        String kryoRegistrator = javaSparkContext.getConf().get("spark.kryo.registrator", null);
        if (kryoRegistrator == null || !kryoRegistrator.equals("org.nd4j.Nd4jRegistrator")) {
            //It's probably going to fail later due to Kryo failing on the INDArray deserialization (off-heap data)
            //But: the user might be using a custom Kryo registrator that can handle ND4J INDArrays, even if they
            // aren't using the official ND4J-provided one
            //Either way: test serialization of INDArrays now, and fail early if necessary
            SerializerInstance si;
            ByteBuffer bb;
            try {
                si = javaSparkContext.env().serializer().newInstance();
                bb = si.serialize(Nd4j.linspace(1, 5, 5), null);
            } catch (Exception e) {
                //Failed for some unknown reason during serialization - should never happen
                throw new RuntimeException(KRYO_EXCEPTION_MSG, e);
            }
            if (bb == null) {
                //Should probably never happen
                throw new RuntimeException(KRYO_EXCEPTION_MSG + "\n(Got: null ByteBuffer from Spark SerializerInstance)");
            } else {
                //Could serialize successfully, but still may not be able to deserialize if kryo config is wrong
                boolean equals;
                INDArray deserialized;
                try {
                    deserialized = si.deserialize(bb, null);
                    //Equals method may fail on malformed INDArrays, hence should be within the try-catch
                    equals = Nd4j.linspace(1, 5, 5).equals(deserialized);
                } catch (Exception e) {
                    throw new RuntimeException(KRYO_EXCEPTION_MSG, e);
                }
                if (!equals) {
                    throw new RuntimeException(KRYO_EXCEPTION_MSG + "\n(Error during deserialization: test array" + " was not deserialized successfully)");
                }
                //Otherwise: serialization/deserialization was successful using Kryo
                return true;
            }
        }
    }
    return true;
}
Also used: INDArray (org.nd4j.linalg.api.ndarray.INDArray), SerializerInstance (org.apache.spark.serializer.SerializerInstance), ByteBuffer (java.nio.ByteBuffer)
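
To pass this check, Kryo must be paired with the ND4J registrator named in the code above. A minimal sketch of a conforming configuration (the app name is a placeholder):

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

SparkConf conf = new SparkConf()
        .setAppName("nd4j-kryo-example")
        .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        .set("spark.kryo.registrator", "org.nd4j.Nd4jRegistrator");
JavaSparkContext sc = new JavaSparkContext(conf);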

Example 73 with INDArray

Use of org.nd4j.linalg.api.ndarray.INDArray in the deeplearning4j project.

From the class BaseSparkTest, method getBasicSparkDataSet.

protected JavaRDD<DataSet> getBasicSparkDataSet(int nRows, INDArray input, INDArray labels) {
    List<DataSet> list = new ArrayList<>();
    for (int i = 0; i < nRows; i++) {
        INDArray inRow = input.getRow(i).dup();
        INDArray outRow = labels.getRow(i).dup();
        DataSet ds = new DataSet(inRow, outRow);
        list.add(ds);
    }
    data = DataSet.merge(list);
    return sc.parallelize(list);
}
Also used: INDArray (org.nd4j.linalg.api.ndarray.INDArray), DataSet (org.nd4j.linalg.dataset.DataSet), ArrayList (java.util.ArrayList)
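
A possible call site for this helper; the 10 x 4 input and 10 x 3 label shapes are illustrative, not taken from the original tests:

INDArray input = Nd4j.rand(10, 4);   // 10 examples, 4 features each
INDArray labels = Nd4j.rand(10, 3);  // 10 examples, 3 label values each
JavaRDD<DataSet> rdd = getBasicSparkDataSet(10, input, labels);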

Example 74 with INDArray

Use of org.nd4j.linalg.api.ndarray.INDArray in the deeplearning4j project.

From the class BaseVaeScoreWithKeyFunctionAdapter, method call.

@Override
public Iterable<Tuple2<K, Double>> call(Iterator<Tuple2<K, INDArray>> iterator) throws Exception {
    if (!iterator.hasNext()) {
        return Collections.emptyList();
    }
    VariationalAutoencoder vae = getVaeLayer();
    List<Tuple2<K, Double>> ret = new ArrayList<>();
    List<INDArray> collect = new ArrayList<>(batchSize);
    List<K> collectKey = new ArrayList<>(batchSize);
    int totalCount = 0;
    while (iterator.hasNext()) {
        collect.clear();
        collectKey.clear();
        int nExamples = 0;
        while (iterator.hasNext() && nExamples < batchSize) {
            Tuple2<K, INDArray> t2 = iterator.next();
            INDArray features = t2._2();
            int n = features.size(0);
            if (n != 1)
                throw new IllegalStateException("Cannot score examples with one key per data set if " + "data set contains more than 1 example (numExamples: " + n + ")");
            collect.add(features);
            collectKey.add(t2._1());
            nExamples += n;
        }
        totalCount += nExamples;
        INDArray toScore = Nd4j.vstack(collect);
        INDArray scores = computeScore(vae, toScore);
        double[] doubleScores = scores.data().asDouble();
        for (int i = 0; i < doubleScores.length; i++) {
            ret.add(new Tuple2<>(collectKey.get(i), doubleScores[i]));
        }
    }
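    // If the executioner batches ops (GridExecutioner), block until all queued ops have executed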
    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking();
    if (log.isDebugEnabled()) {
        log.debug("Scored {} examples", totalCount);
    }
    return ret;
}
Also used: VariationalAutoencoder (org.deeplearning4j.nn.layers.variational.VariationalAutoencoder), ArrayList (java.util.ArrayList), GridExecutioner (org.nd4j.linalg.api.ops.executioner.GridExecutioner), INDArray (org.nd4j.linalg.api.ndarray.INDArray), Tuple2 (scala.Tuple2)
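
The batching above depends on Nd4j.vstack concatenating the collected [1 x d] feature rows along dimension 0, producing a single [n x d] matrix the VAE can score in one pass. A small illustrative sketch:

import java.util.Arrays;

INDArray a = Nd4j.create(new double[] { 1, 2, 3 }); // row vector, shape [1, 3]
INDArray b = Nd4j.create(new double[] { 4, 5, 6 }); // row vector, shape [1, 3]
INDArray stacked = Nd4j.vstack(Arrays.asList(a, b)); // shape [2, 3]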

Example 75 with INDArray

Use of org.nd4j.linalg.api.ndarray.INDArray in the deeplearning4j project.

From the class CGVaeReconstructionErrorWithKeyFunction, method getVaeLayer.

@Override
public VariationalAutoencoder getVaeLayer() {
    ComputationGraph network = new ComputationGraph(ComputationGraphConfiguration.fromJson((String) jsonConfig.getValue()));
    network.init();
    INDArray val = ((INDArray) params.value()).unsafeDuplication();
    if (val.length() != network.numParams(false))
        throw new IllegalStateException("Network did not have the same number of parameters as the broadcast set of parameters");
    network.setParams(val);
    Layer l = network.getLayer(0);
    if (!(l instanceof VariationalAutoencoder)) {
        throw new RuntimeException("Cannot use CGVaeReconstructionErrorWithKeyFunction on network that doesn't have a VAE " + "layer as layer 0. Layer type: " + l.getClass());
    }
    return (VariationalAutoencoder) l;
}
Also used: INDArray (org.nd4j.linalg.api.ndarray.INDArray), VariationalAutoencoder (org.deeplearning4j.nn.layers.variational.VariationalAutoencoder), ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph), Layer (org.deeplearning4j.nn.api.Layer)
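
The jsonConfig and params fields read above are Spark broadcast variables. A plausible driver-side setup (variable names are assumptions, not shown in the original class) might look like:

import org.apache.spark.broadcast.Broadcast;

// assuming sc is a JavaSparkContext and network is an initialized ComputationGraph
Broadcast<String> jsonConfig = sc.broadcast(network.getConfiguration().toJson());
Broadcast<INDArray> params = sc.broadcast(network.params());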

Aggregations

INDArray (org.nd4j.linalg.api.ndarray.INDArray): 1034
Test (org.junit.Test): 453
NeuralNetConfiguration (org.deeplearning4j.nn.conf.NeuralNetConfiguration): 173
DataSet (org.nd4j.linalg.dataset.DataSet): 171
MultiLayerNetwork (org.deeplearning4j.nn.multilayer.MultiLayerNetwork): 166
MultiLayerConfiguration (org.deeplearning4j.nn.conf.MultiLayerConfiguration): 143
Gradient (org.deeplearning4j.nn.gradient.Gradient): 100
Layer (org.deeplearning4j.nn.api.Layer): 82
NormalDistribution (org.deeplearning4j.nn.conf.distribution.NormalDistribution): 77
OutputLayer (org.deeplearning4j.nn.conf.layers.OutputLayer): 69
DefaultGradient (org.deeplearning4j.nn.gradient.DefaultGradient): 68
File (java.io.File): 67
DenseLayer (org.deeplearning4j.nn.conf.layers.DenseLayer): 66
ArrayList (java.util.ArrayList): 65
ComputationGraph (org.deeplearning4j.nn.graph.ComputationGraph): 62
DataSetIterator (org.nd4j.linalg.dataset.api.iterator.DataSetIterator): 62
Pair (org.deeplearning4j.berkeley.Pair): 56
Random (java.util.Random): 54
ComputationGraphConfiguration (org.deeplearning4j.nn.conf.ComputationGraphConfiguration): 53
IrisDataSetIterator (org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator): 44