Use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.
From the class TestDataVecDataSetFunctions, method testDataVecSequencePairDataSetFunction:
@Test
public void testDataVecSequencePairDataSetFunction() throws Exception {
    JavaSparkContext sc = getContext();

    //Convert data to a SequenceFile:
    File f = new File("src/test/resources/csvsequence/csvsequence_0.txt");
    String path = f.getPath();
    String folder = path.substring(0, path.length() - 17);
    path = folder + "*";

    PathToKeyConverter pathConverter = new PathToKeyConverterFilename();
    JavaPairRDD<Text, BytesPairWritable> toWrite = DataVecSparkUtil.combineFilesForSequenceFile(sc, path, path, pathConverter);

    Path p = Files.createTempDirectory("dl4j_testSeqPairFn");
    p.toFile().deleteOnExit();
    String outPath = p.toString() + "/out";
    new File(outPath).deleteOnExit();
    toWrite.saveAsNewAPIHadoopFile(outPath, Text.class, BytesPairWritable.class, SequenceFileOutputFormat.class);

    //Load from sequence file:
    JavaPairRDD<Text, BytesPairWritable> fromSeq = sc.sequenceFile(outPath, Text.class, BytesPairWritable.class);

    SequenceRecordReader srr1 = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader srr2 = new CSVSequenceRecordReader(1, ",");
    PairSequenceRecordReaderBytesFunction psrbf = new PairSequenceRecordReaderBytesFunction(srr1, srr2);
    JavaRDD<Tuple2<List<List<Writable>>, List<List<Writable>>>> writables = fromSeq.map(psrbf);

    //Map to DataSet:
    DataVecSequencePairDataSetFunction pairFn = new DataVecSequencePairDataSetFunction();
    JavaRDD<DataSet> data = writables.map(pairFn);
    List<DataSet> sparkData = data.collect();

    //Now: do the same thing locally (SequenceRecordReaderDataSetIterator) and compare
    String featuresPath = f.getAbsolutePath().replaceAll("0", "%d");
    SequenceRecordReader featureReader = new CSVSequenceRecordReader(1, ",");
    SequenceRecordReader labelReader = new CSVSequenceRecordReader(1, ",");
    featureReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    labelReader.initialize(new NumberedFileInputSplit(featuresPath, 0, 2));
    SequenceRecordReaderDataSetIterator iter = new SequenceRecordReaderDataSetIterator(featureReader, labelReader, 1, -1, true);

    List<DataSet> localData = new ArrayList<>(3);
    while (iter.hasNext()) localData.add(iter.next());

    assertEquals(3, sparkData.size());
    assertEquals(3, localData.size());

    for (int i = 0; i < 3; i++) {
        //Check shapes etc.; data set order may differ between the Spark and local versions
        DataSet dsSpark = sparkData.get(i);
        DataSet dsLocal = localData.get(i);

        assertNull(dsSpark.getFeaturesMaskArray());
        assertNull(dsSpark.getLabelsMaskArray());

        INDArray fSpark = dsSpark.getFeatureMatrix();
        INDArray fLocal = dsLocal.getFeatureMatrix();
        INDArray lSpark = dsSpark.getLabels();
        INDArray lLocal = dsLocal.getLabels();

        //1 example, 3 values, 4 time steps
        int[] s = new int[] {1, 3, 4};
        assertArrayEquals(s, fSpark.shape());
        assertArrayEquals(s, fLocal.shape());
        assertArrayEquals(s, lSpark.shape());
        assertArrayEquals(s, lLocal.shape());
    }
    //Check that the contents are the same (order notwithstanding)
    boolean[] found = new boolean[3];
    for (int i = 0; i < 3; i++) {
        int foundIndex = -1;
        DataSet ds = sparkData.get(i);
        for (int j = 0; j < 3; j++) {
            if (ds.equals(localData.get(j))) {
                if (foundIndex != -1)
                    //Already found a match -> this Spark value equals two or more of the local values (shouldn't happen)
                    fail();
                foundIndex = j;
                if (found[foundIndex])
                    //Another Spark value was already equal to this local value -> suggests duplicates in the Spark list
                    fail();
                //Mark this local value as matched
                found[foundIndex] = true;
            }
        }
    }

    int count = 0;
    for (boolean b : found)
        if (b)
            count++;
    //Expect exactly 3 pairwise matches between the Spark and local versions
    assertEquals(3, count);
}
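As an aside, the same order-insensitive comparison can be written more compactly by deleting matches from a working copy of the local list. A minimal sketch, equivalent in effect to the loop above (assuming JUnit's assertTrue is statically imported alongside the other assertions):

List<DataSet> remaining = new ArrayList<>(localData);
for (DataSet ds : sparkData) {
    //List.remove(Object) deletes at most one matching element, so duplicates are not double-counted
    assertTrue("No unmatched local DataSet equals this Spark DataSet", remaining.remove(ds));
}
assertTrue(remaining.isEmpty());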
Use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.
From the class SparkUtils, method checkKryoConfiguration:
/**
 * Check the Spark configuration for an incorrect Kryo configuration, testing INDArray serialization
 * (and throwing an exception) if Kryo is enabled but misconfigured
 *
 * @param javaSparkContext Spark context
 * @param log              Logger to log messages to
 * @return True if OK (no Kryo, or a correct Kryo setup)
 */
public static boolean checkKryoConfiguration(JavaSparkContext javaSparkContext, Logger log) {
    //Check if the Kryo configuration is correct:
    String serializer = javaSparkContext.getConf().get("spark.serializer", null);
    if (serializer != null && serializer.equals("org.apache.spark.serializer.KryoSerializer")) {
        String kryoRegistrator = javaSparkContext.getConf().get("spark.kryo.registrator", null);
        if (kryoRegistrator == null || !kryoRegistrator.equals("org.nd4j.Nd4jRegistrator")) {
            //It's probably going to fail later due to Kryo failing on INDArray deserialization (off-heap data)
            //But: the user might be using a custom Kryo registrator that can handle ND4J INDArrays, even if they
            // aren't using the official ND4J-provided one
            //Either way: test serialization of INDArrays now, and fail early if necessary
            SerializerInstance si;
            ByteBuffer bb;
            try {
                si = javaSparkContext.env().serializer().newInstance();
                bb = si.serialize(Nd4j.linspace(1, 5, 5), null);
            } catch (Exception e) {
                //Failed for some unknown reason during serialization - should never happen
                throw new RuntimeException(KRYO_EXCEPTION_MSG, e);
            }
            if (bb == null) {
                //Should probably never happen
                throw new RuntimeException(KRYO_EXCEPTION_MSG + "\n(Got: null ByteBuffer from Spark SerializerInstance)");
            } else {
                //Could serialize successfully, but still may not be able to deserialize if kryo config is wrong
                boolean equals;
                INDArray deserialized;
                try {
                    deserialized = si.deserialize(bb, null);
                    //Equals method may fail on malformed INDArrays, hence should be within the try-catch
                    equals = Nd4j.linspace(1, 5, 5).equals(deserialized);
                } catch (Exception e) {
                    throw new RuntimeException(KRYO_EXCEPTION_MSG, e);
                }
                if (!equals) {
                    throw new RuntimeException(KRYO_EXCEPTION_MSG + "\n(Error during deserialization: test array was not deserialized successfully)");
                }
                //Otherwise: serialization/deserialization was successful using Kryo
                return true;
            }
        }
    }
    return true;
}
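For reference, a Spark configuration that passes this check sets exactly the two properties tested above. A minimal sketch (the application name is hypothetical; the property names and the org.nd4j.Nd4jRegistrator class come straight from the strings checked in the code):

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

SparkConf conf = new SparkConf()
        .setAppName("myApp") //hypothetical application name
        .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        .set("spark.kryo.registrator", "org.nd4j.Nd4jRegistrator");
JavaSparkContext sc = new JavaSparkContext(conf);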
Use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.
From the class BaseSparkTest, method getBasicSparkDataSet:
protected JavaRDD<DataSet> getBasicSparkDataSet(int nRows, INDArray input, INDArray labels) {
    //Build one single-example DataSet per row of the input/labels matrices
    List<DataSet> list = new ArrayList<>();
    for (int i = 0; i < nRows; i++) {
        INDArray inRow = input.getRow(i).dup();
        INDArray outRow = labels.getRow(i).dup();
        DataSet ds = new DataSet(inRow, outRow);
        list.add(ds);
    }
    //Keep a merged copy of all examples in the 'data' field
    data = DataSet.merge(list);
    return sc.parallelize(list);
}
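A typical call from a test might look like the following sketch (the shapes are illustrative only, with one row per example; sc and data are fields of BaseSparkTest as used above):

INDArray input = Nd4j.rand(150, 4);   //150 examples, 4 features each (illustrative sizes)
INDArray labels = Nd4j.rand(150, 3);  //150 examples, 3 label values each
JavaRDD<DataSet> rdd = getBasicSparkDataSet(150, input, labels);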
Use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.
From the class BaseVaeScoreWithKeyFunctionAdapter, method call:
@Override
public Iterable<Tuple2<K, Double>> call(Iterator<Tuple2<K, INDArray>> iterator) throws Exception {
    if (!iterator.hasNext()) {
        return Collections.emptyList();
    }
    VariationalAutoencoder vae = getVaeLayer();

    List<Tuple2<K, Double>> ret = new ArrayList<>();
    List<INDArray> collect = new ArrayList<>(batchSize);
    List<K> collectKey = new ArrayList<>(batchSize);
    int totalCount = 0;
    while (iterator.hasNext()) {
        collect.clear();
        collectKey.clear();
        int nExamples = 0;
        //Collect up to batchSize single-example arrays (plus their keys) for batched scoring
        while (iterator.hasNext() && nExamples < batchSize) {
            Tuple2<K, INDArray> t2 = iterator.next();
            INDArray features = t2._2();
            int n = features.size(0);
            if (n != 1)
                throw new IllegalStateException("Cannot score examples with one key per data set if data set contains more than 1 example (numExamples: " + n + ")");
            collect.add(features);
            collectKey.add(t2._1());
            nExamples += n;
        }
        totalCount += nExamples;

        //Stack the single examples into one batch and score the batch in one pass
        INDArray toScore = Nd4j.vstack(collect);
        INDArray scores = computeScore(vae, toScore);

        //One score per example: pair each score back up with its key
        double[] doubleScores = scores.data().asDouble();
        for (int i = 0; i < doubleScores.length; i++) {
            ret.add(new Tuple2<>(collectKey.get(i), doubleScores[i]));
        }
    }

    if (Nd4j.getExecutioner() instanceof GridExecutioner)
        ((GridExecutioner) Nd4j.getExecutioner()).flushQueueBlocking();

    if (log.isDebugEnabled()) {
        log.debug("Scored {} examples", totalCount);
    }
    return ret;
}
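computeScore itself (like getVaeLayer) is supplied by subclasses. For a reconstruction-error variant, a minimal sketch, assuming DL4J's VariationalAutoencoder layer API (which exposes reconstructionError):

@Override
public INDArray computeScore(VariationalAutoencoder vae, INDArray toScore) {
    //Returns one reconstruction-error value per example in the vstacked batch
    return vae.reconstructionError(toScore);
}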
Use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.
From the class CGVaeReconstructionErrorWithKeyFunction, method getVaeLayer:
@Override
public VariationalAutoencoder getVaeLayer() {
    //Rebuild the network from the broadcast JSON configuration and broadcast parameters
    ComputationGraph network = new ComputationGraph(ComputationGraphConfiguration.fromJson((String) jsonConfig.getValue()));
    network.init();

    INDArray val = ((INDArray) params.value()).unsafeDuplication();
    if (val.length() != network.numParams(false))
        throw new IllegalStateException("Network did not have the same number of parameters as the broadcast parameters");
    network.setParams(val);

    Layer l = network.getLayer(0);
    if (!(l instanceof VariationalAutoencoder)) {
        throw new RuntimeException("Cannot use CGVaeReconstructionErrorWithKeyFunction on a network that does not have a VAE layer as layer 0. Layer type: " + l.getClass());
    }
    return (VariationalAutoencoder) l;
}
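The jsonConfig and params broadcasts consumed here would typically be created on the driver from a trained network. A sketch, assuming a JavaSparkContext sc and a trained ComputationGraph net (both hypothetical names):

Broadcast<String> jsonConfig = sc.broadcast(net.getConfiguration().toJson());
Broadcast<INDArray> params = sc.broadcast(net.params());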