use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.
the class BarnesHutTsne method score.
/**
 * Computes the t-SNE cost of the current embedding {@code Y}: the sum over all
 * stored edges {@code (n, cols[i])} of {@code p * log(p / q)}, where {@code p}
 * is the stored input similarity {@code vals[i]} and {@code q} is the
 * normalized low-dimensional similarity {@code (1 / (1 + d^2)) / sum_Q}.
 *
 * <p>{@code sum_Q} is handed to {@code tree.computeNonEdgeForces} once per
 * point and then read back as the normalization term. {@code Nd4j.EPS_THRESHOLD}
 * is added to numerator and denominator inside the logarithm so that neither a
 * zero {@code p} nor a zero {@code q} produces an infinite or undefined term.
 *
 * @return the accumulated cost over all edges described by {@code rows}/{@code cols}/{@code vals}
 */
@Override
public double score() {
    // Get estimate of normalization term
    INDArray buff = Nd4j.create(numDimensions);
    AtomicDouble sum_Q = new AtomicDouble(0.0);
    for (int n = 0; n < N; n++) {
        tree.computeNonEdgeForces(n, theta, buff, sum_Q);
    }
    // The accumulator is not touched again below, so read it once.
    final double normalization = sum_Q.doubleValue();

    // Loop over all edges to compute t-SNE error
    double C = .0;
    for (int n = 0; n < N; n++) {
        int begin = rows.getInt(n);
        int end = rows.getInt(n + 1);
        for (int i = begin; i < end; i++) {
            int ind2 = cols.getInt(i);
            // buff = Y[n] - Y[ind2]
            buff.assign(Y.slice(n));
            buff.subi(Y.slice(ind2));
            // Squared Euclidean distance between the two embedded points
            double Q = pow(buff, 2).sum(Integer.MAX_VALUE).getDouble(0);
            Q = (1.0 / (1.0 + Q)) / normalization;
            double p = vals.getDouble(i);
            // KL term: p * log(p / q). The division belongs INSIDE the logarithm.
            C += p * FastMath.log((p + Nd4j.EPS_THRESHOLD) / (Q + Nd4j.EPS_THRESHOLD));
        }
    }
    return C;
}
use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.
the class BarnesHutTsne method computeGaussianKernel.
/**
 * Builds one row of un-normalized Gaussian kernel values from a vector of
 * distances, together with a scalar derived from that row.
 *
 * <p>Entry {@code m} of the returned row is {@code exp(-beta * distances[m + 1])}
 * for {@code m} in {@code [0, k)}; element 0 of {@code distances} is never read.
 * The scalar is {@code (sum_m beta * distances[m + 1] * row[m]) / sum(row) + log(sum(row))}.
 *
 * @param distances vector of distances (the original documentation describes them
 *                  as squared distances); indices {@code 1..k} are read
 * @param beta      multiplier applied to each distance inside the exponent
 * @param k         number of kernel entries to compute
 * @return a pair holding the kernel row (length {@code k}) and the derived scalar
 */
public Pair<INDArray, Double> computeGaussianKernel(INDArray distances, double beta, int k) {
    // Fill the kernel row
    INDArray kernelRow = Nd4j.create(k);
    int idx = 0;
    while (idx < k) {
        double weight = FastMath.exp(-beta * distances.getDouble(idx + 1));
        kernelRow.putScalar(idx, weight);
        idx++;
    }
    double total = kernelRow.sum(Integer.MAX_VALUE).getDouble(0);

    // Accumulate the beta-weighted distance/kernel products
    double weighted = 0.0;
    for (int j = 0; j < k; j++) {
        double product = distances.getDouble(j + 1) * kernelRow.getDouble(j);
        weighted += beta * product;
    }

    double entropy = (weighted / total) + FastMath.log(total);
    return new Pair<>(kernelRow, entropy);
}
use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.
the class BarnesHutTsne method symmetrized.
/**
 * Symmetrizes a sparse matrix given in compressed-row form and returns the
 * values of the symmetrized matrix, each divided by two.
 *
 * <p>Row {@code n} of the input occupies indices {@code rowP[n] .. rowP[n + 1] - 1}
 * of {@code colP} and {@code valP}. For every stored entry {@code (n, c)}:
 * <ul>
 *   <li>if the reverse entry {@code (c, n)} is also stored, both symmetric slots
 *       receive {@code val(n, c) + val(c, n)} (written once, when {@code n <= c});</li>
 *   <li>otherwise both symmetric slots receive {@code val(n, c)}.</li>
 * </ul>
 *
 * <p>The column indices of the symmetrized matrix are built internally but only
 * the value vector is returned.
 *
 * @param rowP row offsets into {@code colP}/{@code valP}; {@code N + 1} entries are read
 * @param colP column index of each stored entry
 * @param valP value of each stored entry
 * @return the value vector of the symmetrized matrix, already divided by two
 */
public INDArray symmetrized(INDArray rowP, INDArray colP, INDArray valP) {
    // First pass: count how many entries each row of the symmetric matrix will hold
    INDArray rowCounts = Nd4j.create(N);
    for (int n = 0; n < N; n++) {
        int begin = rowP.getInt(n);
        int end = rowP.getInt(n + 1);
        for (int i = begin; i < end; i++) {
            int col = colP.getInt(i);
            // Check whether element (col, n) is present
            boolean present = false;
            int colBegin = rowP.getInt(col);
            int colEnd = rowP.getInt(col + 1);
            for (int m = colBegin; m < colEnd; m++) {
                if (colP.getInt(m) == n) {
                    present = true;
                }
            }
            // Row n always gains a slot; row col gains one only when it does not
            // already store the reverse entry itself.
            rowCounts.putScalar(n, rowCounts.getDouble(n) + 1);
            if (!present) {
                rowCounts.putScalar(col, rowCounts.getDouble(col) + 1);
            }
        }
    }

    int numElements = rowCounts.sum(Integer.MAX_VALUE).getInt(0);
    INDArray offset = Nd4j.create(N);
    INDArray symRowP = Nd4j.create(N + 1);
    INDArray symColP = Nd4j.create(numElements);
    INDArray symValP = Nd4j.create(numElements);
    // Prefix sums of the counts give the row offsets of the symmetric matrix
    for (int n = 0; n < N; n++) {
        symRowP.putScalar(n + 1, symRowP.getDouble(n) + rowCounts.getDouble(n));
    }

    // Second pass: fill the symmetric matrix
    for (int n = 0; n < N; n++) {
        int begin = rowP.getInt(n);
        int end = rowP.getInt(n + 1);
        for (int i = begin; i < end; i++) {
            int col = colP.getInt(i);
            boolean present = false;
            // Scan the WHOLE of row col, i.e. up to rowP[col + 1] (not rowP[col] + 1),
            // using the same bounds as the counting pass above.
            int colBegin = rowP.getInt(col);
            int colEnd = rowP.getInt(col + 1);
            for (int m = colBegin; m < colEnd; m++) {
                if (colP.getInt(m) == n) {
                    present = true;
                    if (n <= col) {
                        // make sure we do not add elements twice
                        int slotN = symRowP.getInt(n) + offset.getInt(n);
                        int slotCol = symRowP.getInt(col) + offset.getInt(col);
                        double merged = valP.getDouble(i) + valP.getDouble(m);
                        symColP.putScalar(slotN, col);
                        symColP.putScalar(slotCol, n);
                        symValP.putScalar(slotN, merged);
                        symValP.putScalar(slotCol, merged);
                    }
                }
            }
            // If (col, n) is not present, there is no addition involved. This must
            // happen regardless of the ordering of n and col: the counting pass
            // reserved a slot in both rows and the offsets below advance for both.
            if (!present) {
                int slotN = symRowP.getInt(n) + offset.getInt(n);
                int slotCol = symRowP.getInt(col) + offset.getInt(col);
                double value = valP.getDouble(i);
                symColP.putScalar(slotN, col);
                symColP.putScalar(slotCol, n);
                symValP.putScalar(slotN, value);
                symValP.putScalar(slotCol, value);
            }
            // Update offsets for every slot that was just written
            if (!present || n <= col) {
                offset.putScalar(n, offset.getInt(n) + 1);
                if (col != n) {
                    offset.putScalar(col, offset.getDouble(col) + 1);
                }
            }
        }
    }

    // Divide the result by two
    symValP.divi(2.0);
    return symValP;
}
use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.
the class RecordReaderDataSetIterator method getDataSet.
/**
 * Converts one record (a list of {@code Writable} values) into a {@code DataSet}.
 *
 * <p>The cases are tried in this order:
 * <ol>
 *   <li>A two-element record whose two elements are the same {@code NDArrayWritable}
 *       instance: its array is returned as both features and labels.</li>
 *   <li>A two-element record whose first element is an {@code NDArrayWritable}:
 *       element 0 supplies the features and element 1 supplies the label.</li>
 *   <li>Anything else: the columns are walked one by one and routed to the label
 *       or to the feature vector depending on {@code regression},
 *       {@code labelIndex} and {@code labelIndexTo}.</li>
 * </ol>
 *
 * <p>Side effect: when {@code numPossibleLabels >= 1} and {@code labelIndex < 0},
 * the {@code labelIndex} field is overwritten with the index of the last column.
 *
 * @param record the values of a single example
 * @return the features and labels; when {@code labelIndex} is negative at the end
 *         of the column walk, the feature vector is used as the labels too
 * @throws IllegalStateException if the single-label column is reached while
 *         {@code numPossibleLabels < 1}
 * @throws DL4JInvalidInputException if a classification label is outside
 *         {@code 0 .. numPossibleLabels - 1}
 */
private DataSet getDataSet(List<Writable> record) {
List<Writable> currList;
// NOTE(review): record is declared as List<Writable>, so this instanceof is true for
// every non-null record; the else branch is only reachable with a null record.
if (record instanceof List)
currList = record;
else
currList = new ArrayList<>(record);
//allow people to specify label index as -1 and infer the last possible label
if (numPossibleLabels >= 1 && labelIndex < 0) {
labelIndex = record.size() - 1;
}
INDArray label = null;
INDArray featureVector = null;
// Next write positions inside featureVector and label for the column walk below
int featureCount = 0;
int labelCount = 0;
//no labels: both elements are the very same NDArrayWritable instance (reference comparison)
if (currList.size() == 2 && currList.get(1) instanceof NDArrayWritable && currList.get(0) instanceof NDArrayWritable && currList.get(0) == currList.get(1)) {
NDArrayWritable writable = (NDArrayWritable) currList.get(0);
return new DataSet(writable.get(), writable.get());
}
// Two-element record with the features already packed into an NDArrayWritable
if (currList.size() == 2 && currList.get(0) instanceof NDArrayWritable) {
if (!regression) {
// Classification: the label's string form is parsed as a double and truncated to an int
label = FeatureUtil.toOutcomeVector((int) Double.parseDouble(currList.get(1).toString()), numPossibleLabels);
} else {
// Regression: use the label array as-is, or wrap a single value as a scalar
if (currList.get(1) instanceof NDArrayWritable) {
label = ((NDArrayWritable) currList.get(1)).get();
} else {
label = Nd4j.scalar(currList.get(1).toDouble());
}
}
NDArrayWritable ndArrayWritable = (NDArrayWritable) currList.get(0);
featureVector = ndArrayWritable.get();
return new DataSet(featureVector, label);
}
// General case: route every column to either the label or the feature vector
for (int j = 0; j < currList.size(); j++) {
Writable current = currList.get(j);
//ndarray writable is an insane slow down here
// Skip columns whose string form is empty; toString() is not called on an NDArrayWritable
if (!(current instanceof NDArrayWritable) && current.toString().isEmpty())
continue;
if (regression && j == labelIndex && j == labelIndexTo && current instanceof NDArrayWritable) {
//Case: NDArrayWritable for the labels
label = ((NDArrayWritable) current).get();
} else if (regression && j >= labelIndex && j <= labelIndexTo) {
//This is the multi-label regression case
// The label row is allocated lazily with one column per index in [labelIndex, labelIndexTo]
if (label == null)
label = Nd4j.create(1, (labelIndexTo - labelIndex + 1));
label.putScalar(labelCount++, current.toDouble());
} else if (labelIndex >= 0 && j == labelIndex) {
//single label case (classification, etc)
// NOTE(review): a failed conversion is only printed; processing continues with the
// unconverted value because 'current' is not reassigned when convert() throws.
if (converter != null)
try {
current = converter.convert(current);
} catch (WritableConverterException e) {
e.printStackTrace();
}
if (numPossibleLabels < 1)
throw new IllegalStateException("Number of possible labels invalid, must be >= 1");
if (regression) {
label = Nd4j.scalar(current.toDouble());
} else {
int curr = current.toInt();
if (curr < 0 || curr >= numPossibleLabels) {
throw new DL4JInvalidInputException("Invalid classification data: expect label value (at label index column = " + labelIndex + ") to be in range 0 to " + (numPossibleLabels - 1) + " inclusive (0 to numClasses-1, with numClasses=" + numPossibleLabels + "); got label value of " + current);
}
label = FeatureUtil.toOutcomeVector(curr, numPossibleLabels);
}
} else {
// Feature column
try {
double value = current.toDouble();
// The feature vector is allocated on the first feature column encountered
if (featureVector == null) {
if (regression && labelIndex >= 0) {
//Handle the possibly multi-label regression case here:
int nLabels = labelIndexTo - labelIndex + 1;
featureVector = Nd4j.create(1, currList.size() - nLabels);
} else {
//Classification case, and also no-labels case
featureVector = Nd4j.create(labelIndex >= 0 ? currList.size() - 1 : currList.size());
}
}
featureVector.putScalar(featureCount++, value);
} catch (UnsupportedOperationException e) {
// This isn't a scalar, so check if we got an array already
if (current instanceof NDArrayWritable) {
// The array becomes the whole feature vector; no scalar feature may have been seen before it
assert featureVector == null;
featureVector = ((NDArrayWritable) current).get();
} else {
throw e;
}
}
}
}
// With no label column (labelIndex < 0) the features double as the labels
return new DataSet(featureVector, labelIndex >= 0 ? label : featureVector);
}
use of org.nd4j.linalg.api.ndarray.INDArray in project deeplearning4j by deeplearning4j.
the class RecordReaderMultiDataSetIterator method nextMultiDataSet.
/**
 * Assembles a {@code MultiDataSet} from values that have already been read from
 * the standard and sequence record readers.
 *
 * <p>Steps:
 * <ol>
 *   <li>Determine the number of examples as the smallest list size found across
 *       both maps.</li>
 *   <li>In {@code ALIGN_END} mode, record the longest sequence length per example;
 *       unless the mode is {@code EQUAL_LENGTH}, record the longest sequence
 *       length overall (it stays {@code -1} otherwise).</li>
 *   <li>Convert every configured input and output subset, using
 *       {@code convertWritables} when the subset's reader name is a key of
 *       {@code nextRRVals} and {@code convertWritablesSequence} otherwise.</li>
 *   <li>Attach meta data when {@code collectMetaData} is set and run the
 *       pre-processor when one is configured.</li>
 * </ol>
 *
 * <p>A mask array set (inputs or outputs) is passed on as {@code null} when none
 * of its sequence conversions produced a mask.
 *
 * @param nextRRVals    values from the standard readers, keyed by reader name
 * @param nextSeqRRVals values from the sequence readers, keyed by reader name
 * @param nextMetas     meta data to attach when meta data collection is enabled
 * @return the assembled (and, if configured, pre-processed) data set
 * @throws RuntimeException if both maps are empty, so no example count can be determined
 */
private MultiDataSet nextMultiDataSet(Map<String, List<List<Writable>>> nextRRVals, Map<String, List<List<List<Writable>>>> nextSeqRRVals, List<RecordMetaDataComposableMap> nextMetas) {
    //First: the usable example count is the smallest one supplied by any reader
    int minExamples = Integer.MAX_VALUE;
    for (List<List<Writable>> standardExamples : nextRRVals.values()) {
        if (standardExamples.size() < minExamples) {
            minExamples = standardExamples.size();
        }
    }
    for (List<List<List<Writable>>> sequenceExamples : nextSeqRRVals.values()) {
        if (sequenceExamples.size() < minExamples) {
            minExamples = sequenceExamples.size();
        }
    }
    if (minExamples == Integer.MAX_VALUE) {
        //Should never happen
        throw new RuntimeException("Error occurred during data set generation: no readers?");
    }

    //Aligning at the end is done per example, which needs the longest series of each example
    int[] longestSequence = null;
    if (alignmentMode == AlignmentMode.ALIGN_END) {
        longestSequence = new int[minExamples];
        for (List<List<List<Writable>>> sequences : nextSeqRRVals.values()) {
            int limit = Math.min(sequences.size(), minExamples);
            for (int ex = 0; ex < limit; ex++) {
                int length = sequences.get(ex).size();
                if (length > longestSequence[ex]) {
                    longestSequence[ex] = length;
                }
            }
        }
    }

    //Padding needs the longest time series length over all examples
    int longestTS = -1;
    if (alignmentMode != AlignmentMode.EQUAL_LENGTH) {
        for (List<List<List<Writable>>> sequences : nextSeqRRVals.values()) {
            for (List<List<Writable>> series : sequences) {
                if (series.size() > longestTS) {
                    longestTS = series.size();
                }
            }
        }
    }

    //Second: create the input arrays
    INDArray[] inputArrs = new INDArray[inputs.size()];
    INDArray[] inputArrMasks = new INDArray[inputs.size()];
    boolean inputMasks = false;
    int slot = 0;
    for (SubsetDetails details : inputs) {
        if (!nextRRVals.containsKey(details.readerName)) {
            //Sequence reader
            List<List<List<Writable>>> sequences = nextSeqRRVals.get(details.readerName);
            Pair<INDArray, INDArray> arrayAndMask = convertWritablesSequence(sequences, minExamples, longestTS, details, longestSequence);
            inputArrs[slot] = arrayAndMask.getFirst();
            inputArrMasks[slot] = arrayAndMask.getSecond();
            if (inputArrMasks[slot] != null) {
                inputMasks = true;
            }
        } else {
            //Standard reader
            List<List<Writable>> examples = nextRRVals.get(details.readerName);
            inputArrs[slot] = convertWritables(examples, minExamples, details);
        }
        slot++;
    }

    //Third: create the outputs
    INDArray[] outputArrs = new INDArray[outputs.size()];
    INDArray[] outputArrMasks = new INDArray[outputs.size()];
    boolean outputMasks = false;
    slot = 0;
    for (SubsetDetails details : outputs) {
        if (!nextRRVals.containsKey(details.readerName)) {
            //Sequence reader
            List<List<List<Writable>>> sequences = nextSeqRRVals.get(details.readerName);
            Pair<INDArray, INDArray> arrayAndMask = convertWritablesSequence(sequences, minExamples, longestTS, details, longestSequence);
            outputArrs[slot] = arrayAndMask.getFirst();
            outputArrMasks[slot] = arrayAndMask.getSecond();
            if (outputArrMasks[slot] != null) {
                outputMasks = true;
            }
        } else {
            //Standard reader
            List<List<Writable>> examples = nextRRVals.get(details.readerName);
            outputArrs[slot] = convertWritables(examples, minExamples, details);
        }
        slot++;
    }

    //Mask arrays are only handed over when at least one mask was actually produced
    INDArray[] inputMasksOrNull = inputMasks ? inputArrMasks : null;
    INDArray[] outputMasksOrNull = outputMasks ? outputArrMasks : null;
    MultiDataSet mds = new org.nd4j.linalg.dataset.MultiDataSet(inputArrs, outputArrs, inputMasksOrNull, outputMasksOrNull);
    if (collectMetaData) {
        mds.setExampleMetaData(nextMetas);
    }
    if (preProcessor != null) {
        preProcessor.preProcess(mds);
    }
    return mds;
}
Aggregations