Use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache:
the class IgniteKNNRegressionBenchmark, method test.
/**
 * Runs the kNN-regression benchmark body on an Ignite thread.
 *
 * @param ctx Benchmark context (unused here).
 * @return {@code true} always; failures surface as exceptions.
 * @throws Exception If the worker thread is interrupted.
 * {@inheritDoc}
 */
@Override
public boolean test(Map<Object, Object> ctx) throws Exception {
    // Create IgniteThread: we must work with SparseDistributedMatrix inside an IgniteThread
    // because an ignite cache is created internally.
    IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
        this.getClass().getSimpleName(), () -> {
        // IMPL NOTE originally taken from KNNRegressionExample.
        // Obtain shuffled dataset.
        LabeledDataset dataset = new Datasets().shuffleClearedMachines((int)(DataChanger.next()));

        // Normalize dataset.
        Normalizer.normalizeWithMiniMax(dataset);

        // Random splitting of the data as 80% train and 20% test datasets.
        LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.2);

        LabeledDataset test = split.test();
        LabeledDataset train = split.train();

        // Build weighted kNN-regression with Manhattan distance.
        KNNModel knnMdl = new KNNMultipleLinearRegression(7, new ManhattanDistance(), KNNStrategy.WEIGHTED, train);

        // Calculate predicted values for the test rows; results are intentionally discarded —
        // this is a throughput benchmark.
        // NOTE(review): the bound skips the last row (rowSize() - 1) — preserved from the
        // original; confirm whether the last row should be included.
        for (int i = 0; i < test.rowSize() - 1; i++)
            knnMdl.apply(test.getRow(i).features());
    });

    igniteThread.start();
    igniteThread.join();

    return true;
}
Use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache:
the class LabeledDatasetLoader, method loadFromTxtFile.
/**
 * Loads a {@link LabeledDataset} from a text file.
 *
 * Datafile should keep class labels in the first column; the remaining columns are
 * parsed as feature values.
 *
 * @param pathToFile Path to file.
 * @param separator Element to tokenize row on separate tokens.
 * @param isDistributed Generates distributed dataset if true.
 * @param isFallOnBadData Fall on incorrect data if true.
 * @return Labeled Dataset parsed from file.
 * @throws IOException If the file cannot be read.
 */
public static LabeledDataset loadFromTxtFile(Path pathToFile, String separator, boolean isDistributed,
    boolean isFallOnBadData) throws IOException {
    List<String> list = new ArrayList<>();

    // Files.lines holds an open file handle; close it with try-with-resources
    // (the original leaked the stream).
    try (Stream<String> stream = Files.lines(pathToFile)) {
        stream.forEach(list::add);
    }

    final int rowSize = list.size();

    List<Double> labels = new ArrayList<>();
    List<Vector> vectors = new ArrayList<>();

    if (rowSize > 0) {
        // First column is the label, so feature count is column count minus one.
        final int colSize = getColumnSize(separator, list) - 1;

        if (colSize > 0) {
            for (int i = 0; i < rowSize; i++) {
                Double clsLb;

                String[] rowData = list.get(i).split(separator);

                try {
                    clsLb = Double.parseDouble(rowData[0]);
                    Vector vec = parseFeatures(pathToFile, isDistributed, isFallOnBadData, colSize, i, rowData);
                    labels.add(clsLb);
                    vectors.add(vec);
                } catch (NumberFormatException e) {
                    // Rows with an unparsable label are skipped unless strict mode is on.
                    if (isFallOnBadData)
                        throw new FileParsingException(rowData[0], i, pathToFile);
                }
            }

            LabeledVector[] data = new LabeledVector[vectors.size()];
            for (int i = 0; i < vectors.size(); i++)
                data[i] = new LabeledVector(vectors.get(i), labels.get(i));

            return new LabeledDataset(data, colSize);
        } else
            throw new NoDataException("File should contain first row with data");
    } else
        throw new EmptyFileException(pathToFile.toString());
}
Use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache:
the class Deltas, method fit.
/**
 * Trains model based on the specified data.
 *
 * @param datasetBuilder Dataset builder.
 * @param featureExtractor Feature extractor.
 * @param lbExtractor Label extractor.
 * @param cols Number of columns.
 * @return Model.
 */
@Override
public SVMLinearBinaryClassificationModel fit(DatasetBuilder<K, V> datasetBuilder,
    IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor, int cols) {
    assert datasetBuilder != null;

    PartitionDataBuilder<K, V, SVMPartitionContext, LabeledDataset<Double, LabeledVector>> partDataBuilder =
        new SVMPartitionDataBuilderOnHeap<>(featureExtractor, lbExtractor, cols);

    Vector weightVec;

    try (Dataset<SVMPartitionContext, LabeledDataset<Double, LabeledVector>> dataset =
        datasetBuilder.build((upstream, upstreamSize) -> new SVMPartitionContext(), partDataBuilder)) {
        // One extra component for the intercept term.
        final int weightVectorSizeWithIntercept = cols + 1;

        weightVec = initializeWeightsWithZeros(weightVectorSizeWithIntercept);

        final int iterations = this.getAmountOfIterations();

        for (int iter = 0; iter < iterations; iter++) {
            Vector update = calculateUpdates(weightVec, dataset);

            // plus() produces a new vector; weights stay immutable per iteration.
            weightVec = weightVec.plus(update);
        }
    } catch (Exception e) {
        // Dataset close() is declared to throw Exception; rethrow unchecked at this boundary.
        throw new RuntimeException(e);
    }

    // Component 0 holds the intercept; the remaining components are feature weights.
    return new SVMLinearBinaryClassificationModel(weightVec.viewPart(1, weightVec.size() - 1), weightVec.get(0));
}
Use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache:
the class KNNClassificationTest, method testBinaryClassificationTest.
/**
 * Checks simple-strategy kNN binary classification on a tiny two-cluster dataset.
 */
public void testBinaryClassificationTest() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    // Two clusters: three points near (1, 1) labeled 1.0, three near (-1, -1) labeled 2.0.
    double[][] mtx = new double[][] { { 1.0, 1.0 }, { 1.0, 2.0 }, { 2.0, 1.0 }, { -1.0, -1.0 }, { -1.0, -2.0 }, { -2.0, -1.0 } };
    double[] lbs = new double[] { 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 };

    LabeledDataset training = new LabeledDataset(mtx, lbs);

    KNNModel knnMdl = new KNNModel(3, new EuclideanDistance(), KNNStrategy.SIMPLE, training);

    // assertEquals takes (expected, actual, delta); the original had actual first and
    // relied on boxed-Double Object comparison.
    Vector firstVector = new DenseLocalOnHeapVector(new double[] { 2.0, 2.0 });
    assertEquals(1.0, knnMdl.apply(firstVector), 0);

    Vector secondVector = new DenseLocalOnHeapVector(new double[] { -2.0, -2.0 });
    assertEquals(2.0, knnMdl.apply(secondVector), 0);
}
Use of org.apache.ignite.ml.structures.LabeledDataset in project ignite by apache:
the class KNNClassificationTest, method testBinaryClassificationFarPointsWithWeightedStrategy.
/**
 * Checks that the weighted strategy lets a single very close neighbor (label 1.0)
 * outvote two nearby neighbors of the other class.
 */
public void testBinaryClassificationFarPointsWithWeightedStrategy() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    // Class 1.0 includes two far points and one point at (-1, -1) next to the query.
    double[][] mtx = new double[][] { { 10.0, 10.0 }, { 10.0, 20.0 }, { -1, -1 }, { -2, -2 }, { -1.0, -2.0 }, { -2.0, -1.0 } };
    double[] lbs = new double[] { 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 };

    LabeledDataset training = new LabeledDataset(mtx, lbs);

    KNNModel knnMdl = new KNNModel(3, new EuclideanDistance(), KNNStrategy.WEIGHTED, training);

    // assertEquals takes (expected, actual, delta); the original had actual first and
    // relied on boxed-Double Object comparison.
    Vector vector = new DenseLocalOnHeapVector(new double[] { -1.01, -1.01 });
    assertEquals(1.0, knnMdl.apply(vector), 0);
}
Aggregations