use of org.apache.ignite.ml.structures.LabeledVector in project ignite by apache.
the class MnistDistributed method loadIntoCache.
/**
* Load MNIST into cache.
*
* @param trainingMnistLst List with mnist data.
* @param labeledVectorsCache Cache to load MNIST into.
*/
private void loadIntoCache(List<DenseLocalOnHeapVector> trainingMnistLst, IgniteCache<Integer, LabeledVector<Vector, Vector>> labeledVectorsCache) {
String cacheName = labeledVectorsCache.getName();
try (IgniteDataStreamer<Integer, LabeledVector<Vector, Vector>> streamer = ignite.dataStreamer(cacheName)) {
int sampleIdx = 0;
streamer.perNodeBufferSize(10000);
for (DenseLocalOnHeapVector vector : trainingMnistLst) {
streamer.addData(sampleIdx, asLabeledVector(vector, FEATURES_CNT));
if (sampleIdx % 5000 == 0)
X.println("Loaded " + sampleIdx + " samples.");
sampleIdx++;
}
}
}
use of org.apache.ignite.ml.structures.LabeledVector in project ignite by apache.
the class LabeledDatasetTest method testAccessMethods.
/**
*/
public void testAccessMethods() {
IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
double[][] mtx = new double[][] { { 1.0, 1.0 }, { 1.0, 2.0 }, { 2.0, 1.0 }, { -1.0, -1.0 }, { -1.0, -2.0 }, { -2.0, -1.0 } };
double[] lbs = new double[] { 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 };
final LabeledDataset dataset = new LabeledDataset(mtx, lbs, null, false);
assertEquals(dataset.colSize(), 2);
assertEquals(dataset.rowSize(), 6);
final LabeledVector<Vector, Double> row = (LabeledVector<Vector, Double>) dataset.getRow(0);
assertEquals(row.features().get(0), 1.0);
assertEquals(row.label(), 1.0);
dataset.setLabel(0, 2.0);
assertEquals(row.label(), 2.0);
}
use of org.apache.ignite.ml.structures.LabeledVector in project ignite by apache.
the class MLPGroupTrainerExample method main.
/**
* Executes example.
*
* @param args Command line arguments, none required.
*/
public static void main(String[] args) throws InterruptedException {
// IMPL NOTE based on MLPGroupTrainerTest#testXOR
System.out.println(">>> Distributed multilayer perceptron example started.");
// Start ignite grid.
try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
System.out.println(">>> Ignite grid started.");
// Create IgniteThread, we must work with SparseDistributedMatrix inside IgniteThread
// because we create ignite cache internally.
IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), MLPGroupTrainerExample.class.getSimpleName(), () -> {
int samplesCnt = 10000;
Matrix xorInputs = new DenseLocalOnHeapMatrix(new double[][] { { 0.0, 0.0 }, { 0.0, 1.0 }, { 1.0, 0.0 }, { 1.0, 1.0 } }, StorageConstants.ROW_STORAGE_MODE).transpose();
Matrix xorOutputs = new DenseLocalOnHeapMatrix(new double[][] { { 0.0 }, { 1.0 }, { 1.0 }, { 0.0 } }, StorageConstants.ROW_STORAGE_MODE).transpose();
MLPArchitecture conf = new MLPArchitecture(2).withAddedLayer(10, true, Activators.RELU).withAddedLayer(1, false, Activators.SIGMOID);
IgniteCache<Integer, LabeledVector<Vector, Vector>> cache = LabeledVectorsCache.createNew(ignite);
String cacheName = cache.getName();
Random rnd = new Random(12345L);
try (IgniteDataStreamer<Integer, LabeledVector<Vector, Vector>> streamer = ignite.dataStreamer(cacheName)) {
streamer.perNodeBufferSize(100);
for (int i = 0; i < samplesCnt; i++) {
int col = Math.abs(rnd.nextInt()) % 4;
streamer.addData(i, new LabeledVector<>(xorInputs.getCol(col), xorOutputs.getCol(col)));
}
}
int totalCnt = 100;
int failCnt = 0;
MLPGroupUpdateTrainer<RPropParameterUpdate> trainer = MLPGroupUpdateTrainer.getDefault(ignite).withSyncPeriod(3).withTolerance(0.001).withMaxGlobalSteps(20);
for (int i = 0; i < totalCnt; i++) {
MLPGroupUpdateTrainerCacheInput trainerInput = new MLPGroupUpdateTrainerCacheInput(conf, new RandomInitializer(rnd), 6, cache, 10);
MultilayerPerceptron mlp = trainer.train(trainerInput);
Matrix predict = mlp.apply(xorInputs);
System.out.println(">>> Prediction data at step " + i + " of total " + totalCnt + ":");
Tracer.showAscii(predict);
System.out.println("Difference estimate: " + xorOutputs.getRow(0).minus(predict.getRow(0)).kNorm(2));
failCnt += closeEnough(xorOutputs.getRow(0), predict.getRow(0)) ? 0 : 1;
}
double failRatio = (double) failCnt / totalCnt;
System.out.println("\n>>> Fail percentage: " + (failRatio * 100) + "%.");
System.out.println("\n>>> Distributed multilayer perceptron example completed.");
});
igniteThread.start();
igniteThread.join();
}
}
use of org.apache.ignite.ml.structures.LabeledVector in project ignite by apache.
the class LabeledDatasetLoader method loadFromTxtFile.
/**
* Datafile should keep class labels in the first column.
*
* @param pathToFile Path to file.
* @param separator Element to tokenize row on separate tokens.
* @param isDistributed Generates distributed dataset if true.
* @param isFallOnBadData Fall on incorrect data if true.
* @return Labeled Dataset parsed from file.
*/
public static LabeledDataset loadFromTxtFile(Path pathToFile, String separator, boolean isDistributed, boolean isFallOnBadData) throws IOException {
Stream<String> stream = Files.lines(pathToFile);
List<String> list = new ArrayList<>();
stream.forEach(list::add);
final int rowSize = list.size();
List<Double> labels = new ArrayList<>();
List<Vector> vectors = new ArrayList<>();
if (rowSize > 0) {
final int colSize = getColumnSize(separator, list) - 1;
if (colSize > 0) {
for (int i = 0; i < rowSize; i++) {
Double clsLb;
String[] rowData = list.get(i).split(separator);
try {
clsLb = Double.parseDouble(rowData[0]);
Vector vec = parseFeatures(pathToFile, isDistributed, isFallOnBadData, colSize, i, rowData);
labels.add(clsLb);
vectors.add(vec);
} catch (NumberFormatException e) {
if (isFallOnBadData)
throw new FileParsingException(rowData[0], i, pathToFile);
}
}
LabeledVector[] data = new LabeledVector[vectors.size()];
for (int i = 0; i < vectors.size(); i++) data[i] = new LabeledVector(vectors.get(i), labels.get(i));
return new LabeledDataset(data, colSize);
} else
throw new NoDataException("File should contain first row with data");
} else
throw new EmptyFileException(pathToFile.toString());
}
use of org.apache.ignite.ml.structures.LabeledVector in project ignite by apache.
the class Deltas method fit.
/**
* Trains model based on the specified data.
*
* @param datasetBuilder Dataset builder.
* @param featureExtractor Feature extractor.
* @param lbExtractor Label extractor.
* @param cols Number of columns.
* @return Model.
*/
@Override
public SVMLinearBinaryClassificationModel fit(DatasetBuilder<K, V> datasetBuilder, IgniteBiFunction<K, V, double[]> featureExtractor, IgniteBiFunction<K, V, Double> lbExtractor, int cols) {
assert datasetBuilder != null;
PartitionDataBuilder<K, V, SVMPartitionContext, LabeledDataset<Double, LabeledVector>> partDataBuilder = new SVMPartitionDataBuilderOnHeap<>(featureExtractor, lbExtractor, cols);
Vector weights;
try (Dataset<SVMPartitionContext, LabeledDataset<Double, LabeledVector>> dataset = datasetBuilder.build((upstream, upstreamSize) -> new SVMPartitionContext(), partDataBuilder)) {
final int weightVectorSizeWithIntercept = cols + 1;
weights = initializeWeightsWithZeros(weightVectorSizeWithIntercept);
for (int i = 0; i < this.getAmountOfIterations(); i++) {
Vector deltaWeights = calculateUpdates(weights, dataset);
// creates new vector
weights = weights.plus(deltaWeights);
}
} catch (Exception e) {
throw new RuntimeException(e);
}
return new SVMLinearBinaryClassificationModel(weights.viewPart(1, weights.size() - 1), weights.get(0));
}
Aggregations