Search in sources :

Example 1 with FileParsingException

use of org.apache.ignite.ml.math.exceptions.datastructures.FileParsingException in project ignite by apache.

the class SandboxMLCache method fillCacheWith.

/**
 * Fills cache with data and returns it.
 *
 * <p>Every non-header line of the dataset file is split on the dataset separator and stored in the cache as a
 * vector. Empty cells become {@code Double.NaN}. A cell rejected by {@code Double.valueOf} is re-parsed with the
 * {@code Locale.FRANCE} number format before the load is aborted.
 *
 * @param dataset The chosen dataset.
 * @return Filled Ignite Cache.
 * @throws FileNotFoundException If file not found.
 * @throws FileParsingException If a cell cannot be parsed as a number by either parser.
 */
public IgniteCache<Integer, Vector> fillCacheWith(MLSandboxDatasets dataset) throws FileNotFoundException {
    IgniteCache<Integer, Vector> cache = getCache();
    String fileName = dataset.getFileName();
    File file = IgniteUtils.resolveIgnitePath(fileName);
    if (file == null)
        throw new FileNotFoundException(fileName);

    // Fallback parser for cells Double.valueOf rejects; it does not depend on the row, so build it once.
    NumberFormat format = NumberFormat.getInstance(Locale.FRANCE);

    // try-with-resources releases the underlying file handle even when parsing fails midway.
    try (Scanner scanner = new Scanner(file)) {
        // Line counter and cache key. A header line consumes index 0, so data keys then start at 1.
        int cnt = 0;
        while (scanner.hasNextLine()) {
            String row = scanner.nextLine();
            if (dataset.hasHeader() && cnt == 0) {
                cnt++;
                continue;
            }
            String[] cells = row.split(dataset.getSeparator());
            double[] data = new double[cells.length];
            for (int i = 0; i < cells.length; i++) {
                try {
                    if (cells[i].isEmpty())
                        data[i] = Double.NaN;
                    else
                        data[i] = Double.valueOf(cells[i]);
                } catch (NumberFormatException e) {
                    try {
                        data[i] = format.parse(cells[i]).doubleValue();
                    } catch (ParseException e1) {
                        // Report the row (line) index, matching the other FileParsingException call sites,
                        // rather than the column index of the offending cell.
                        throw new FileParsingException(cells[i], cnt, Paths.get(dataset.getFileName()));
                    }
                }
            }
            cache.put(cnt++, VectorUtils.of(data));
        }
    }
    return cache;
}
Also used : FileParsingException(org.apache.ignite.ml.math.exceptions.datastructures.FileParsingException) Scanner(java.util.Scanner) FileNotFoundException(java.io.FileNotFoundException) ParseException(java.text.ParseException) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) File(java.io.File) NumberFormat(java.text.NumberFormat)

Example 2 with FileParsingException

use of org.apache.ignite.ml.math.exceptions.datastructures.FileParsingException in project ignite by apache.

the class LabeledDatasetLoader method parseFeatures.

/**
 * Builds the feature vector for one row of already tokenized data.
 *
 * <p>Features are read from {@code rowData[1]} through {@code rowData[colSize]}; index 0 is skipped here.
 * A cell that is absent because the row is too short is replaced with the value returned by
 * {@code fillMissedData()}. A cell that is not a parsable double is replaced the same way unless
 * {@code isFallOnBadData} is set.
 *
 * @param pathToFile Path to the file being parsed, used only for error reporting.
 * @param isFallOnBadData Fall on incorrect data if true.
 * @param colSize Number of feature columns expected in the row.
 * @param rowIdx Index of the row, used only for error reporting.
 * @param rowData Tokens of the row.
 * @return Vector of {@code colSize} features.
 * @throws CardinalityException If {@code isFallOnBadData} is true and the row does not hold exactly
 *      {@code colSize + 1} tokens.
 * @throws FileParsingException If {@code isFallOnBadData} is true and a feature cell is not a parsable double.
 */
@NotNull
private static Vector parseFeatures(Path pathToFile, boolean isFallOnBadData, int colSize, int rowIdx, String[] rowData) {
    final Vector vec = LabeledVectorSet.emptyVector(colSize);
    if (isFallOnBadData && rowData.length != colSize + 1)
        throw new CardinalityException(colSize + 1, rowData.length);
    double missedData = fillMissedData();
    for (int j = 0; j < colSize; j++) {
        int cellIdx = j + 1;

        // Short row: check the bounds explicitly instead of catching ArrayIndexOutOfBoundsException.
        if (cellIdx >= rowData.length) {
            vec.set(j, missedData);
            continue;
        }

        try {
            double feature = Double.parseDouble(rowData[cellIdx]);
            vec.set(j, feature);
        } catch (NumberFormatException e) {
            if (isFallOnBadData)
                throw new FileParsingException(rowData[cellIdx], rowIdx, pathToFile);
            else
                vec.set(j, missedData);
        }
    }
    return vec;
}
Also used : FileParsingException(org.apache.ignite.ml.math.exceptions.datastructures.FileParsingException) CardinalityException(org.apache.ignite.ml.math.exceptions.math.CardinalityException) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) NotNull(org.jetbrains.annotations.NotNull)

Example 3 with FileParsingException

use of org.apache.ignite.ml.math.exceptions.datastructures.FileParsingException in project ignite by apache.

the class LabeledDatasetLoader method loadFromTxtFile.

/**
 * Datafile should keep class labels in the first column.
 *
 * <p>Every line is split on {@code separator}; the first token is parsed as the label and the remaining
 * tokens as features. When {@code isFallOnBadData} is false, a row whose label is not a parsable double
 * is skipped.
 *
 * @param pathToFile Path to file.
 * @param separator Element to tokenize row on separate tokens.
 * @param isFallOnBadData Fall on incorrect data if true.
 * @return Labeled Dataset parsed from file.
 * @throws IOException If the file cannot be read.
 * @throws EmptyFileException If the file contains no lines.
 * @throws NoDataException If the computed number of feature columns is not positive.
 * @throws FileParsingException If {@code isFallOnBadData} is true and a label is not a parsable double.
 */
public static LabeledVectorSet loadFromTxtFile(Path pathToFile, String separator, boolean isFallOnBadData) throws IOException {
    // readAllLines closes the file before returning; the former Files.lines stream was never closed.
    List<String> list = Files.readAllLines(pathToFile);
    final int rowSize = list.size();
    if (rowSize == 0)
        throw new EmptyFileException(pathToFile.toString());

    // One column is reserved for the label, the rest are features.
    final int colSize = getColumnSize(separator, list) - 1;
    if (colSize <= 0)
        throw new NoDataException("File should contain first row with data");

    List<Double> labels = new ArrayList<>();
    List<Vector> vectors = new ArrayList<>();
    for (int i = 0; i < rowSize; i++) {
        Double clsLb;
        String[] rowData = list.get(i).split(separator);
        try {
            clsLb = Double.parseDouble(rowData[0]);
            Vector vec = parseFeatures(pathToFile, isFallOnBadData, colSize, i, rowData);
            labels.add(clsLb);
            vectors.add(vec);
        } catch (NumberFormatException e) {
            // Deliberate best effort: in lenient mode a row with an unparsable label is dropped.
            if (isFallOnBadData)
                throw new FileParsingException(rowData[0], i, pathToFile);
        }
    }

    LabeledVector[] data = new LabeledVector[vectors.size()];
    for (int i = 0; i < vectors.size(); i++)
        data[i] = new LabeledVector(vectors.get(i), labels.get(i));
    return new LabeledVectorSet(data, colSize);
}
Also used : FileParsingException(org.apache.ignite.ml.math.exceptions.datastructures.FileParsingException) ArrayList(java.util.ArrayList) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) LabeledVectorSet(org.apache.ignite.ml.structures.LabeledVectorSet) EmptyFileException(org.apache.ignite.ml.math.exceptions.datastructures.EmptyFileException) NoDataException(org.apache.ignite.ml.math.exceptions.math.NoDataException) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector)

Aggregations

FileParsingException (org.apache.ignite.ml.math.exceptions.datastructures.FileParsingException): 3; Vector (org.apache.ignite.ml.math.primitives.vector.Vector): 3; LabeledVector (org.apache.ignite.ml.structures.LabeledVector): 2; File (java.io.File): 1; FileNotFoundException (java.io.FileNotFoundException): 1; NumberFormat (java.text.NumberFormat): 1; ParseException (java.text.ParseException): 1; ArrayList (java.util.ArrayList): 1; Scanner (java.util.Scanner): 1; EmptyFileException (org.apache.ignite.ml.math.exceptions.datastructures.EmptyFileException): 1; CardinalityException (org.apache.ignite.ml.math.exceptions.math.CardinalityException): 1; NoDataException (org.apache.ignite.ml.math.exceptions.math.NoDataException): 1; LabeledVectorSet (org.apache.ignite.ml.structures.LabeledVectorSet): 1; NotNull (org.jetbrains.annotations.NotNull): 1