Search in sources :

Example 1 with NoDataException

Use of org.apache.ignite.ml.math.exceptions.math.NoDataException in the Apache Ignite project.

The method loadFromTxtFile of the class LabeledDatasetLoader.

/**
 * Loads a labeled dataset from a text file.
 * Datafile should keep class labels in the first column.
 *
 * @param pathToFile Path to file.
 * @param separator Element to tokenize row on separate tokens.
 * @param isFallOnBadData Fall on incorrect data if true: a row that raises {@link NumberFormatException}
 *      while being parsed causes a {@code FileParsingException}. If false, such a row is skipped.
 * @return Labeled Dataset parsed from file.
 * @throws IOException If the file cannot be opened.
 * @throws EmptyFileException If the file contains no lines.
 * @throws NoDataException If the computed number of feature columns is not positive.
 * @throws FileParsingException If a row cannot be parsed and {@code isFallOnBadData} is true.
 */
public static LabeledVectorSet loadFromTxtFile(Path pathToFile, String separator, boolean isFallOnBadData) throws IOException {
    List<String> list = new ArrayList<>();

    // Files.lines keeps the underlying file open until the stream is closed,
    // so the stream is closed deterministically instead of being left to the GC.
    try (Stream<String> stream = Files.lines(pathToFile)) {
        stream.forEach(list::add);
    }

    final int rowSize = list.size();

    if (rowSize == 0)
        throw new EmptyFileException(pathToFile.toString());

    // One column is subtracted from the reported size; presumably this is the label column - confirm in getColumnSize.
    final int colSize = getColumnSize(separator, list) - 1;

    if (colSize <= 0)
        throw new NoDataException("File should contain first row with data");

    List<Double> labels = new ArrayList<>();
    List<Vector> vectors = new ArrayList<>();

    for (int i = 0; i < rowSize; i++) {
        String[] rowData = list.get(i).split(separator);

        try {
            Double clsLb = Double.parseDouble(rowData[0]);
            Vector vec = parseFeatures(pathToFile, isFallOnBadData, colSize, i, rowData);

            // Both lists are appended only after the whole row parsed, so indices stay aligned.
            labels.add(clsLb);
            vectors.add(vec);
        } catch (NumberFormatException e) {
            if (isFallOnBadData)
                throw new FileParsingException(rowData[0], i, pathToFile);
            // Otherwise the malformed row is intentionally skipped.
        }
    }

    LabeledVector[] data = new LabeledVector[vectors.size()];

    for (int i = 0; i < vectors.size(); i++)
        data[i] = new LabeledVector(vectors.get(i), labels.get(i));

    return new LabeledVectorSet(data, colSize);
}
Also used : FileParsingException(org.apache.ignite.ml.math.exceptions.datastructures.FileParsingException) ArrayList(java.util.ArrayList) LabeledVector(org.apache.ignite.ml.structures.LabeledVector) LabeledVectorSet(org.apache.ignite.ml.structures.LabeledVectorSet) EmptyFileException(org.apache.ignite.ml.math.exceptions.datastructures.EmptyFileException) NoDataException(org.apache.ignite.ml.math.exceptions.math.NoDataException) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) LabeledVector(org.apache.ignite.ml.structures.LabeledVector)

Aggregations

ArrayList (java.util.ArrayList)1 EmptyFileException (org.apache.ignite.ml.math.exceptions.datastructures.EmptyFileException)1 FileParsingException (org.apache.ignite.ml.math.exceptions.datastructures.FileParsingException)1 NoDataException (org.apache.ignite.ml.math.exceptions.math.NoDataException)1 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)1 LabeledVector (org.apache.ignite.ml.structures.LabeledVector)1 LabeledVectorSet (org.apache.ignite.ml.structures.LabeledVectorSet)1