Use of org.apache.ignite.ml.math.exceptions.knn.FileParsingException in the Apache Ignite project.
Class LabeledDatasetLoader, method parseFeatures.
/**
 * Builds the feature vector for one row of a labeled data file.
 * <p>
 * The token at index 0 of {@code rowData} is not read here; feature {@code j} is taken from
 * {@code rowData[j + 1]}. In tolerant mode ({@code isFallOnBadData == false}) a token that is missing
 * or is not a valid double is replaced with the value returned by {@code fillMissedData()}.
 *
 * @param pathToFile Path to file; only used when reporting a parsing failure.
 * @param isDistributed Passed through to {@code LabeledDataset.emptyVector} when the vector is created.
 * @param isFallOnBadData Fall on incorrect data if true.
 * @param colSize Number of features to read from the row.
 * @param rowIdx Index of the row; only used when reporting a parsing failure.
 * @param rowData Tokens of the row.
 * @return Vector created by {@code LabeledDataset.emptyVector(colSize, isDistributed)} with entries
 *      {@code 0..colSize-1} set.
 * @throws CardinalityException If {@code isFallOnBadData} is true and the row does not hold exactly
 *      {@code colSize + 1} tokens.
 * @throws FileParsingException If {@code isFallOnBadData} is true and a feature token is not a valid double.
 */
@NotNull
private static Vector parseFeatures(Path pathToFile, boolean isDistributed, boolean isFallOnBadData, int colSize, int rowIdx, String[] rowData) {
    final Vector vec = LabeledDataset.emptyVector(colSize, isDistributed);

    if (isFallOnBadData && rowData.length != colSize + 1)
        throw new CardinalityException(colSize + 1, rowData.length);

    double missedData = fillMissedData();

    for (int j = 0; j < colSize; j++) {
        int tokIdx = j + 1;

        // Row is shorter than expected (only reachable in tolerant mode, strict mode has already
        // failed on the cardinality check above): use the placeholder instead of relying on an
        // ArrayIndexOutOfBoundsException.
        if (tokIdx >= rowData.length) {
            vec.set(j, missedData);
            continue;
        }

        try {
            vec.set(j, Double.parseDouble(rowData[tokIdx]));
        } catch (NumberFormatException e) {
            if (isFallOnBadData)
                throw new FileParsingException(rowData[tokIdx], rowIdx, pathToFile);

            // Tolerant mode: an unparsable token is replaced with the placeholder.
            vec.set(j, missedData);
        }
    }

    return vec;
}
Use of org.apache.ignite.ml.math.exceptions.knn.FileParsingException in the Apache Ignite project.
Class LabeledDatasetLoader, method loadFromTxtFile.
/**
 * Loads a labeled dataset from a text file. Datafile should keep class labels in the first column.
 * <p>
 * Every line is tokenized with {@link String#split(String)}, so {@code separator} is interpreted as a
 * regular expression. In tolerant mode ({@code isFallOnBadData == false}) a row whose label cannot be
 * parsed is skipped.
 *
 * @param pathToFile Path to file.
 * @param separator Element to tokenize row on separate tokens.
 * @param isDistributed Generates distributed dataset if true.
 * @param isFallOnBadData Fall on incorrect data if true.
 * @return Labeled Dataset parsed from file.
 * @throws IOException If the file cannot be opened.
 * @throws EmptyFileException If the file contains no lines.
 * @throws NoDataException If the column count derived from the file leaves no feature columns.
 * @throws FileParsingException If {@code isFallOnBadData} is true and a label or feature cannot be parsed.
 * @throws CardinalityException If {@code isFallOnBadData} is true and a row has an unexpected number of tokens.
 */
public static LabeledDataset loadFromTxtFile(Path pathToFile, String separator, boolean isDistributed, boolean isFallOnBadData) throws IOException {
    List<String> list = new ArrayList<>();

    // Files.lines keeps the file open until the stream is closed, so close it deterministically.
    try (Stream<String> stream = Files.lines(pathToFile)) {
        stream.forEach(list::add);
    }

    final int rowSize = list.size();

    if (rowSize == 0)
        throw new EmptyFileException(pathToFile.toString());

    // First column holds the label, the remaining ones are features.
    final int colSize = getColumnSize(separator, list) - 1;

    if (colSize <= 0)
        throw new NoDataException("File should contain first row with data");

    List<Double> labels = new ArrayList<>();
    List<Vector> vectors = new ArrayList<>();

    for (int i = 0; i < rowSize; i++) {
        String line = list.get(i);
        String[] rowData = line.split(separator);

        // String.split drops trailing empty tokens, so a line made only of separators yields an
        // empty array; treat it like any other row with an unreadable label.
        if (rowData.length == 0) {
            if (isFallOnBadData)
                throw new FileParsingException(line, i, pathToFile);

            continue;
        }

        try {
            Double clsLb = Double.parseDouble(rowData[0]);
            Vector vec = parseFeatures(pathToFile, isDistributed, isFallOnBadData, colSize, i, rowData);

            // Add both only after both were obtained so labels and vectors stay aligned.
            labels.add(clsLb);
            vectors.add(vec);
        } catch (NumberFormatException e) {
            if (isFallOnBadData)
                throw new FileParsingException(rowData[0], i, pathToFile);

            // Tolerant mode: the row is skipped.
        }
    }

    LabeledVector[] data = new LabeledVector[vectors.size()];

    for (int i = 0; i < data.length; i++)
        data[i] = new LabeledVector(vectors.get(i), labels.get(i));

    return new LabeledDataset(data, colSize);
}
Aggregations