Use of org.apache.ignite.ml.math.exceptions.datastructures.FileParsingException in the Apache Ignite project.
Class SandboxMLCache, method fillCacheWith.
/**
 * Fills cache with data and returns it.
 * <p>
 * The dataset file is read line by line. Each line is split on the dataset separator and every
 * cell is converted to a {@code double}: an empty cell becomes {@link Double#NaN}, otherwise the
 * cell is parsed with {@link Double#parseDouble(String)} and, if that fails, with a
 * {@link Locale#FRANCE} number format. Each parsed row is stored in the cache as a vector keyed
 * by a running row counter.
 * <p>
 * If the dataset declares a header, the first line is skipped but still advances the counter,
 * so the first data row is stored under key {@code 1} instead of {@code 0}.
 *
 * @param dataset The chosen dataset.
 * @return Filled Ignite Cache.
 * @throws FileNotFoundException If file not found.
 * @throws FileParsingException If a cell can be parsed by neither of the two number parsers.
 */
public IgniteCache<Integer, Vector> fillCacheWith(MLSandboxDatasets dataset) throws FileNotFoundException {
    IgniteCache<Integer, Vector> cache = getCache();

    String fileName = dataset.getFileName();

    File file = IgniteUtils.resolveIgnitePath(fileName);

    if (file == null)
        throw new FileNotFoundException(fileName);

    // Fallback parser for cells rejected by Double.parseDouble (e.g. a comma used as the decimal separator).
    // Created once: it does not depend on the row being processed.
    NumberFormat format = NumberFormat.getInstance(Locale.FRANCE);

    // try-with-resources guarantees the underlying file is released, also when parsing fails.
    try (Scanner scanner = new Scanner(file)) {
        int cnt = 0;

        while (scanner.hasNextLine()) {
            String row = scanner.nextLine();

            // The header line is skipped but still consumes key 0.
            if (dataset.hasHeader() && cnt == 0) {
                cnt++;

                continue;
            }

            String[] cells = row.split(dataset.getSeparator());

            double[] data = new double[cells.length];

            for (int i = 0; i < cells.length; i++) {
                try {
                    data[i] = cells[i].isEmpty() ? Double.NaN : Double.parseDouble(cells[i]);
                }
                catch (NumberFormatException e) {
                    try {
                        data[i] = format.parse(cells[i]).doubleValue();
                    }
                    catch (ParseException e1) {
                        throw new FileParsingException(cells[i], i, Paths.get(dataset.getFileName()));
                    }
                }
            }

            cache.put(cnt++, VectorUtils.of(data));
        }
    }

    return cache;
}
Use of org.apache.ignite.ml.math.exceptions.datastructures.FileParsingException in the Apache Ignite project.
Class LabeledDatasetLoader, method parseFeatures.
/**
 * Builds the feature vector for one tokenized row.
 * <p>
 * Feature {@code j} is read from {@code rowData[j + 1]}; the cell at index {@code 0} is not read
 * here. A cell that is absent (the row is shorter than {@code colSize + 1}) or, when
 * {@code isFallOnBadData} is {@code false}, not parseable as a {@code double} is replaced by the
 * value returned by {@code fillMissedData()}.
 *
 * @param pathToFile Path to the file the row comes from; only used for error reporting.
 * @param isFallOnBadData Fall on incorrect data if true.
 * @param colSize Amount of features to read.
 * @param rowIdx Index of the row; only used for error reporting.
 * @param rowData Tokens of the row.
 * @return Vector of {@code colSize} features.
 * @throws CardinalityException If {@code isFallOnBadData} is set and the row does not have exactly
 *      {@code colSize + 1} tokens.
 * @throws FileParsingException If {@code isFallOnBadData} is set and a feature token is not a number.
 */
@NotNull
private static Vector parseFeatures(Path pathToFile, boolean isFallOnBadData, int colSize, int rowIdx, String[] rowData) {
    final Vector vec = LabeledVectorSet.emptyVector(colSize);

    if (isFallOnBadData && rowData.length != colSize + 1)
        throw new CardinalityException(colSize + 1, rowData.length);

    double missedData = fillMissedData();

    for (int j = 0; j < colSize; j++) {
        int cellIdx = j + 1;

        // Short row: check the bound explicitly instead of catching ArrayIndexOutOfBoundsException.
        if (cellIdx >= rowData.length) {
            vec.set(j, missedData);

            continue;
        }

        try {
            vec.set(j, Double.parseDouble(rowData[cellIdx]));
        }
        catch (NumberFormatException e) {
            if (isFallOnBadData)
                throw new FileParsingException(rowData[cellIdx], rowIdx, pathToFile);

            vec.set(j, missedData);
        }
    }

    return vec;
}
Use of org.apache.ignite.ml.math.exceptions.datastructures.FileParsingException in the Apache Ignite project.
Class LabeledDatasetLoader, method loadFromTxtFile.
/**
 * Loads a labeled dataset from a text file.
 * <p>
 * Datafile should keep class labels in the first column. Every line is split on
 * {@code separator}; the first token is parsed as the class label and the remaining tokens are
 * turned into a feature vector by {@code parseFeatures}. The amount of feature columns is
 * {@code getColumnSize(separator, lines) - 1}.
 * <p>
 * When {@code isFallOnBadData} is {@code false}, a row whose label is not a number is skipped.
 *
 * @param pathToFile Path to file.
 * @param separator Element to tokenize row on separate tokens.
 * @param isFallOnBadData Fall on incorrect data if true.
 * @return Labeled Dataset parsed from file.
 * @throws IOException If the file cannot be opened.
 * @throws EmptyFileException If the file contains no lines.
 * @throws NoDataException If the computed amount of feature columns is not positive.
 * @throws FileParsingException If {@code isFallOnBadData} is set and a class label is not a number.
 */
public static LabeledVectorSet loadFromTxtFile(Path pathToFile, String separator, boolean isFallOnBadData) throws IOException {
    List<String> list = new ArrayList<>();

    // Files.lines keeps the file open until the stream is closed, so close it as soon as it is drained.
    try (Stream<String> stream = Files.lines(pathToFile)) {
        stream.forEach(list::add);
    }

    final int rowSize = list.size();

    if (rowSize == 0)
        throw new EmptyFileException(pathToFile.toString());

    final int colSize = getColumnSize(separator, list) - 1;

    if (colSize <= 0)
        throw new NoDataException("File should contain first row with data");

    List<Double> labels = new ArrayList<>();
    List<Vector> vectors = new ArrayList<>();

    for (int i = 0; i < rowSize; i++) {
        String[] rowData = list.get(i).split(separator);

        try {
            Double clsLb = Double.parseDouble(rowData[0]);

            Vector vec = parseFeatures(pathToFile, isFallOnBadData, colSize, i, rowData);

            // Added together only after both parts parsed, so the two lists stay aligned.
            labels.add(clsLb);
            vectors.add(vec);
        }
        catch (NumberFormatException e) {
            if (isFallOnBadData)
                throw new FileParsingException(rowData[0], i, pathToFile);

            // Best-effort mode: a row with an unparseable label is dropped.
        }
    }

    LabeledVector[] data = new LabeledVector[vectors.size()];

    for (int i = 0; i < vectors.size(); i++)
        data[i] = new LabeledVector(vectors.get(i), labels.get(i));

    return new LabeledVectorSet(data, colSize);
}
Aggregations