Use of org.apache.ignite.ml.math.exceptions.NoDataException in project ignite by apache.
The class AbstractMultipleLinearRegression, method newXSampleData.
/**
 * Installs a new x sample (design) matrix, discarding whatever was stored before.
 *
 * <p>Each row of {@code x} is one sample observation and each column is one
 * independent variable. For instance, a matrix holding the rows
 * <pre>
 * 1 2
 * 3 4
 * 5 6
 * </pre>
 * yields a model with two independent variables and three observations.
 *
 * <p>Callers must not prepend a column of ones themselves: unless the model is
 * configured without an intercept ({@code noIntercept}), this method builds a
 * matrix that is one column wider than {@code x}, sets column 0 to {@code 1.0}
 * and places {@code x} in the remaining columns. When {@code noIntercept} is
 * set, the supplied matrix is stored as-is, without copying.
 *
 * @param x the matrix holding the x sample, one observation per row
 * @throws NullArgumentException if {@code x} is null
 * @throws NoDataException if {@code x} has no rows
 */
protected void newXSampleData(Matrix x) {
    if (x == null) {
        throw new NullArgumentException();
    }

    final int rows = x.rowSize();

    if (rows == 0) {
        throw new NoDataException();
    }

    if (noIntercept) {
        // TODO: decide whether a defensive copy is needed; the caller's matrix is kept by reference.
        xMatrix = x;

        return;
    }

    final int cols = x.columnSize();

    // Intercept term: widen the design matrix by one leading column of ones.
    xMatrix = MatrixUtil.like(x, rows, cols + 1);
    xMatrix.viewColumn(0).map(Functions.constant(1.0));

    // Original data occupies columns 1..cols of the widened matrix.
    xMatrix.viewPart(0, rows, 1, cols).assign(x);
}
Use of org.apache.ignite.ml.math.exceptions.NoDataException in project ignite by apache.
The class LabeledDatasetLoader, method loadFromTxtFile.
/**
 * Parses a delimited text file into a labeled dataset.
 *
 * <p>Every line of the file is treated as one observation. The line is split with
 * {@link String#split(String)}, so {@code separator} is interpreted as a regular
 * expression. The first token is parsed as the class label; the whole token array
 * is handed to {@code parseFeatures} to build the feature vector.
 *
 * <p>If a {@link NumberFormatException} is raised while processing a row, the row is
 * either reported via {@code FileParsingException} (when {@code isFallOnBadData} is
 * true) or silently left out of the resulting dataset (when it is false).
 *
 * @param pathToFile Path to file.
 * @param separator Regular expression used to split each row into tokens.
 * @param isDistributed Generates distributed dataset if true.
 * @param isFallOnBadData Fall on incorrect data if true.
 * @return Labeled Dataset parsed from file.
 * @throws IOException if the file cannot be opened.
 * @throws EmptyFileException if the file contains no lines.
 * @throws NoDataException if, after reserving the first column for the label,
 *      no feature columns remain.
 * @throws FileParsingException if a row cannot be parsed and {@code isFallOnBadData} is true.
 */
public static LabeledDataset loadFromTxtFile(Path pathToFile, String separator, boolean isDistributed, boolean isFallOnBadData) throws IOException {
    List<String> list = new ArrayList<>();

    // Files.lines holds the underlying file open until the stream is closed,
    // so it must be closed explicitly to avoid leaking the file handle.
    try (Stream<String> stream = Files.lines(pathToFile)) {
        stream.forEach(list::add);
    }

    final int rowSize = list.size();

    if (rowSize == 0) {
        throw new EmptyFileException(pathToFile.toString());
    }

    // The first column carries the label, hence one column fewer for features.
    final int colSize = getColumnSize(separator, list) - 1;

    if (colSize <= 0) {
        throw new NoDataException("File should contain first row with data");
    }

    List<Double> labels = new ArrayList<>();
    List<Vector> vectors = new ArrayList<>();

    for (int i = 0; i < rowSize; i++) {
        String[] rowData = list.get(i).split(separator);

        try {
            double clsLb = Double.parseDouble(rowData[0]);
            Vector vec = parseFeatures(pathToFile, isDistributed, isFallOnBadData, colSize, i, rowData);

            // Both lists are appended only after both parse steps succeed,
            // so labels and vectors always stay index-aligned.
            labels.add(clsLb);
            vectors.add(vec);
        } catch (NumberFormatException e) {
            if (isFallOnBadData) {
                throw new FileParsingException(rowData[0], i, pathToFile);
            }
            // Best-effort mode: the malformed row is intentionally skipped.
        }
    }

    LabeledVector[] data = new LabeledVector[vectors.size()];

    for (int i = 0; i < data.length; i++) {
        data[i] = new LabeledVector(vectors.get(i), labels.get(i));
    }

    return new LabeledDataset(data, colSize);
}
Aggregations