Use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.
The class LinRegLearnerNodeModel, method loadInternals.
/**
 * {@inheritDoc}
 */
@Override
protected void loadInternals(final File internDir, final ExecutionMonitor exec)
        throws IOException, CanceledExecutionException {
    File inFile = new File(internDir, FILE_SAVE);
    ModelContentRO c = ModelContent.loadFromXML(
        new BufferedInputStream(new GZIPInputStream(new FileInputStream(inFile))));
    try {
        m_nrRows = c.getInt(CFG_NR_ROWS);
        m_nrRowsSkipped = c.getInt(CFG_NR_ROWS_SKIPPED);
        m_error = c.getDouble(CFG_ERROR);
        ModelContentRO specContent = c.getModelContent(CFG_SPEC);
        DataTableSpec outSpec = DataTableSpec.load(specContent);
        m_actualUsedColumns = specContent.getStringArray(CFG_USED_COLUMNS, (String[]) null);
        ModelContentRO parContent = c.getModelContent(CFG_PARAMS);
        m_params = LinearRegressionContent.instantiateAndLoad(parContent, outSpec);
    } catch (InvalidSettingsException ise) {
        IOException ioe = new IOException("Unable to restore state: " + ise.getMessage());
        ioe.initCause(ise);
        throw ioe;
    }
    // rebuild the in-memory row cache for the view (row indices are 1-based)
    File dataFile = new File(internDir, FILE_DATA);
    ContainerTable t = DataContainer.readFromZip(dataFile);
    int rowCount = t.getRowCount();
    m_rowContainer = new DefaultDataArray(t, 1, rowCount, exec);
}
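For context, a node that restores a DefaultDataArray in loadInternals usually writes the same two files in its saveInternals counterpart. Below is a minimal sketch of what that counterpart might look like, assuming the symmetric KNIME persistence calls (ModelContent.saveToXML, DataContainer.writeToZip); it is not the verbatim implementation from knime-core, and the spec/parameter sub-contents are elided.

@Override
protected void saveInternals(final File internDir, final ExecutionMonitor exec)
        throws IOException, CanceledExecutionException {
    // sketch only: mirrors the loader above, same constants and fields assumed
    ModelContent c = new ModelContent("internals");
    c.addInt(CFG_NR_ROWS, m_nrRows);
    c.addInt(CFG_NR_ROWS_SKIPPED, m_nrRowsSkipped);
    c.addDouble(CFG_ERROR, m_error);
    // spec and parameter sub-contents omitted for brevity
    File outFile = new File(internDir, FILE_SAVE);
    c.saveToXML(new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(outFile))));
    // persist the cached rows so loadInternals can rebuild the DefaultDataArray
    DataContainer.writeToZip(m_rowContainer, new File(internDir, FILE_DATA), exec);
}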
Use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.
The class PolyRegLearnerNodeModel, method loadInternals.
/**
 * {@inheritDoc}
 */
@Override
protected void loadInternals(final File nodeInternDir, final ExecutionMonitor exec)
        throws IOException, CanceledExecutionException {
    File f = new File(nodeInternDir, "internals.xml");
    if (f.exists()) {
        NodeSettingsRO internals = NodeSettings.loadFromXML(new BufferedInputStream(new FileInputStream(f)));
        try {
            double[] betas = internals.getDoubleArray("betas");
            String[] columnNames = internals.getStringArray("columnNames");
            double squaredError = internals.getDouble("squaredError");
            double[] meanValues = internals.getDoubleArray("meanValues");
            m_viewData = new PolyRegViewData(meanValues, betas, squaredError, columnNames,
                m_settings.getDegree(), m_settings.getTargetColumn());
        } catch (InvalidSettingsException ex) {
            throw new IOException("Old or corrupt internals", ex);
        }
    } else {
        throw new FileNotFoundException("Internals do not exist");
    }
    f = new File(nodeInternDir, "data.zip");
    if (f.exists()) {
        ContainerTable t = DataContainer.readFromZip(f);
        int rowCount = t.getRowCount();
        m_rowContainer = new DefaultDataArray(t, 1, rowCount, exec);
    } else {
        throw new FileNotFoundException("Internals do not exist");
    }
}
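Both loaders above hand the full row count to the four-argument constructor, which caches rows 1 through rowCount in memory and reports progress on the supplied monitor. For large tables the cache is often capped instead; a minimal sketch, where MAX_ROWS is a hypothetical view limit:

ContainerTable t = DataContainer.readFromZip(f);
// DefaultDataArray row indices are 1-based; cache at most MAX_ROWS rows for the view
int last = Math.min(t.getRowCount(), MAX_ROWS);
DataArray cached = new DefaultDataArray(t, 1, last, exec);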
Use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.
The class PolyRegLearnerNodeModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getDataTableSpec();
    final int colCount = inSpec.getNumColumns();
    String[] selectedCols = computeSelectedColumns(inSpec);
    Set<String> hash = new HashSet<String>(Arrays.asList(selectedCols));
    m_colSelected = new boolean[colCount];
    for (int i = 0; i < colCount; i++) {
        m_colSelected[i] = hash.contains(inTable.getDataTableSpec().getColumnSpec(i).getName());
    }
    final int rowCount = inTable.getRowCount();
    final int independentVariables = selectedCols.length;
    final int degree = m_settings.getDegree();
    final int dependentIndex = inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn());
    // design matrix: a leading 1 (intercept), then x, x^2, ..., x^degree per selected column
    double[][] xMat = new double[rowCount][1 + independentVariables * degree];
    double[][] yMat = new double[rowCount][1];
    int rowIndex = 0;
    for (DataRow row : inTable) {
        exec.checkCanceled();
        exec.setProgress(0.2 * rowIndex / rowCount);
        xMat[rowIndex][0] = 1;
        int colIndex = 1;
        for (int i = 0; i < row.getNumCells(); i++) {
            if ((m_colSelected[i] || (i == dependentIndex)) && row.getCell(i).isMissing()) {
                throw new IllegalArgumentException("Missing values are not supported by this node.");
            }
            if (m_colSelected[i]) {
                double val = ((DoubleValue) row.getCell(i)).getDoubleValue();
                double poly = val;
                xMat[rowIndex][colIndex] = poly;
                colIndex++;
                for (int d = 2; d <= degree; d++) {
                    poly *= val;
                    xMat[rowIndex][colIndex] = poly;
                    colIndex++;
                }
            } else if (i == dependentIndex) {
                double val = ((DoubleValue) row.getCell(i)).getDoubleValue();
                yMat[rowIndex][0] = val;
            }
        }
        rowIndex++;
    }
    // compute X'
    double[][] xTransMat = MathUtils.transpose(xMat);
    exec.setProgress(0.24);
    exec.checkCanceled();
    // compute X'X
    double[][] xxMat = MathUtils.multiply(xTransMat, xMat);
    exec.setProgress(0.28);
    exec.checkCanceled();
    // compute X'Y
    double[][] xyMat = MathUtils.multiply(xTransMat, yMat);
    exec.setProgress(0.32);
    exec.checkCanceled();
    // compute (X'X)^-1
    double[][] xxInverse;
    try {
        xxInverse = MathUtils.inverse(xxMat);
        exec.setProgress(0.36);
        exec.checkCanceled();
    } catch (ArithmeticException ex) {
        throw new ArithmeticException("The attributes of the data samples are not mutually independent.");
    }
    // compute the regression coefficients: beta = (X'X)^-1 * (X'Y)
    final double[][] betas = MathUtils.multiply(xxInverse, xyMat);
    exec.setProgress(0.4);
    m_betas = new double[independentVariables * degree + 1];
    for (int i = 0; i < betas.length; i++) {
        m_betas[i] = betas[i][0];
    }
    m_columnNames = selectedCols;
    // cache the selected columns plus the target column for the view
    String[] temp = new String[m_columnNames.length + 1];
    System.arraycopy(m_columnNames, 0, temp, 0, m_columnNames.length);
    temp[temp.length - 1] = m_settings.getTargetColumn();
    FilterColumnTable filteredTable = new FilterColumnTable(inTable, temp);
    DataArray rowContainer = new DefaultDataArray(filteredTable, 1, m_settings.getMaxRowsForView());
    int ignore = rowContainer.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn());
    m_meanValues = new double[independentVariables];
    for (DataRow row : rowContainer) {
        int k = 0;
        for (int i = 0; i < row.getNumCells(); i++) {
            if (i != ignore) {
                m_meanValues[k++] += ((DoubleValue) row.getCell(i)).getDoubleValue();
            }
        }
    }
    for (int i = 0; i < m_meanValues.length; i++) {
        m_meanValues[i] /= rowContainer.size();
    }
    ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
    crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = (PMMLPortObject) inData[1];
    PortObject[] bdt = new PortObject[]{
        exec.createColumnRearrangeTable(inTable, crea, exec.createSubProgress(0.6)),
        createPMMLModel(inPMMLPort, inTable.getDataTableSpec())};
    m_squaredError /= rowCount;
    m_viewData = new PolyRegViewData(m_meanValues, m_betas, m_squaredError, m_columnNames,
        m_settings.getDegree(), m_settings.getTargetColumn());
    m_rowContainer = rowContainer;
    return bdt;
}
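The numeric core of this method is an ordinary least-squares fit via the normal equations, beta = (X'X)^-1 (X'Y), over polynomially expanded features. The following self-contained sketch reproduces those steps in plain Java for a tiny degree-2 example, with a small Gaussian-elimination solver standing in for MathUtils.inverse; it is illustrative only, not KNIME code.

public final class PolyFitSketch {

    public static void main(String[] args) {
        // tiny sample generated from y = 2 + 3x + x^2, so the fit recovers it exactly
        double[] x = {0, 1, 2, 3, 4};
        double[] y = {2, 6, 12, 20, 30};
        int degree = 2;
        int n = x.length, cols = degree + 1;

        // design matrix: leading 1, then x, x^2, ... (same layout as execute above)
        double[][] xMat = new double[n][cols];
        for (int r = 0; r < n; r++) {
            xMat[r][0] = 1;
            double poly = 1;
            for (int d = 1; d <= degree; d++) {
                poly *= x[r];
                xMat[r][d] = poly;
            }
        }

        // accumulate X'X and X'y directly, without forming X' explicitly
        double[][] xtx = new double[cols][cols];
        double[] xty = new double[cols];
        for (int r = 0; r < n; r++) {
            for (int i = 0; i < cols; i++) {
                xty[i] += xMat[r][i] * y[r];
                for (int j = 0; j < cols; j++) {
                    xtx[i][j] += xMat[r][i] * xMat[r][j];
                }
            }
        }

        // solve (X'X) beta = X'y instead of inverting X'X
        double[] beta = solve(xtx, xty);
        System.out.println(java.util.Arrays.toString(beta)); // ~ [2.0, 3.0, 1.0]
    }

    // Gaussian elimination with partial pivoting; fails on singular systems,
    // which corresponds to the "not mutually independent" case above
    private static double[] solve(double[][] a, double[] b) {
        int m = b.length;
        for (int p = 0; p < m; p++) {
            int max = p;
            for (int r = p + 1; r < m; r++) {
                if (Math.abs(a[r][p]) > Math.abs(a[max][p])) { max = r; }
            }
            double[] tmpRow = a[p]; a[p] = a[max]; a[max] = tmpRow;
            double tmpB = b[p]; b[p] = b[max]; b[max] = tmpB;
            for (int r = p + 1; r < m; r++) {
                double f = a[r][p] / a[p][p];
                b[r] -= f * b[p];
                for (int c = p; c < m; c++) { a[r][c] -= f * a[p][c]; }
            }
        }
        double[] sol = new double[m];
        for (int r = m - 1; r >= 0; r--) {
            double sum = b[r];
            for (int c = r + 1; c < m; c++) { sum -= a[r][c] * sol[c]; }
            sol[r] = sum / a[r][r];
        }
        return sol;
    }
}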
Use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.
The class ExtendedStatisticsNodeModel, method execute.
/**
 * {@inheritDoc}
 *
 * @throws CanceledExecutionException
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec)
        throws CanceledExecutionException {
    double initPercent = m_enableHiLite.getBooleanValue() ? .25 : .2;
    ExecutionContext init = exec.createSubExecutionContext(initPercent);
    DataTableSpec dataSpec = inData[0].getDataTableSpec();
    List<String> includes = nominalColumns(dataSpec);
    m_statTable = new Statistics3Table(inData[0], m_computeMedian.getBooleanValue(),
        numOfNominalValuesOutput(), includes, init);
    if (getStatTable().getWarning() != null) {
        setWarningMessage(getStatTable().getWarning());
    }
    BufferedDataTable outTableOccurrences = exec.createBufferedDataTable(
        getStatTable().createNominalValueTable(includes), exec.createSubProgress(0.5));
    BufferedDataTable[] ret = new BufferedDataTable[3];
    DataTableSpec newSpec = renamedOccurrencesSpec(outTableOccurrences.getSpec());
    ret[2] = exec.createSpecReplacerTable(outTableOccurrences, newSpec);
    ExecutionContext table = exec.createSubExecutionContext(initPercent);
    ret[0] = getStatTable().createStatisticsInColumnsTable(table);
    ExecutionContext histogram = exec.createSubExecutionContext(1.0 / 2);
    final HistogramColumn histogramColumn = createHistogramColumn();
    HiLiteHandler hlHandler = getEnableHiLite().getBooleanValue() ? getInHiLiteHandler(0) : new HiLiteHandler();
    double[] mins = getStatTable().getMin(), maxes = getStatTable().getMax(), means = getStatTable().getMean();
    for (int i = 0; i < maxes.length; i++) {
        DataCell min = getStatTable().getNonInfMin(i);
        if (min.isMissing()) {
            mins[i] = Double.NaN;
        } else {
            mins[i] = ((DoubleValue) min).getDoubleValue();
        }
        DataCell max = getStatTable().getNonInfMax(i);
        if (max.isMissing()) {
            maxes[i] = Double.NaN;
        } else {
            maxes[i] = ((DoubleValue) max).getDoubleValue();
        }
    }
    Pair<BufferedDataTable, Map<Integer, ? extends HistogramModel<?>>> pair = histogramColumn.process(
        histogram, inData[0], hlHandler, ret[0], mins, maxes, means, numOfNominalValues(), getColumnNames());
    // final BufferedDataTable outTable =
    // histogramColumn.appendNominal(pair.getFirst(), getStatTable(), hlHandler, exec, numOfNominalValues());
    ret[0] = pair.getFirst();
    ret[1] = histogramColumn.nominalTable(getStatTable(), hlHandler, exec, numOfNominalValues());
    if (m_enableHiLite.getBooleanValue()) {
        double rest = 1 - initPercent * 2 - 1.0 / 2;
        ExecutionContext projection = exec.createSubExecutionContext(rest / 2);
        ColumnRearranger rearranger = new ColumnRearranger(dataSpec);
        Set<String> colNames = new HashSet<String>(Arrays.asList(getColumnNames()));
        for (DataColumnSpec spec : rearranger.createSpec()) {
            if ((!spec.getType().isCompatible(DoubleValue.class)
                    && !spec.getType().isCompatible(NominalValue.class)) || !colNames.contains(spec.getName())) {
                rearranger.remove(spec.getName());
            }
        }
        ExecutionContext save = exec.createSubExecutionContext(rest / 2);
        m_subTable = new DefaultDataArray(
            projection.createColumnRearrangeTable(inData[0], rearranger, projection), 1,
            inData[0].getRowCount(), save);
        m_histograms = histogramColumn.histograms(inData[0], getInHiLiteHandler(0), mins, maxes, means,
            getColumnNames());
        Set<String> nominalColumns = new LinkedHashSet<String>();
        for (int i = 0; i < inData[0].getSpec().getNumColumns(); ++i) {
            Map<DataCell, Integer> nominalValues = getStatTable().getNominalValues(i);
            if (nominalValues != null) {
                nominalColumns.add(inData[0].getSpec().getColumnSpec(i).getName());
            }
        }
        final Pair<Map<Integer, Map<Integer, Set<RowKey>>>, Map<Integer, Map<DataValue, Set<RowKey>>>> bucketsAndNominals =
            HistogramColumn.construct(m_histograms, m_subTable, nominalColumns);
        m_buckets = bucketsAndNominals.getFirst();
        m_nominalKeys = bucketsAndNominals.getSecond();
    } else {
        m_histograms = pair.getSecond();
    }
    return ret;
}
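The DefaultDataArray stored in m_subTable is what the node's view later reads. A brief sketch of the typical consumption pattern, using only the DataArray interface (getDataTableSpec plus iteration over DataRow); the rendering step is left abstract:

DataArray rows = m_subTable;
DataTableSpec spec = rows.getDataTableSpec();
for (DataRow row : rows) {
    // render or aggregate the cached cells; the full input table is no longer needed
}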
Use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.
The class Rule2DNodeModel, method loadInternals.
/**
 * Load internals.
 *
 * @param internDir The intern node directory.
 * @param exec Used to report progress or to cancel loading.
 * @throws IOException If the internal files cannot be read.
 * @throws CanceledExecutionException If the user aborts the operation.
 * @see org.knime.core.node.NodeModel
 * #loadInternals(java.io.File, ExecutionMonitor)
 */
@Override
protected void loadInternals(final File internDir, final ExecutionMonitor exec)
        throws IOException, CanceledExecutionException {
    File rules = new File(internDir, RULE_FILE_NAME);
    ContainerTable ruleTable = DataContainer.readFromZip(rules);
    int rowCount = ruleTable.getRowCount();
    m_fuzzyRules = new DefaultDataArray(ruleTable, 1, rowCount, exec);
    File data = new File(internDir, DATA_FILE_NAME);
    ContainerTable table = DataContainer.readFromZip(data);
    rowCount = table.getRowCount();
    m_data = new DefaultDataArray(table, 1, rowCount, exec);
}