use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.
the class MDSProjectionNodeModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>Reduces the input table to the included columns and the configured number of rows, trains the
 * MDS projection on it against the fixed data table, and returns the input table rearranged with the
 * cells produced from the mapped data points.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inputTable = inData[IN_DATA_INDEX];
    final DataTableSpec inputSpec = inputTable.getSpec();

    // Keep only the included columns (all columns if no include list is set).
    final ColumnRearranger columnFilter = new ColumnRearranger(inputSpec);
    if (m_includeList != null) {
        final String[] includedColumns = m_includeList.toArray(new String[m_includeList.size()]);
        columnFilter.keepOnly(includedColumns);
    }
    final BufferedDataTable columnFilteredTable =
        exec.createColumnRearrangeTable(inputTable, columnFilter, exec.createSilentSubProgress(0.0));

    // Determine how many rows take part in the projection.
    int maxRows = m_rowsModel.getIntValue();
    final int availableRows = inputTable.getRowCount();
    if (m_useRowsModel.getBooleanValue()) {
        maxRows = availableRows;
    }
    // Warn if the input holds more rows than will be used.
    if (availableRows > maxRows) {
        setWarningMessage("Maximal number of rows to report is less than number of rows in input data table !");
    }

    // Cut the table down to the selected rows and turn it back into a BufferedDataTable.
    final DataTable truncatedRows = new DefaultDataArray(columnFilteredTable, 1, maxRows);
    final BufferedDataTable trainingTable = exec.createBufferedDataTable(truncatedRows, exec);

    // Resolve the fixed MDS column names to their indices in the fixed data table.
    final List<String> fixedColumnNames = m_fixedMdsColModel.getIncludeList();
    final int[] fixedColumnIndices = new int[fixedColumnNames.size()];
    final BufferedDataTable fixedTable = inData[FIXED_DATA_INDEX];
    final DataTableSpec fixedSpec = fixedTable.getSpec();
    int slot = 0;
    for (final String columnName : fixedColumnNames) {
        fixedColumnIndices[slot++] = fixedSpec.findColumnIndex(columnName);
    }

    // Set up the MDS manager, then initialize and train it.
    m_manager = new MDSProjectionManager(m_outputDimModel.getIntValue(), m_distModel.getStringValue(), m_fuzzy,
        trainingTable, fixedTable, fixedColumnIndices, exec);
    m_manager.setProjectOnly(m_projectOnly.getBooleanValue());
    m_manager.init(m_seedModel.getIntValue());
    m_manager.train(m_epochsModel.getIntValue(), m_learnrateModel.getDoubleValue());

    // Build the output table from the original input and the mapped data points.
    final MDSCellFactory cellFactory = new MDSCellFactory(m_manager.getDataPoints(), m_manager.getDimension());
    final ColumnRearranger outputRearranger = createColumnRearranger(inputSpec, cellFactory);
    final BufferedDataTable result =
        exec.createColumnRearrangeTable(inputTable, outputRearranger, exec.createSubProgress(0.1));
    return new BufferedDataTable[] { result };
}
use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.
the class HierarchicalClusterNodeModel method loadInternals.
/**
 * {@inheritDoc}
 *
 * <p>Restores the fusion (distance) table, the data rows and the stored settings from the node's
 * internals directory. The cluster tree is only rebuilt when at least one data row was stored.
 *
 * @throws IOException if one of the internal files cannot be read, or if the stored cluster tree
 *             settings are invalid (the {@link InvalidSettingsException} is attached as cause)
 */
@Override
protected void loadInternals(final File nodeInternDir, final ExecutionMonitor exec) throws IOException, CanceledExecutionException {
    // distances
    File distFile = new File(nodeInternDir, CFG_DIST_DATA);
    ContainerTable table1 = DataContainer.readFromZip(distFile);
    m_fusionTable = new DefaultDataArray(table1, 1, table1.getRowCount());
    // data rows
    File dataFile = new File(nodeInternDir, CFG_H_CLUST_DATA);
    ContainerTable table2 = DataContainer.readFromZip(dataFile);
    m_dataArray = new DefaultDataArray(table2, 1, table2.getRowCount());
    // cluster tree settings; the stream is closed here regardless of whether parsing succeeds,
    // so the file handle is never left open by this method
    File f = new File(nodeInternDir, CFG_HCLUST);
    final NodeSettingsRO settings;
    try (FileInputStream fis = new FileInputStream(f)) {
        settings = NodeSettings.loadFromXML(fis);
    }
    // if we had some data...
    if (m_dataArray.size() > 0) {
        // we also have some clustering nodes
        try {
            m_rootNode = ClusterNode.loadFromXML(settings, m_dataArray);
        } catch (InvalidSettingsException e) {
            // keep the original exception as cause so the stack trace is not lost
            throw new IOException(e.getMessage(), e);
        }
    }
}
use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.
the class SotaNodeModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>Caches the rows of the input table, initializes and trains the SOTA tree on it, and returns the
 * trained model as a port object if this node has an out-port; otherwise an empty array is returned.
 *
 * @throws IllegalArgumentException if the object at the in-port is not a {@link BufferedDataTable}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws CanceledExecutionException, Exception {
    final PortObject inputObject = inData[SotaNodeModel.INPORT];
    if (!(inputObject instanceof BufferedDataTable)) {
        throw new IllegalArgumentException("Given indata port object is " + " no BufferedDataTable!");
    }
    final BufferedDataTable inputTable = (BufferedDataTable) inputObject;

    // Keep all original rows in an array for the tree.
    final DataArray cachedRows = new DefaultDataArray(inputTable, 1, Integer.MAX_VALUE);
    final DataTable trainingTable = inputTable;

    // Locate the column carrying the class information.
    final int classColumnIndex =
        trainingTable.getDataTableSpec().findColumnIndex(m_classCol.getStringValue());

    m_sota.initializeTree(trainingTable, cachedRows, exec, classColumnIndex);
    m_sota.doTraining();

    if (!m_withOutPort) {
        return new PortObject[] {};
    }
    final SotaPortObject model = new SotaPortObject(m_sota, trainingTable.getDataTableSpec(), classColumnIndex);
    return new PortObject[] { model };
}
use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.
the class SotaManager method initializeTree.
/**
 * Sets up the tree: caches the input rows, decides whether fuzzy data is handled, creates the
 * distance manager and the matching helper, builds the root cell with its children and assigns
 * every row without missing values first to the root and then to the root's children.
 *
 * @param inData the table with the input data
 * @param originalData the original data
 * @param exec the execution monitor used for progress reporting and cancellation checks
 * @param indexOfClassColumn the index of the column containing the class information; if the value
 *            is -1 class values are ignored
 * @throws CanceledExecutionException if user canceled the process
 */
public void initializeTree(final DataTable inData, final DataArray originalData, final ExecutionMonitor exec, final int indexOfClassColumn) throws CanceledExecutionException {
    m_indexOfClassColumn = indexOfClassColumn;
    m_origData = originalData;
    m_exec = exec;
    m_inDataContainer = new DefaultDataArray(inData, 1, Integer.MAX_VALUE);

    m_exec.checkCanceled();
    m_state += 0.01;
    m_exec.setProgress(m_state, "Preparing data");

    // The data counts as fuzzy as soon as one column has a fuzzy interval type.
    m_isFuzzy = false;
    final int columnCount = m_inDataContainer.getDataTableSpec().getNumColumns();
    for (int col = 0; col < columnCount; col++) {
        final DataType columnType = m_inDataContainer.getDataTableSpec().getColumnSpec(col).getType();
        if (SotaUtil.isFuzzyIntervalType(columnType)) {
            m_isFuzzy = true;
        }
    }

    // Hierarchical fuzzy data forces fuzzy mode and wraps the rows in a level filter.
    if (m_useHierarchicalFuzzyData) {
        m_isFuzzy = true;
        final FuzzyHierarchyFilterRowContainer levelFilter =
            new FuzzyHierarchyFilterRowContainer(m_inDataContainer, m_currentHierarchyLevel);
        m_inDataContainer = levelFilter;
        m_maxHierarchicalLevel = levelFilter.getMaxLevel();
    }

    // Distance metric
    final double distanceOffset = 1;
    m_distanceManager = DistanceManagerFactory.createDistanceManager(m_distance, m_isFuzzy, distanceOffset);

    // Helper matching the kind of data
    m_helper = m_isFuzzy
        ? new SotaFuzzyHelper(m_inDataContainer, m_exec)
        : new SotaNumberHelper(m_inDataContainer, m_exec);
    m_exec.checkCanceled();

    // Dimension as determined by the helper from the rows of the container
    m_dimension = m_helper.initializeDimension();

    // Root and children cells
    m_root = m_helper.initializeTree();
    m_root.setLevel(1);
    m_exec.checkCanceled();

    // Every row without missing values that is not yet registered goes to the root cell.
    final int rowCount = m_inDataContainer.size();
    for (int rowIdx = 0; rowIdx < rowCount; rowIdx++) {
        final boolean alreadyAssigned = m_root.getDataIds().indexOf(rowIdx) != -1;
        if (!alreadyAssigned) {
            final DataRow candidate = m_inDataContainer.getRow(rowIdx);
            if (!SotaUtil.hasMissingValues(candidate)) {
                m_root.getDataIds().add(rowIdx);
            }
        }
        m_exec.checkCanceled();
        m_state += 0.1 / rowCount;
        m_exec.setProgress(m_state, "Assigning data");
    }

    // Distribute the root's data to its children.
    assignDataToChildren(m_root);
}
use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.
the class LinRegLearnerNodeModel method execute.
/**
* {@inheritDoc}
*
* <p>Learns a linear regression model from the table at in-port 0 by solving the normal equations
* (A^T x A)^-1 x A^T x b, where A is the training data and b the target column. The table is
* scanned two times, or three times when {@code m_isCalcError} is set:
* <ol>
* <li>accumulate A^T x A and the column means, remembering which rows contain missing values,</li>
* <li>multiply the inverse with A^T x b to obtain the multipliers,</li>
* <li>(optional) sum the squared differences between target and model output into {@code m_error}.</li>
* </ol>
* Rows with a missing target or a missing value reported by {@code readIntoBuffer} are skipped in
* all scans and counted in {@code m_nrRowsSkipped}.
*
* @param inData the training table at index 0 and the optional PMML input at index 1
* @param exec used for progress reporting and cancellation checks
* @return a single port object created from the learned {@link LinearRegressionContent}
* @throws Exception if the table has too few rows, if the user cancels, or if one of the called
* helpers fails
*/
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
/*
* What comes next is the matrix calculation, solving A \times w = b
* where A is the matrix having the training data (as many rows as there
* are rows in inData[0], w is the vector of weights to learn (number of
* variables) and b is the target output
*/
// reset was called, must be cleared
final BufferedDataTable data = (BufferedDataTable) inData[0];
final DataTableSpec spec = data.getDataTableSpec();
final String[] includes = computeIncludes(spec);
// one unknown per included column plus one more; multipliers[0] is later used as the offset
final int nrUnknown = includes.length + 1;
double[] means = new double[includes.length];
// indices of the columns in m_includes
final int[] colIndizes = new int[includes.length];
for (int i = 0; i < includes.length; i++) {
colIndizes[i] = spec.findColumnIndex(includes[i]);
}
// index of m_target
final int target = spec.findColumnIndex(m_target);
// this is the matrix (A^T x A) where A is the training data including
// one column fixed to one.
// (we do it here manually in order to avoid to get all the data in
// double[][])
double[][] ata = new double[nrUnknown][nrUnknown];
// reused per row; filled by readIntoBuffer
// (presumably with the constant-one entry at index 0 - TODO confirm against readIntoBuffer)
double[] buffer = new double[nrUnknown];
// we memorize for each row if it contains missing values.
BitSet missingSet = new BitSet();
m_nrRows = data.getRowCount();
int myProgress = 0;
// we need 2 or 3 scans on the data (first run was done already)
final double totalProgress = (2 + (m_isCalcError ? 1 : 0)) * m_nrRows;
int rowCount = 0;
boolean hasPrintedWarning = false;
// ---- scan 1: accumulate A^T x A and the means; rowCount advances for skipped rows as well ----
for (RowIterator it = data.iterator(); it.hasNext(); rowCount++) {
DataRow row = it.next();
myProgress++;
exec.setProgress(myProgress / totalProgress, "Calculating matrix " + (rowCount + 1) + " (\"" + row.getKey().getString() + "\")");
exec.checkCanceled();
DataCell targetValue = row.getCell(target);
// read data from row into buffer, skip missing value rows
// (short-circuit: readIntoBuffer is not called at all when the target itself is missing)
boolean containsMissing = targetValue.isMissing() || readIntoBuffer(row, buffer, colIndizes);
missingSet.set(rowCount, containsMissing);
if (containsMissing) {
String errorMessage = "Row \"" + row.getKey().getString() + "\" contains missing values, skipping it.";
// only the first skipped row is logged as a warning, all further ones at debug level
if (!hasPrintedWarning) {
LOGGER.warn(errorMessage + " Suppress further warnings.");
hasPrintedWarning = true;
} else {
LOGGER.debug(errorMessage);
}
// NOTE(review): the counter is only incremented here and not reset in this method;
// presumably reset() clears it - confirm
m_nrRowsSkipped++;
// with next row
continue;
}
updateMean(buffer, means);
// the matrix is symmetric
for (int i = 0; i < nrUnknown; i++) {
for (int j = 0; j < nrUnknown; j++) {
ata[i][j] += buffer[i] * buffer[j];
}
}
}
assert (m_nrRows == rowCount);
normalizeMean(means);
// no unique solution when there are less rows than unknown variables
// NOTE(review): rowCount is the total number of rows iterated, including the skipped ones,
// so rows with missing values still count towards this check - confirm this is intended
if (rowCount <= nrUnknown) {
throw new Exception("Too few rows to perform regression (" + rowCount + " rows, but degree of freedom of " + nrUnknown + ")");
}
exec.setMessage("Calculating pseudo inverse...");
double[][] ataInverse = MathUtils.inverse(ata);
checkForNaN(ataInverse);
// ---- scan 2: accumulate the multipliers ----
// multiply with A^T and b, i.e. (A^T x A)^-1 x A^T x b
double[] multipliers = new double[nrUnknown];
rowCount = 0;
for (RowIterator it = data.iterator(); it.hasNext(); rowCount++) {
DataRow row = it.next();
exec.setMessage("Determining output " + (rowCount + 1) + " (\"" + row.getKey().getString() + "\")");
myProgress++;
exec.setProgress(myProgress / totalProgress);
exec.checkCanceled();
// does row containing missing values?
if (missingSet.get(rowCount)) {
// error has printed above, silently ignore here.
continue;
}
boolean containsMissing = readIntoBuffer(row, buffer, colIndizes);
assert !containsMissing;
DataCell targetValue = row.getCell(target);
double b = ((DoubleValue) targetValue).getDoubleValue();
// add this row's contribution: (row i of the inverse) dot buffer, times the target value
for (int i = 0; i < nrUnknown; i++) {
double buf = 0.0;
for (int j = 0; j < nrUnknown; j++) {
buf += ataInverse[i][j] * buffer[j];
}
multipliers[i] += buf * b;
}
}
// ---- scan 3 (optional): sum of squared differences between target and model output ----
if (m_isCalcError) {
assert m_error == 0.0;
rowCount = 0;
for (RowIterator it = data.iterator(); it.hasNext(); rowCount++) {
DataRow row = it.next();
exec.setMessage("Calculating error " + (rowCount + 1) + " (\"" + row.getKey().getString() + "\")");
myProgress++;
exec.setProgress(myProgress / totalProgress);
exec.checkCanceled();
// does row containing missing values?
if (missingSet.get(rowCount)) {
// error has printed above, silently ignore here.
continue;
}
boolean hasMissing = readIntoBuffer(row, buffer, colIndizes);
assert !hasMissing;
DataCell targetValue = row.getCell(target);
double b = ((DoubleValue) targetValue).getDoubleValue();
// model output for this row: dot product of the multipliers and the buffer
double out = 0.0;
for (int i = 0; i < nrUnknown; i++) {
out += multipliers[i] * buffer[i];
}
m_error += (b - out) * (b - out);
}
}
// handle the optional PMML input
PMMLPortObject inPMMLPort = (PMMLPortObject) inData[1];
DataTableSpec outSpec = getLearningSpec(spec);
// the first multiplier is passed on as the offset, the remaining ones as the multipliers
double offset = multipliers[0];
multipliers = Arrays.copyOfRange(multipliers, 1, multipliers.length);
m_params = new LinearRegressionContent(outSpec, offset, multipliers, means);
// cache the entire table as otherwise the color information
// may be lost (filtering out the "colored" column)
m_rowContainer = new DefaultDataArray(data, m_firstRowPaint, m_rowCountPaint);
m_actualUsedColumns = includes;
return new PortObject[] { m_params.createPortObject(inPMMLPort, spec, outSpec) };
}
Aggregations