Use of org.knime.core.data.DataTable in the knime-core project (KNIME): class SmoteNodeModel, method execute().
/**
 * Oversamples the minority classes of the input table using SMOTE.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData,
        final ExecutionContext exec)
        throws CanceledExecutionException, Exception {
    BufferedDataTable in = inData[0];
    // A fixed seed makes the synthetic-row generation reproducible.
    final Random rand = (m_seed != null) ? new Random(m_seed) : new Random();
    Smoter smoter = new Smoter(in, m_class, exec, rand);
    if (m_method.equals(METHOD_ALL)) {
        // First pass: total number of rows that will be added; needed to
        // split the overall progress among the per-class sub-progresses.
        int nrRowsToAdd = 0;
        for (Iterator<DataCell> it = smoter.getClassValues(); it.hasNext();) {
            int count = smoter.getCount(it.next());
            nrRowsToAdd += (int)(count * m_rate);
        }
        // Second pass: oversample each class value by the configured rate.
        for (Iterator<DataCell> it = smoter.getClassValues(); it.hasNext();) {
            DataCell cur = it.next();
            int count = smoter.getCount(cur);
            int newCount = (int)(count * m_rate);
            exec.setMessage("Smoting '" + cur.toString() + "'");
            // Guard against 0/0 = NaN progress when no rows are to be added
            // (e.g. the rate rounds every class count down to zero).
            final double progressShare = nrRowsToAdd > 0
                ? newCount / (double)nrRowsToAdd : 0.0;
            ExecutionMonitor subExec = exec.createSubProgress(progressShare);
            smoter.smote(cur, newCount, m_kNN, subExec);
        }
    } else if (m_method.equals(METHOD_MAJORITY)) {
        // Oversample every class up to the size of the majority class.
        DataCell majority = smoter.getMajorityClass();
        int majorityCount = smoter.getCount(majority);
        Iterator<DataCell> it = smoter.getClassValues();
        int nrRowsToAdd = 0;
        while (it.hasNext()) {
            DataCell cur = it.next();
            nrRowsToAdd += (majorityCount - smoter.getCount(cur));
        }
        it = smoter.getClassValues();
        while (it.hasNext()) {
            DataCell cur = it.next();
            int count = smoter.getCount(cur);
            int newCount = majorityCount - count;
            exec.setMessage("Smoting '" + cur.toString() + "'");
            // Guard against 0/0 = NaN progress when every class already has
            // the majority count (nothing to add).
            final double progressShare = nrRowsToAdd > 0
                ? newCount / (double)nrRowsToAdd : 0.0;
            ExecutionMonitor subExec = exec.createSubProgress(progressShare);
            smoter.smote(cur, newCount, m_kNN, subExec);
        }
    }
    smoter.close();
    DataTable out = smoter.getSmotedTable();
    return new BufferedDataTable[]{exec.createBufferedDataTable(out, exec)};
}
Use of org.knime.core.data.DataTable in the knime-core project (KNIME): class SotaNodeModel, method loadInternals().
/**
 * Restores the SOTA model (settings, in/original data snapshots, and the
 * cluster tree) from the node's internals directory.
 *
 * {@inheritDoc}
 */
@Override
protected void loadInternals(final File internDir,
        final ExecutionMonitor exec) throws IOException {
    File file = new File(internDir, TREE_FILE);
    // try-with-resources: the original code leaked the stream if any
    // exception was thrown between the explicit close() calls (e.g. while
    // reading the zipped data tables below).
    try (FileInputStream fis = new FileInputStream(file)) {
        ModelContentRO modelContent = ModelContent.loadFromXML(fis);
        // Load settings
        int inDataSize = 0;
        int origDataSize = 0;
        try {
            m_sota.setUseHierarchicalFuzzyData(modelContent.getBoolean(
                SotaPortObject.CFG_KEY_USE_FUZZY_HIERARCHY));
            m_sota.setMaxHierarchicalLevel(modelContent.getInt(
                SotaPortObject.CFG_KEY_MAX_FUZZY_LEVEL));
            inDataSize = modelContent.getInt(
                SotaPortObject.CFG_KEY_INDATA_SIZE);
            origDataSize = modelContent.getInt(
                SotaPortObject.CFG_KEY_ORIGDATA_SIZE);
        } catch (InvalidSettingsException e1) {
            // Preserve the cause via the (String, Throwable) constructor.
            throw new IOException("Could not load settings, "
                + "due to invalid settings in model content !", e1);
        }
        // Load in data (rows 1..inDataSize of the stored table snapshot)
        DataTable table =
            DataContainer.readFromZip(new File(internDir, IN_DATA_FILE));
        final DataArray inData = new DefaultDataArray(table, 1, inDataSize);
        m_sota.setInData(inData);
        // Load orig data
        table = DataContainer.readFromZip(new File(internDir, ORIG_DATA_FILE));
        final DataArray origData =
            new DefaultDataArray(table, 1, origDataSize);
        m_sota.setOriginalData(origData);
        // Load tree
        SotaTreeCell root = new SotaTreeCell(0, false);
        try {
            root.loadFrom(modelContent, 0, null, false);
        } catch (InvalidSettingsException e) {
            throw new IOException("Could not load tree cells, "
                + "due to invalid settings in model content !", e);
        }
        m_sota.setRoot(root);
    }
}
Use of org.knime.core.data.DataTable in the knime-core project (KNIME): class PCAReverseNodeModel, method execute().
/**
 * Performs the PCA back-projection: maps rows from PCA space back to the
 * original coordinate system using the model's eigenvectors.
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData,
        final ExecutionContext exec) throws Exception {
    // Hoist the repeated casts of the input ports into typed locals.
    final PCAModelPortObject model =
        (PCAModelPortObject)inData[MODEL_INPORT];
    final BufferedDataTable dataTable =
        (BufferedDataTable)inData[DATA_INPORT];
    final DataTableSpec dataSpec = dataTable.getDataTableSpec();
    final Matrix eigenvectors = EigenValue.getSortedEigenVectors(
        model.getEigenVectors(), model.getEigenvalues(),
        m_inputColumnIndices.length);
    if (m_failOnMissingValues.getBooleanValue()) {
        // Scan the selected columns up front so the node fails fast
        // instead of producing partial output.
        for (final DataRow row : dataTable) {
            for (int i = 0; i < m_inputColumnIndices.length; i++) {
                if (row.getCell(m_inputColumnIndices[i]).isMissing()) {
                    throw new IllegalArgumentException(
                        "data table contains missing values");
                }
            }
        }
    }
    final String[] origColumnNames =
        ((PCAModelPortObjectSpec)model.getSpec()).getColumnNames();
    final DataColumnSpec[] specs =
        createAddTableSpec(dataSpec, origColumnNames);
    final CellFactory fac = new CellFactory() {

        @Override
        public DataCell[] getCells(final DataRow row) {
            return convertInputRow(eigenvectors, row, model.getCenter(),
                m_inputColumnIndices, origColumnNames.length);
        }

        @Override
        public DataColumnSpec[] getColumnSpecs() {
            return specs;
        }

        @Override
        public void setProgress(final int curRowNr, final int rowCount,
                final RowKey lastKey, final ExecutionMonitor texec) {
            texec.setProgress((double)curRowNr / rowCount);
        }
    };
    final ColumnRearranger cr = new ColumnRearranger(dataSpec);
    cr.append(fac);
    if (m_removePCACols.getBooleanValue()) {
        // Optionally drop the PCA input columns from the output table.
        cr.remove(m_inputColumnIndices);
    }
    final BufferedDataTable result =
        exec.createColumnRearrangeTable(dataTable, cr, exec);
    final PortObject[] out = {result};
    return out;
}
Use of org.knime.core.data.DataTable in the knime-core project (KNIME): class PolyRegLineScatterPlotter, method modelChanged().
/**
 * This method must be called if the model has changed. It updates the
 * plotter to show the new model's values.
 */
public void modelChanged() {
    m_viewData = m_model.getViewData();
    final DataTable data = m_viewData.getRowContainer();
    if (data == null) {
        // Nothing to display yet.
        return;
    }
    final DataTableSpec origSpec = data.getDataTableSpec();
    final MyProperties props = (MyProperties)getProperties();
    // All columns except the target are x-axis candidates.
    final DataColumnSpec[] xCandidates =
        new DataColumnSpec[origSpec.getNumColumns() - 1];
    int pos = 0;
    for (final DataColumnSpec colSpec : origSpec) {
        if (m_viewData.targetColumn.equals(colSpec.getName())) {
            // The target column becomes the y-axis.
            m_yColumnSpec = colSpec;
            getYAxis().setCoordinate(Coordinate.createCoordinate(colSpec));
        } else {
            xCandidates[pos++] = colSpec;
        }
    }
    // Default the x-axis to the first non-target column.
    final DataColumnSpec firstX = xCandidates[0];
    m_xColumnSpec = firstX;
    getXAxis().setCoordinate(Coordinate.createCoordinate(firstX));
    m_filteredSpec = new DataTableSpec(xCandidates);
    try {
        props.m_xColumn.update(m_filteredSpec, firstX.getName());
    } catch (final NotConfigurableException ex) {
        // cannot happen: the spec was just built from the same columns
        assert false : ex.getMessage();
    }
    reset();
    updatePaintModel();
}
Use of org.knime.core.data.DataTable in the knime-core project (KNIME): class HierarchicalClusterNodeModel, method execute().
/**
 * Runs agglomerative hierarchical clustering: starts with one cluster per
 * row and repeatedly merges the two closest clusters (single, average, or
 * complete linkage) until only one remains. Records the fusion distance of
 * every merge step and, when the configured cluster count is reached,
 * materializes the labeled output table.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] data,
        final ExecutionContext exec) throws Exception {
    // determine the indices of the selected columns
    // (fixed typo: was "inlcludedCols")
    List<String> includedCols = m_selectedColumns.getIncludeList();
    int[] selectedColIndices = new int[includedCols.size()];
    for (int count = 0; count < selectedColIndices.length; count++) {
        selectedColIndices[count] =
            data[0].getDataTableSpec().findColumnIndex(includedCols.get(count));
    }
    BufferedDataTable inputData = data[0];
    // The distance cache (HalfFloatMatrix) is indexed by int row numbers.
    if (inputData.size() > 65500) {
        throw new RuntimeException("At most 65,500 patterns can be clustered");
    }
    DataTable outputData = null;
    if (DistanceFunction.Names.Manhattan.toString().equals(
            m_distFunctionName.getStringValue())) {
        m_distFunction = ManhattanDist.MANHATTEN_DISTANCE;
    } else {
        m_distFunction = EuclideanDist.EUCLIDEAN_DISTANCE;
    }
    // generate initial clustering
    // which means that every data point is one cluster
    List<ClusterNode> clusters = initClusters(inputData, exec);
    // store the distance per each fusion step
    DataContainer fusionCont = exec.createDataContainer(createFusionSpec());
    int iterationStep = 0;
    // Optional symmetric cache of pairwise row distances (lazy: NaN marks
    // an entry that has not been computed yet).
    final HalfFloatMatrix cache;
    if (m_cacheDistances.getBooleanValue()) {
        cache = new HalfFloatMatrix((int)inputData.size(), false);
        cache.fill(Float.NaN);
    } else {
        cache = null;
    }
    double max = inputData.size();
    // the number of clusters at the beginning is equal to the number
    // of data rows (each row is a cluster)
    int numberDataRows = clusters.size();
    while (clusters.size() > 1) {
        // checks if number clusters to generate output table is reached
        if (m_numClustersForOutput.getIntValue() == clusters.size()) {
            outputData = createResultTable(inputData, clusters, exec);
        }
        iterationStep++;
        // Single progress report per iteration (the previous second call
        // immediately overwrote the first with the same fraction).
        exec.setProgress(iterationStep / max, "Iteration " + iterationStep
            + ", " + clusters.size() + " clusters remaining");
        // calculate distance between all clusters and track the closest pair
        float currentSmallestDist = Float.MAX_VALUE;
        ClusterNode currentClosestCluster1 = null;
        ClusterNode currentClosestCluster2 = null;
        // subprogress for loop
        double availableProgress = (1.0 / numberDataRows);
        ExecutionContext subexec =
            exec.createSubExecutionContext(availableProgress);
        for (int i = 0; i < clusters.size(); i++) {
            exec.checkCanceled();
            ClusterNode node1 = clusters.get(i);
            // j starts at i + 1: distances are symmetric.
            for (int j = i + 1; j < clusters.size(); j++) {
                final float dist;
                ClusterNode node2 = clusters.get(j);
                // single, complete and average linkage supported
                if (m_linkageType.getStringValue().equals(
                        Linkage.SINGLE.name())) {
                    dist = calculateSingleLinkageDist(node1, node2, cache,
                        selectedColIndices);
                } else if (m_linkageType.getStringValue().equals(
                        Linkage.AVERAGE.name())) {
                    dist = calculateAverageLinkageDist(node1, node2, cache,
                        selectedColIndices);
                } else {
                    dist = calculateCompleteLinkageDist(node1, node2, cache,
                        selectedColIndices);
                }
                if (dist < currentSmallestDist) {
                    currentClosestCluster1 = node1;
                    currentClosestCluster2 = node2;
                    currentSmallestDist = dist;
                }
            }
        }
        subexec.setProgress(1.0);
        // make one cluster of the two closest
        ClusterNode newNode = new ClusterNode(currentClosestCluster1,
            currentClosestCluster2, currentSmallestDist);
        clusters.remove(currentClosestCluster1);
        clusters.remove(currentClosestCluster2);
        clusters.add(newNode);
        // store the distance per each fusion step
        fusionCont.addRowToTable(new DefaultRow(// row key
        Integer.toString(clusters.size()), // x-axis scatter plotter
        new IntCell(clusters.size()), // y-axis scatter plotter
        new DoubleCell(newNode.getDist())));
        // // print number clusters and their data points
        // LOGGER.debug("Iteration " + iterationStep + ":");
        // LOGGER.debug(" Number Clusters: " + clusters.size());
        // printClustersDataRows(clusters);
    }
    if (clusters.size() > 0) {
        // the last remaining cluster is the dendrogram root
        m_rootNode = clusters.get(0);
    }
    fusionCont.close();
    // if there was no input data (or the configured cluster count was never
    // hit during merging) create the output table now
    if (outputData == null) {
        outputData = createResultTable(inputData, clusters, exec);
    }
    m_dataArray = new DefaultDataArray(inputData, 1, (int)inputData.size());
    m_fusionTable =
        new DefaultDataArray(fusionCont.getTable(), 1, iterationStep);
    return new BufferedDataTable[]{
        exec.createBufferedDataTable(outputData, exec)};
}
Aggregations