Usage example of org.knime.core.data.def.IntCell in the knime-core project (by KNIME): the createDataRows method of the ClassAttributeModel class.
/**
 * {@inheritDoc}
 */
@Override
void createDataRows(final ExecutionMonitor exec, final BufferedDataContainer dc, final boolean ignoreMissing, final AtomicInteger rowId) throws CanceledExecutionException {
    // One output row per class value, in sorted order; bail out if the
    // model holds no class values at all.
    final List<String> classValues = AttributeModel.sortCollection(m_recsCounterByClassVal.keySet());
    if (classValues == null) {
        return;
    }
    // The attribute-name cell is identical for every row, so build it once.
    final StringCell nameCell = new StringCell(getAttributeName());
    for (final String classValue : classValues) {
        final List<DataCell> rowCells = new LinkedList<>();
        rowCells.add(nameCell);
        // no attribute value for the class attribute itself
        rowCells.add(DataType.getMissingCell());
        rowCells.add(new StringCell(classValue));
        rowCells.add(new IntCell(getNoOfRecs4ClassValue(classValue)));
        if (!ignoreMissing) {
            // missing-value counter column is only present in this mode
            rowCells.add(new IntCell(getNoOfMissingVals()));
        }
        // trailing columns (mean/deviation) do not apply to a class attribute
        rowCells.add(DataType.getMissingCell());
        rowCells.add(DataType.getMissingCell());
        final RowKey key = RowKey.createRowKey(rowId.getAndIncrement());
        dc.addRowToTable(new DefaultRow(key, rowCells.toArray(new DataCell[0])));
    }
}
Usage example of org.knime.core.data.def.IntCell in the knime-core project (by KNIME): the createDataRows method of the NominalAttributeModel class.
/**
 * {@inheritDoc}
 */
@Override
void createDataRows(final ExecutionMonitor exec, final BufferedDataContainer dc, final boolean ignoreMissing, final AtomicInteger rowId) throws CanceledExecutionException {
    // One output row per (attribute value, class value) pair; nothing to
    // emit when there are no class values.
    final List<String> classVals = AttributeModel.sortCollection(m_classValues.keySet());
    if (classVals == null) {
        return;
    }
    final List<String> attrVals = AttributeModel.sortCollection(m_attributeVals);
    // The attribute-name cell is shared by all rows.
    final StringCell nameCell = new StringCell(getAttributeName());
    for (final String attributeValue : attrVals) {
        final StringCell valueCell = new StringCell(attributeValue);
        for (final String classValue : classVals) {
            final NominalClassValue counts = m_classValues.get(classValue);
            final List<DataCell> rowCells = new LinkedList<>();
            rowCells.add(nameCell);
            rowCells.add(valueCell);
            rowCells.add(new StringCell(classValue));
            rowCells.add(new IntCell(counts.getNoOfRows4AttributeValue(attributeValue)));
            if (!ignoreMissing) {
                // missing-value counter column is only present in this mode
                rowCells.add(new IntCell(counts.getNoOfMissingValueRecs()));
            }
            // trailing columns (mean/deviation) do not apply to nominal attributes
            rowCells.add(DataType.getMissingCell());
            rowCells.add(DataType.getMissingCell());
            dc.addRowToTable(new DefaultRow(RowKey.createRowKey(rowId.getAndIncrement()), rowCells.toArray(new DataCell[0])));
        }
    }
}
Usage example of org.knime.core.data.def.IntCell in the knime-core project (by KNIME): the createScoreTable method of the EntropyCalculator class.
/**
 * Creates the entropy score table: one row per cluster holding its size,
 * entropy and normalized entropy (quality left missing), sorted ascending
 * by normalized entropy, followed by a final "Overall" row that carries the
 * aggregate size, entropy, normalized entropy and quality.
 *
 * @param referenceMap maps each data row key to the key of its reference cluster
 * @param clusteringMap maps each cluster id to the set of row keys it contains
 * @return the finished (closed) score table
 */
private static DataTable createScoreTable(final Map<RowKey, RowKey> referenceMap, final Map<RowKey, Set<RowKey>> clusteringMap) {
    ArrayList<DefaultRow> sortedRows = new ArrayList<DefaultRow>();
    // number of different clusters in reference clustering, used for
    // normalization
    int clusterCardinalityInReference = (new HashSet<RowKey>(referenceMap.values())).size();
    // log2 of the reference cluster count; NOTE(review): this is 0 when the
    // reference clustering has a single cluster, which makes the normalized
    // entropy below NaN (0/0) or Infinity — confirm that case cannot occur.
    double normalization = Math.log(clusterCardinalityInReference) / Math.log(2.0);
    int totalSize = 0;
    for (Map.Entry<RowKey, Set<RowKey>> e : clusteringMap.entrySet()) {
        int size = e.getValue().size();
        DataCell sizeCell = new IntCell(size);
        totalSize += size;
        double entropy = entropy(referenceMap, e.getValue());
        DataCell entropyCell = new DoubleCell(entropy);
        DataCell normEntropy = new DoubleCell(entropy / normalization);
        // per-cluster quality is undefined; only the overall row gets one
        DataCell quality = DataType.getMissingCell();
        RowKey clusterID = e.getKey();
        DefaultRow row = new DefaultRow(clusterID, sizeCell, entropyCell, normEntropy, quality);
        sortedRows.add(row);
    }
    // Sort ascending by normalized entropy (cell index 2). Double.compare
    // replaces the previous hand-rolled ternary, which returned 0 for every
    // comparison involving NaN and thereby violated the Comparator contract
    // (Collections.sort can throw "Comparison method violates its general
    // contract!" on such input); Double.compare imposes a total order.
    Collections.sort(sortedRows, new Comparator<DefaultRow>() {
        @Override
        public int compare(final DefaultRow o1, final DefaultRow o2) {
            double e1 = ((DoubleValue) o1.getCell(2)).getDoubleValue();
            double e2 = ((DoubleValue) o2.getCell(2)).getDoubleValue();
            return Double.compare(e1, e2);
        }
    });
    DataRow[] rows = sortedRows.toArray(new DataRow[0]);
    DataTableSpec tableSpec = getScoreTableSpec();
    DataContainer container = new DataContainer(tableSpec);
    for (DataRow r : rows) {
        container.addRowToTable(r);
    }
    // last row contains overall quality values
    double entropy = getEntropy(referenceMap, clusteringMap);
    double quality = getQuality(referenceMap, clusteringMap);
    DataCell entropyCell = new DoubleCell(entropy);
    DataCell normEntropy = new DoubleCell(entropy / normalization);
    DataCell qualityCell = new DoubleCell(quality);
    DataCell size = new IntCell(totalSize);
    RowKey clusterID = new RowKey("Overall");
    // avoid clashing with an existing cluster literally named "Overall"
    int uniquifier = 1;
    while (clusteringMap.containsKey(clusterID)) {
        clusterID = new RowKey("Overall (#" + (uniquifier++) + ")");
    }
    DefaultRow row = new DefaultRow(clusterID, size, entropyCell, normEntropy, qualityCell);
    container.addRowToTable(row);
    container.close();
    return container.getTable();
}
Usage example of org.knime.core.data.def.IntCell in the knime-core project (by KNIME): the execute method of the PolyRegLearnerNodeModel class.
/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getDataTableSpec();
    final int colCount = inSpec.getNumColumns();
    // remember which input columns take part in the regression
    String[] selectedCols = computeSelectedColumns(inSpec);
    Set<String> hash = new HashSet<String>(Arrays.asList(selectedCols));
    m_colSelected = new boolean[colCount];
    for (int i = 0; i < colCount; i++) {
        m_colSelected[i] = hash.contains(inTable.getDataTableSpec().getColumnSpec(i).getName());
    }
    final int rowCount = inTable.getRowCount();
    // keep only the learning columns plus the target column for the view data
    String[] temp = new String[m_columnNames.length + 1];
    System.arraycopy(m_columnNames, 0, temp, 0, m_columnNames.length);
    temp[temp.length - 1] = m_settings.getTargetColumn();
    FilterColumnTable filteredTable = new FilterColumnTable(inTable, temp);
    final DataArray rowContainer = new DefaultDataArray(filteredTable, 1, m_settings.getMaxRowsForView());
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inData[1] : null;
    PortObjectSpec[] outputSpec = configure((inPMMLPort == null) ? new PortObjectSpec[] { inData[0].getSpec(), null } : new PortObjectSpec[] { inData[0].getSpec(), inPMMLPort.getSpec() });
    Learner learner = new Learner((PMMLPortObjectSpec) outputSpec[0], 0d, m_settings.getMissingValueHandling() == MissingValueHandling.fail, m_settings.getDegree());
    try {
        PolyRegContent polyRegContent = learner.perform(inTable, exec);
        m_betas = fillBeta(polyRegContent);
        m_meanValues = polyRegContent.getMeans();
        ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
        crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
        PortObject[] bdt = new PortObject[] { createPMMLModel(inPMMLPort, inSpec), exec.createColumnRearrangeTable(inTable, crea, exec.createSilentSubExecutionContext(.2)), polyRegContent.createTablePortObject(exec.createSubExecutionContext(0.2)) };
        m_squaredError /= rowCount;
        if (polyRegContent.getWarningMessage() != null) {
            setWarningMessage(polyRegContent.getWarningMessage());
        }
        double[] stdErrors = PolyRegViewData.mapToArray(polyRegContent.getStandardErrors(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptStdErr());
        double[] tValues = PolyRegViewData.mapToArray(polyRegContent.getTValues(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptTValue());
        double[] pValues = PolyRegViewData.mapToArray(polyRegContent.getPValues(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptPValue());
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, stdErrors, tValues, pValues, m_squaredError, polyRegContent.getAdjustedRSquared(), m_columnNames, m_settings.getDegree(), m_settings.getTargetColumn(), rowContainer);
        return bdt;
    } catch (ModelSpecificationException e) {
        // The model could not be fit: warn and emit placeholder outputs.
        final String origWarning = getWarningMessage();
        // BUGFIX: the previous expression
        //   cond ? (origWarning + "\n") : "" + e.getMessage()
        // parsed the else branch as ("" + e.getMessage()), so the exception
        // message was silently dropped whenever a prior warning existed.
        // Parenthesizing the ternary ensures the message is always appended.
        final String warning = ((origWarning != null && !origWarning.isEmpty()) ? (origWarning + "\n") : "") + e.getMessage();
        setWarningMessage(warning);
        final ExecutionContext subExec = exec.createSubExecutionContext(.1);
        // statistics table filled with zero coefficients and missing cells
        final BufferedDataContainer empty = subExec.createDataContainer(STATS_SPEC);
        int rowIdx = 1;
        for (final String column : m_columnNames) {
            for (int d = 1; d <= m_settings.getDegree(); ++d) {
                empty.addRowToTable(new DefaultRow("Row" + rowIdx++, new StringCell(column), new IntCell(d), new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(), DataType.getMissingCell()));
            }
        }
        empty.addRowToTable(new DefaultRow("Row" + rowIdx, new StringCell("Intercept"), new IntCell(0), new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(), DataType.getMissingCell()));
        double[] nans = new double[m_columnNames.length * m_settings.getDegree() + 1];
        Arrays.fill(nans, Double.NaN);
        m_betas = new double[nans.length];
        // Mean only for the linear tags
        m_meanValues = new double[nans.length / m_settings.getDegree()];
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, nans, nans, nans, m_squaredError, Double.NaN, m_columnNames, m_settings.getDegree(), m_settings.getTargetColumn(), rowContainer);
        empty.close();
        ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
        crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
        BufferedDataTable rearrangerTable = exec.createColumnRearrangeTable(inTable, crea, exec.createSubProgress(0.6));
        PMMLPortObject model = createPMMLModel(inPMMLPort, inTable.getDataTableSpec());
        PortObject[] bdt = new PortObject[] { model, rearrangerTable, empty.getTable() };
        return bdt;
    }
}
Usage example of org.knime.core.data.def.IntCell in the knime-core project (by KNIME): the execute method of the HierarchicalClusterNodeModel class.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] data, final ExecutionContext exec) throws Exception {
// Agglomerative hierarchical clustering: start with one cluster per input
// row and repeatedly merge the two closest clusters (single, average or
// complete linkage) until a single root cluster remains.
// determine the indices of the selected columns
List<String> inlcludedCols = m_selectedColumns.getIncludeList();
int[] selectedColIndices = new int[inlcludedCols.size()];
for (int count = 0; count < selectedColIndices.length; count++) {
selectedColIndices[count] = data[0].getDataTableSpec().findColumnIndex(inlcludedCols.get(count));
}
BufferedDataTable inputData = data[0];
// Hard row limit; presumably sized to the distance cache / dendrogram
// handling below — TODO confirm the exact reason for 65,500.
if (inputData.size() > 65500) {
throw new RuntimeException("At most 65,500 patterns can be clustered");
}
DataTable outputData = null;
if (DistanceFunction.Names.Manhattan.toString().equals(m_distFunctionName.getStringValue())) {
m_distFunction = ManhattanDist.MANHATTEN_DISTANCE;
} else {
// any other setting falls back to Euclidean distance
m_distFunction = EuclideanDist.EUCLIDEAN_DISTANCE;
}
// generate initial clustering
// which means that every data point is one cluster
List<ClusterNode> clusters = initClusters(inputData, exec);
// store the distance per each fusion step
DataContainer fusionCont = exec.createDataContainer(createFusionSpec());
int iterationStep = 0;
// Optional cache of pairwise distances (half matrix of floats), filled
// lazily by the linkage helpers; null when caching is disabled.
final HalfFloatMatrix cache;
if (m_cacheDistances.getBooleanValue()) {
cache = new HalfFloatMatrix((int) inputData.size(), false);
cache.fill(Float.NaN);
} else {
cache = null;
}
double max = inputData.size();
// the number of clusters at the beginning is equal to the number
// of data rows (each row is a cluster)
int numberDataRows = clusters.size();
while (clusters.size() > 1) {
// checks if number clusters to generate output table is reached
if (m_numClustersForOutput.getIntValue() == clusters.size()) {
outputData = createResultTable(inputData, clusters, exec);
}
// NOTE(review): progress is reported twice per iteration with different
// fractions (merge-count based, then iteration based); the second call
// overwrites the first.
exec.setProgress((numberDataRows - clusters.size()) / (double) numberDataRows, clusters.size() + " clusters left to merge.");
iterationStep++;
exec.setProgress(iterationStep / max, "Iteration " + iterationStep + ", " + clusters.size() + " clusters remaining");
// calculate distance between all clusters
float currentSmallestDist = Float.MAX_VALUE;
ClusterNode currentClosestCluster1 = null;
ClusterNode currentClosestCluster2 = null;
// subprogress for loop
double availableProgress = (1.0 / numberDataRows);
ExecutionContext subexec = exec.createSubExecutionContext(availableProgress);
// Exhaustive scan over all unordered cluster pairs for the closest pair
// (O(k^2) per iteration, k = current number of clusters).
for (int i = 0; i < clusters.size(); i++) {
exec.checkCanceled();
ClusterNode node1 = clusters.get(i);
for (int j = i + 1; j < clusters.size(); j++) {
final float dist;
ClusterNode node2 = clusters.get(j);
// and average linkage supported.
if (m_linkageType.getStringValue().equals(Linkage.SINGLE.name())) {
dist = calculateSingleLinkageDist(node1, node2, cache, selectedColIndices);
} else if (m_linkageType.getStringValue().equals(Linkage.AVERAGE.name())) {
dist = calculateAverageLinkageDist(node1, node2, cache, selectedColIndices);
} else {
// any other setting falls back to complete linkage
dist = calculateCompleteLinkageDist(node1, node2, cache, selectedColIndices);
}
if (dist < currentSmallestDist) {
currentClosestCluster1 = node1;
currentClosestCluster2 = node2;
currentSmallestDist = dist;
}
}
}
subexec.setProgress(1.0);
// make one cluster of the two closest
ClusterNode newNode = new ClusterNode(currentClosestCluster1, currentClosestCluster2, currentSmallestDist);
clusters.remove(currentClosestCluster1);
clusters.remove(currentClosestCluster2);
clusters.add(newNode);
// store the distance per each fusion step
fusionCont.addRowToTable(new DefaultRow(// row key
Integer.toString(clusters.size()), // x-axis scatter plotter
new IntCell(clusters.size()), // y-axis scatter plotter
new DoubleCell(newNode.getDist())));
// // print number clusters and their data points
// LOGGER.debug("Iteration " + iterationStep + ":");
// LOGGER.debug(" Number Clusters: " + clusters.size());
// printClustersDataRows(clusters);
}
if (clusters.size() > 0) {
// the sole remaining cluster is the root of the dendrogram
m_rootNode = clusters.get(0);
}
fusionCont.close();
// if there was no input data create an empty output data
if (outputData == null) {
outputData = createResultTable(inputData, clusters, exec);
}
// keep the input rows and the fusion levels for the node's views
m_dataArray = new DefaultDataArray(inputData, 1, (int) inputData.size());
m_fusionTable = new DefaultDataArray(fusionCont.getTable(), 1, iterationStep);
return new BufferedDataTable[] { exec.createBufferedDataTable(outputData, exec) };
}
Aggregations