Use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
The class EntropyCalculator, method createScoreTable.
private static DataTable createScoreTable(final Map<RowKey, RowKey> referenceMap, final Map<RowKey, Set<RowKey>> clusteringMap) {
ArrayList<DefaultRow> sortedRows = new ArrayList<DefaultRow>();
// number of different clusters in reference clustering, used for
// normalization
int clusterCardinalityInReference = (new HashSet<RowKey>(referenceMap.values())).size();
double normalization = Math.log(clusterCardinalityInReference) / Math.log(2.0);
int totalSize = 0;
for (Map.Entry<RowKey, Set<RowKey>> e : clusteringMap.entrySet()) {
int size = e.getValue().size();
DataCell sizeCell = new IntCell(size);
totalSize += size;
double entropy = entropy(referenceMap, e.getValue());
DataCell entropyCell = new DoubleCell(entropy);
DataCell normEntropy = new DoubleCell(entropy / normalization);
DataCell quality = DataType.getMissingCell();
RowKey clusterID = e.getKey();
DefaultRow row = new DefaultRow(clusterID, sizeCell, entropyCell, normEntropy, quality);
sortedRows.add(row);
}
Collections.sort(sortedRows, new Comparator<DefaultRow>() {
@Override
public int compare(final DefaultRow o1, final DefaultRow o2) {
double e1 = ((DoubleValue) o1.getCell(2)).getDoubleValue();
double e2 = ((DoubleValue) o2.getCell(2)).getDoubleValue();
return Double.compare(e1, e2);
}
});
DataRow[] rows = sortedRows.toArray(new DataRow[0]);
DataTableSpec tableSpec = getScoreTableSpec();
DataContainer container = new DataContainer(tableSpec);
for (DataRow r : rows) {
container.addRowToTable(r);
}
// last row contains overall quality values
double entropy = getEntropy(referenceMap, clusteringMap);
double quality = getQuality(referenceMap, clusteringMap);
DataCell entropyCell = new DoubleCell(entropy);
DataCell normEntropy = new DoubleCell(entropy / normalization);
DataCell qualityCell = new DoubleCell(quality);
DataCell size = new IntCell(totalSize);
RowKey clusterID = new RowKey("Overall");
int uniquifier = 1;
while (clusteringMap.containsKey(clusterID)) {
clusterID = new RowKey("Overall (#" + (uniquifier++) + ")");
}
DefaultRow row = new DefaultRow(clusterID, size, entropyCell, normEntropy, qualityCell);
container.addRowToTable(row);
container.close();
return container.getTable();
}
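The entropy(...) helper called above is not shown here; the normalization step (dividing by log2 of the number of reference clusters) implies it returns the Shannon entropy, in bits, of the reference labels occurring in one cluster. A minimal standalone sketch of that computation, using plain String keys instead of RowKey so it compiles on its own (an illustration, not the KNIME implementation):

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

final class EntropySketch {
    /** Shannon entropy (base 2) of the reference labels found in one cluster. */
    static double entropy(final Map<String, String> referenceMap, final Set<String> cluster) {
        Map<String, Integer> counts = new HashMap<>();
        for (String row : cluster) {
            counts.merge(referenceMap.get(row), 1, Integer::sum); // count rows per reference label
        }
        double h = 0.0;
        for (int c : counts.values()) {
            double p = c / (double) cluster.size();
            h -= p * Math.log(p) / Math.log(2.0); // convert natural log to base 2
        }
        return h;
    }
}

A pure cluster yields 0.0; dividing by Math.log(k) / Math.log(2.0) for k reference clusters, as done above, scales the result into [0, 1].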
Use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
The class PolyRegLearnerNodeModel, method execute.
/**
* {@inheritDoc}
*/
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
BufferedDataTable inTable = (BufferedDataTable) inData[0];
DataTableSpec inSpec = inTable.getDataTableSpec();
final int colCount = inSpec.getNumColumns();
String[] selectedCols = computeSelectedColumns(inSpec);
Set<String> hash = new HashSet<String>(Arrays.asList(selectedCols));
m_colSelected = new boolean[colCount];
for (int i = 0; i < colCount; i++) {
m_colSelected[i] = hash.contains(inTable.getDataTableSpec().getColumnSpec(i).getName());
}
final int rowCount = inTable.getRowCount();
String[] temp = new String[m_columnNames.length + 1];
System.arraycopy(m_columnNames, 0, temp, 0, m_columnNames.length);
temp[temp.length - 1] = m_settings.getTargetColumn();
FilterColumnTable filteredTable = new FilterColumnTable(inTable, temp);
final DataArray rowContainer = new DefaultDataArray(filteredTable, 1, m_settings.getMaxRowsForView());
// handle the optional PMML input
PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inData[1] : null;
PortObjectSpec[] outputSpec = configure((inPMMLPort == null) ? new PortObjectSpec[] { inData[0].getSpec(), null } : new PortObjectSpec[] { inData[0].getSpec(), inPMMLPort.getSpec() });
Learner learner = new Learner((PMMLPortObjectSpec) outputSpec[0], 0d, m_settings.getMissingValueHandling() == MissingValueHandling.fail, m_settings.getDegree());
try {
PolyRegContent polyRegContent = learner.perform(inTable, exec);
m_betas = fillBeta(polyRegContent);
m_meanValues = polyRegContent.getMeans();
ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
PortObject[] bdt = new PortObject[] { createPMMLModel(inPMMLPort, inSpec), exec.createColumnRearrangeTable(inTable, crea, exec.createSilentSubExecutionContext(.2)), polyRegContent.createTablePortObject(exec.createSubExecutionContext(0.2)) };
m_squaredError /= rowCount;
if (polyRegContent.getWarningMessage() != null) {
setWarningMessage(polyRegContent.getWarningMessage());
}
double[] stdErrors = PolyRegViewData.mapToArray(polyRegContent.getStandardErrors(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptStdErr());
double[] tValues = PolyRegViewData.mapToArray(polyRegContent.getTValues(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptTValue());
double[] pValues = PolyRegViewData.mapToArray(polyRegContent.getPValues(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptPValue());
m_viewData = new PolyRegViewData(m_meanValues, m_betas, stdErrors, tValues, pValues, m_squaredError, polyRegContent.getAdjustedRSquared(), m_columnNames, m_settings.getDegree(), m_settings.getTargetColumn(), rowContainer);
return bdt;
} catch (ModelSpecificationException e) {
final String origWarning = getWarningMessage();
final String warning = ((origWarning != null && !origWarning.isEmpty()) ? (origWarning + "\n") : "") + e.getMessage();
setWarningMessage(warning);
final ExecutionContext subExec = exec.createSubExecutionContext(.1);
final BufferedDataContainer empty = subExec.createDataContainer(STATS_SPEC);
int rowIdx = 1;
for (final String column : m_columnNames) {
for (int d = 1; d <= m_settings.getDegree(); ++d) {
empty.addRowToTable(new DefaultRow("Row" + rowIdx++, new StringCell(column), new IntCell(d), new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(), DataType.getMissingCell()));
}
}
empty.addRowToTable(new DefaultRow("Row" + rowIdx, new StringCell("Intercept"), new IntCell(0), new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(), DataType.getMissingCell()));
double[] nans = new double[m_columnNames.length * m_settings.getDegree() + 1];
Arrays.fill(nans, Double.NaN);
m_betas = new double[nans.length];
// mean values only for the linear terms
m_meanValues = new double[nans.length / m_settings.getDegree()];
m_viewData = new PolyRegViewData(m_meanValues, m_betas, nans, nans, nans, m_squaredError, Double.NaN, m_columnNames, m_settings.getDegree(), m_settings.getTargetColumn(), rowContainer);
empty.close();
ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
BufferedDataTable rearrangerTable = exec.createColumnRearrangeTable(inTable, crea, exec.createSubProgress(0.6));
PMMLPortObject model = createPMMLModel(inPMMLPort, inTable.getDataTableSpec());
PortObject[] bdt = new PortObject[] { model, rearrangerTable, empty.getTable() };
return bdt;
}
}
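Both the success path (polyRegContent.createTablePortObject) and the fallback path above write DoubleCell values through a container bound to a fixed spec. Reduced to one double column, the underlying pattern looks like this (a sketch assuming a KNIME runtime on the classpath; the column name "value" and row key "Row0" are arbitrary):

import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.DataTable;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.RowKey;
import org.knime.core.data.container.DataContainer;
import org.knime.core.data.def.DefaultRow;
import org.knime.core.data.def.DoubleCell;

final class ContainerSketch {
    static DataTable oneColumnTable() {
        DataTableSpec spec = new DataTableSpec(
                new DataColumnSpecCreator("value", DoubleCell.TYPE).createSpec());
        DataContainer cont = new DataContainer(spec);
        cont.addRowToTable(new DefaultRow(new RowKey("Row0"), new DoubleCell(3.14)));
        cont.close(); // no rows may be added after close()
        return cont.getTable();
    }
}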
Use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
The class PolyRegLearnerNodeModel, method getCellFactory.
private CellFactory getCellFactory(final int dependentIndex) {
final int degree = m_settings.getDegree();
return new CellFactory() {
@Override
public DataCell[] getCells(final DataRow row) {
double sum = m_betas[0];
int betaCount = 1;
double y = 0;
for (int col = 0; col < row.getNumCells(); col++) {
if ((col == dependentIndex || m_colSelected[col]) && row.getCell(col).isMissing()) {
switch(m_settings.getMissingValueHandling()) {
case ignore:
return new DataCell[] { DataType.getMissingCell(), DataType.getMissingCell() };
case fail:
throw new IllegalStateException("Should have failed earlier!");
default:
throw new UnsupportedOperationException("Unsupported missing value handling strategy: " + m_settings.getMissingValueHandling());
}
}
if ((col != dependentIndex) && m_colSelected[col]) {
final double value = ((DoubleValue) row.getCell(col)).getDoubleValue();
double poly = 1;
for (int d = 1; d <= degree; d++) {
poly *= value;
sum += m_betas[betaCount++] * poly;
}
} else if (col == dependentIndex) {
y = ((DoubleValue) row.getCell(col)).getDoubleValue();
}
}
double err = Math.abs(sum - y);
m_squaredError += err * err;
return new DataCell[] { new DoubleCell(sum), new DoubleCell(err) };
}
@Override
public DataColumnSpec[] getColumnSpecs() {
DataColumnSpecCreator crea = new DataColumnSpecCreator("PolyReg prediction", DoubleCell.TYPE);
DataColumnSpec col1 = crea.createSpec();
crea = new DataColumnSpecCreator("Prediction Error", DoubleCell.TYPE);
DataColumnSpec col2 = crea.createSpec();
return new DataColumnSpec[] { col1, col2 };
}
@Override
public void setProgress(final int curRowNr, final int rowCount, final RowKey lastKey, final ExecutionMonitor execMon) {
// do nothing
}
};
}
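The getCells loop above fixes the layout of m_betas: index 0 holds the intercept, followed by degree consecutive coefficients per selected column, each multiplying the next power of the column value. Extracted into a standalone form (a hypothetical helper over plain arrays, not part of the node):

static double predict(final double[] betas, final double[] xs, final int degree) {
    double sum = betas[0]; // intercept
    int k = 1;
    for (double x : xs) {
        double p = 1.0;
        for (int d = 1; d <= degree; d++) {
            p *= x;                // p == x^d
            sum += betas[k++] * p; // coefficient for this column/degree pair
        }
    }
    return sum;
}

The per-row "Prediction Error" cell is then Math.abs(predict(...) - y), and m_squaredError accumulates its square, exactly as in getCells.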
Use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
The class HierarchicalClusterNodeModel, method execute.
/**
* {@inheritDoc}
*/
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] data, final ExecutionContext exec) throws Exception {
// determine the indices of the selected columns
List<String> includedCols = m_selectedColumns.getIncludeList();
int[] selectedColIndices = new int[includedCols.size()];
for (int count = 0; count < selectedColIndices.length; count++) {
selectedColIndices[count] = data[0].getDataTableSpec().findColumnIndex(includedCols.get(count));
}
BufferedDataTable inputData = data[0];
if (inputData.size() > 65500) {
throw new RuntimeException("At most 65,500 patterns can be clustered");
}
DataTable outputData = null;
if (DistanceFunction.Names.Manhattan.toString().equals(m_distFunctionName.getStringValue())) {
m_distFunction = ManhattanDist.MANHATTEN_DISTANCE;
} else {
m_distFunction = EuclideanDist.EUCLIDEAN_DISTANCE;
}
// generate initial clustering
// which means that every data point is one cluster
List<ClusterNode> clusters = initClusters(inputData, exec);
// store the distance per each fusion step
DataContainer fusionCont = exec.createDataContainer(createFusionSpec());
int iterationStep = 0;
final HalfFloatMatrix cache;
if (m_cacheDistances.getBooleanValue()) {
cache = new HalfFloatMatrix((int) inputData.size(), false);
cache.fill(Float.NaN);
} else {
cache = null;
}
double max = inputData.size();
// the number of clusters at the beginning is equal to the number
// of data rows (each row is a cluster)
int numberDataRows = clusters.size();
while (clusters.size() > 1) {
// check whether the number of clusters requested for the output table has been reached
if (m_numClustersForOutput.getIntValue() == clusters.size()) {
outputData = createResultTable(inputData, clusters, exec);
}
exec.setProgress((numberDataRows - clusters.size()) / (double) numberDataRows, clusters.size() + " clusters left to merge.");
iterationStep++;
exec.setProgress(iterationStep / max, "Iteration " + iterationStep + ", " + clusters.size() + " clusters remaining");
// calculate distance between all clusters
float currentSmallestDist = Float.MAX_VALUE;
ClusterNode currentClosestCluster1 = null;
ClusterNode currentClosestCluster2 = null;
// subprogress for loop
double availableProgress = (1.0 / numberDataRows);
ExecutionContext subexec = exec.createSubExecutionContext(availableProgress);
for (int i = 0; i < clusters.size(); i++) {
exec.checkCanceled();
ClusterNode node1 = clusters.get(i);
for (int j = i + 1; j < clusters.size(); j++) {
final float dist;
ClusterNode node2 = clusters.get(j);
// single, average, and complete linkage are supported.
if (m_linkageType.getStringValue().equals(Linkage.SINGLE.name())) {
dist = calculateSingleLinkageDist(node1, node2, cache, selectedColIndices);
} else if (m_linkageType.getStringValue().equals(Linkage.AVERAGE.name())) {
dist = calculateAverageLinkageDist(node1, node2, cache, selectedColIndices);
} else {
dist = calculateCompleteLinkageDist(node1, node2, cache, selectedColIndices);
}
if (dist < currentSmallestDist) {
currentClosestCluster1 = node1;
currentClosestCluster2 = node2;
currentSmallestDist = dist;
}
}
}
subexec.setProgress(1.0);
// make one cluster of the two closest
ClusterNode newNode = new ClusterNode(currentClosestCluster1, currentClosestCluster2, currentSmallestDist);
clusters.remove(currentClosestCluster1);
clusters.remove(currentClosestCluster2);
clusters.add(newNode);
// store the distance per each fusion step
fusionCont.addRowToTable(new DefaultRow(Integer.toString(clusters.size()), // row key
new IntCell(clusters.size()), // x-axis scatter plotter
new DoubleCell(newNode.getDist()))); // y-axis scatter plotter
// // print number clusters and their data points
// LOGGER.debug("Iteration " + iterationStep + ":");
// LOGGER.debug(" Number Clusters: " + clusters.size());
// printClustersDataRows(clusters);
}
if (clusters.size() > 0) {
m_rootNode = clusters.get(0);
}
fusionCont.close();
// if there was no input data, create an empty output table
if (outputData == null) {
outputData = createResultTable(inputData, clusters, exec);
}
m_dataArray = new DefaultDataArray(inputData, 1, (int) inputData.size());
m_fusionTable = new DefaultDataArray(fusionCont.getTable(), 1, iterationStep);
return new BufferedDataTable[] { exec.createBufferedDataTable(outputData, exec) };
}
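Of the three linkage strategies dispatched in the inner loop, single linkage is the simplest: the distance between two clusters is the smallest pairwise distance between their members. A plain-array Euclidean sketch of that idea (the actual calculateSingleLinkageDist also consults the HalfFloatMatrix cache and the selected column indices):

static double singleLinkage(final double[][] clusterA, final double[][] clusterB) {
    double min = Double.POSITIVE_INFINITY;
    for (double[] a : clusterA) {
        for (double[] b : clusterB) {
            double sq = 0.0;
            for (int i = 0; i < a.length; i++) {
                double diff = a[i] - b[i];
                sq += diff * diff;
            }
            min = Math.min(min, Math.sqrt(sq)); // keep the closest pair
        }
    }
    return min;
}

Complete linkage replaces Math.min with Math.max; average linkage sums all pairwise distances and divides by the number of pairs.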
Use of org.knime.core.data.def.DoubleCell in project knime-core by knime.
The class DecTreePredictorNodeModel, method execute.
/**
* {@inheritDoc}
*/
@Override
public PortObject[] execute(final PortObject[] inPorts, final ExecutionContext exec) throws CanceledExecutionException, Exception {
exec.setMessage("Decision Tree Predictor: Loading predictor...");
PMMLPortObject port = (PMMLPortObject) inPorts[INMODELPORT];
List<Node> models = port.getPMMLValue().getModels(PMMLModelType.TreeModel);
if (models.isEmpty()) {
String msg = "Decision Tree evaluation failed: No tree model found.";
LOGGER.error(msg);
throw new RuntimeException(msg);
}
PMMLDecisionTreeTranslator trans = new PMMLDecisionTreeTranslator();
port.initializeModelTranslator(trans);
DecisionTree decTree = trans.getDecisionTree();
decTree.resetColorInformation();
BufferedDataTable inData = (BufferedDataTable) inPorts[INDATAPORT];
// get column with color information
String colorColumn = null;
for (DataColumnSpec s : inData.getDataTableSpec()) {
if (s.getColorHandler() != null) {
colorColumn = s.getName();
break;
}
}
decTree.setColorColumn(colorColumn);
exec.setMessage("Decision Tree Predictor: start execution.");
PortObjectSpec[] inSpecs = new PortObjectSpec[] { inPorts[0].getSpec(), inPorts[1].getSpec() };
DataTableSpec outSpec = createOutTableSpec(inSpecs);
BufferedDataContainer outData = exec.createDataContainer(outSpec);
long coveredPattern = 0;
long nrPattern = 0;
long rowCount = 0;
final long numberRows = inData.size();
exec.setMessage("Classifying...");
List<String> predictionValues = getPredictionStrings((PMMLPortObjectSpec) inPorts[INMODELPORT].getSpec());
for (DataRow thisRow : inData) {
DataCell cl = null;
LinkedHashMap<String, Double> classDistrib = null;
try {
Pair<DataCell, LinkedHashMap<DataCell, Double>> pair = decTree.getWinnerAndClasscounts(thisRow, inData.getDataTableSpec());
cl = pair.getFirst();
LinkedHashMap<DataCell, Double> classCounts = pair.getSecond();
classDistrib = getDistribution(classCounts);
if (coveredPattern < m_maxNumCoveredPattern.getIntValue()) {
// remember this one for HiLite support
decTree.addCoveredPattern(thisRow, inData.getDataTableSpec());
coveredPattern++;
} else {
// too many patterns for HiLite - at least remember color
decTree.addCoveredColor(thisRow, inData.getDataTableSpec());
}
nrPattern++;
} catch (Exception e) {
LOGGER.error("Decision Tree evaluation failed: " + e.getMessage());
throw e;
}
if (cl == null) {
LOGGER.error("Decision Tree evaluation failed: result empty");
throw new Exception("Decision Tree evaluation failed.");
}
DataCell[] newCells = new DataCell[outSpec.getNumColumns()];
int numInCells = thisRow.getNumCells();
for (int i = 0; i < numInCells; i++) {
newCells[i] = thisRow.getCell(i);
}
if (m_showDistribution.getBooleanValue()) {
assert predictionValues.size() >= newCells.length - 1 - numInCells : "Could not determine the prediction values: " + newCells.length + "; " + numInCells + "; " + predictionValues;
for (int i = numInCells; i < newCells.length - 1; i++) {
String predClass = predictionValues.get(i - numInCells);
if (classDistrib != null && classDistrib.get(predClass) != null) {
newCells[i] = new DoubleCell(classDistrib.get(predClass));
} else {
newCells[i] = new DoubleCell(0.0);
}
}
}
newCells[newCells.length - 1] = cl;
outData.addRowToTable(new DefaultRow(thisRow.getKey(), newCells));
rowCount++;
if (rowCount % 100 == 0) {
exec.setProgress(rowCount / (double) numberRows, "Classifying... Row " + rowCount + " of " + numberRows);
}
exec.checkCanceled();
}
if (coveredPattern < nrPattern) {
// let the user know that we did not store all available patterns
// for HiLiting.
this.setWarningMessage("Tree only stored first " + m_maxNumCoveredPattern.getIntValue() + " (of " + nrPattern + ") rows for HiLiting!");
}
outData.close();
m_decTree = decTree;
exec.setMessage("Decision Tree Predictor: end execution.");
return new BufferedDataTable[] { outData.getTable() };
}
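The getDistribution(classCounts) call above converts the raw per-class counts returned by getWinnerAndClasscounts into the probabilities written as DoubleCell values. A hypothetical analog over plain String keys (the KNIME version maps DataCell keys; the LinkedHashMap preserves insertion order so the distribution columns and values stay aligned):

import java.util.LinkedHashMap;
import java.util.Map;

static LinkedHashMap<String, Double> toDistribution(final Map<String, Double> classCounts) {
    double total = 0.0;
    for (double c : classCounts.values()) {
        total += c;
    }
    LinkedHashMap<String, Double> dist = new LinkedHashMap<>();
    for (Map.Entry<String, Double> e : classCounts.entrySet()) {
        dist.put(e.getKey(), total > 0.0 ? e.getValue() / total : 0.0); // guard against empty counts
    }
    return dist;
}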