Use of org.knime.core.data.container.DataContainer in project knime-core by knime.
The class ColorExtractNodeModel, method extractColorTable.
/**
 * @param nom the nominal color model holding the value-to-color mapping
 * @return a table with one row per nominal value, listing the red, green,
 *         blue, alpha and combined RGB components of its color
 * @throws InvalidSettingsException if the model contains no nominal values
 */
private DataTable extractColorTable(final ColorModelNominal nom) throws InvalidSettingsException {
    DataType superType = null;
    for (DataCell c : nom) {
        if (superType == null) {
            superType = c.getType();
        } else {
            superType = DataType.getCommonSuperType(superType, c.getType());
        }
    }
    if (superType == null) {
        throw new InvalidSettingsException("No nominal values in model");
    }
    DataTableSpec spec = createSpec(superType);
    DataContainer cnt = new DataContainer(spec);
    int counter = 0;
    for (DataCell c : nom) {
        Color clr = nom.getColorAttr(c).getColor();
        DataRow row = new DefaultRow(RowKey.createRowKey(counter++), c,
            new IntCell(clr.getRed()), new IntCell(clr.getGreen()),
            new IntCell(clr.getBlue()), new IntCell(clr.getAlpha()),
            new IntCell(clr.getRGB()));
        cnt.addRowToTable(row);
    }
    cnt.close();
    return cnt.getTable();
}
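
Every snippet on this page follows the same DataContainer lifecycle: create a container for a DataTableSpec, append rows with addRowToTable, call close(), and read the result back with getTable(). A minimal, self-contained sketch of that pattern; the class name and the two-column layout are made up for illustration:

import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.DataTable;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.RowKey;
import org.knime.core.data.container.DataContainer;
import org.knime.core.data.def.DefaultRow;
import org.knime.core.data.def.IntCell;
import org.knime.core.data.def.StringCell;

public final class DataContainerSketch {
    static DataTable example() {
        // define the table structure first; it is fixed once the container exists
        DataTableSpec spec = new DataTableSpec(
            new DataColumnSpecCreator("Name", StringCell.TYPE).createSpec(),
            new DataColumnSpecCreator("Value", IntCell.TYPE).createSpec());
        DataContainer cnt = new DataContainer(spec);
        cnt.addRowToTable(new DefaultRow(new RowKey("Row0"),
            new StringCell("a"), new IntCell(1)));
        cnt.close(); // no rows may be added after close()
        return cnt.getTable();
    }
}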
Use of org.knime.core.data.container.DataContainer in project knime-core by knime.
The class LiftChartNodeModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    ConvenienceMethods.checkTableSize(inData[0]);
    int predColIndex = inData[0].getDataTableSpec().findColumnIndex(m_responseColumn.getStringValue());
    List<String> inclList = new LinkedList<String>();
    inclList.add(m_probabilityColumn.getStringValue());
    // sort by predicted probability, descending
    boolean[] order = new boolean[] { false };
    SortedTable st = new SortedTable(inData[0], inclList, order, exec);
    long totalResponses = 0;
    double partWidth = Double.parseDouble(m_intervalWidth.getStringValue());
    int nrParts = (int) Math.ceil(100.0 / partWidth);
    List<Integer> positiveResponses = new LinkedList<Integer>();
    int rowIndex = 0;
    for (DataRow row : st) {
        if (row.getCell(predColIndex).isMissing()) {
            setWarningMessage("There are missing values. Please check your data.");
            continue;
        }
        String response = ((StringValue) row.getCell(predColIndex)).getStringValue().trim();
        if (response.equalsIgnoreCase(m_responseLabel.getStringValue())) {
            totalResponses++;
            positiveResponses.add(rowIndex);
        }
        rowIndex++;
    }
    int[] counter = new int[nrParts];
    int partWidthAbsolute = (int) Math.ceil(rowIndex / (double) nrParts);
    double avgResponse = (double) positiveResponses.size() / rowIndex;
    for (int rIndex : positiveResponses) {
        int index = rIndex / partWidthAbsolute;
        counter[index]++;
    }
    DataColumnSpec[] colSpec = new DataColumnSpec[3];
    colSpec[0] = new DataColumnSpecCreator("Lift", DoubleCell.TYPE).createSpec();
    colSpec[1] = new DataColumnSpecCreator("Baseline", DoubleCell.TYPE).createSpec();
    colSpec[2] = new DataColumnSpecCreator("Cumulative Lift", DoubleCell.TYPE).createSpec();
    DataTableSpec tableSpec = new DataTableSpec(colSpec);
    DataContainer cont = exec.createDataContainer(tableSpec);
    colSpec = new DataColumnSpec[2];
    colSpec[0] = new DataColumnSpecCreator("Actual", DoubleCell.TYPE).createSpec();
    colSpec[1] = new DataColumnSpecCreator("Baseline", DoubleCell.TYPE).createSpec();
    tableSpec = new DataTableSpec(colSpec);
    DataContainer responseCont = exec.createDataContainer(tableSpec);
    long cumulativeCounter = 0;
    responseCont.addRowToTable(new DefaultRow(new RowKey("0"), 0.0, 0.0));
    for (int i = 0; i < counter.length; i++) {
        cumulativeCounter += counter[i];
        double responseRate = (double) counter[i] / partWidthAbsolute;
        double lift = responseRate / avgResponse;
        double cumResponseRate = (double) cumulativeCounter / totalResponses;
        long number = partWidthAbsolute * (i + 1);
        // the last chunk may be smaller due to rounding; clamp to the actual row count
        if (number > rowIndex) {
            number = rowIndex;
        }
        double cumulativeLift = (double) cumulativeCounter / number;
        cumulativeLift /= avgResponse;
        double rowKey = ((i + 1) * partWidth);
        if (rowKey > 100) {
            rowKey = 100;
        }
        cont.addRowToTable(new DefaultRow(new RowKey("" + rowKey), lift, 1.0, cumulativeLift));
        double cumBaseline = (i + 1) * partWidth;
        if (cumBaseline > 100) {
            cumBaseline = 100;
        }
        responseCont.addRowToTable(new DefaultRow(new RowKey("" + rowKey), cumResponseRate * 100, cumBaseline));
    }
    cont.close();
    responseCont.close();
    m_dataArray[0] = new DefaultDataArray(cont.getTable(), 1, (int) cont.size());
    m_dataArray[1] = new DefaultDataArray(responseCont.getTable(), 1, (int) responseCont.size());
    return new BufferedDataTable[] { st.getBufferedDataTable() };
}
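
The lift computed in the loop above is the chunk's response rate divided by the overall response rate avgResponse, so a lift of 2.0 means the chunk contains twice as many positive responses as a random sample of the same size would. A KNIME-free sketch of the same arithmetic, assuming a fixed 10% interval width and rows already sorted by descending predicted probability (liftPerDecile is a hypothetical helper, not part of the node):

// sortedResponses[i] is true iff row i (sorted by descending
// predicted probability) carries the positive response label
static double[] liftPerDecile(final boolean[] sortedResponses) {
    int n = sortedResponses.length;
    int positives = 0;
    for (boolean r : sortedResponses) {
        if (r) {
            positives++;
        }
    }
    double avgResponse = (double) positives / n; // overall response rate
    int chunk = (int) Math.ceil(n / 10.0);       // rows per decile
    double[] lift = new double[10];
    for (int i = 0; i < n; i++) {
        if (sortedResponses[i]) {
            lift[i / chunk]++;                   // positives per decile
        }
    }
    for (int d = 0; d < 10; d++) {
        // decile response rate divided by the overall response rate
        lift[d] = (lift[d] / chunk) / avgResponse;
    }
    return lift;
}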
Use of org.knime.core.data.container.DataContainer in project knime-core by knime.
The class AbstractParallelNodeModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected final BufferedDataTable[] execute(final BufferedDataTable[] data, final ExecutionContext exec) throws Exception {
    final DataTableSpec[] outSpecs = prepareExecute(data);
    final List<Future<BufferedDataContainer[]>> futures = new ArrayList<>();
    final BufferedDataTable[] additionalTables = new BufferedDataTable[Math.max(0, data.length - 1)];
    System.arraycopy(data, 1, additionalTables, 0, additionalTables.length);
    // do some consistency checks to bail out as early as possible
    if (outSpecs == null) {
        throw new NullPointerException("Implementation Error: The array of generated output table specs can't be null.");
    }
    if (outSpecs.length != getNrOutPorts()) {
        throw new IllegalStateException("Implementation Error: Number of provided DataTableSpecs doesn't match number of output ports");
    }
    for (DataTableSpec outSpec : outSpecs) {
        if (outSpec == null) {
            throw new IllegalStateException("Implementation Error: The generated output DataTableSpec is null.");
        }
    }
    final double max = data[0].size();
    final Callable<Void> submitter = new Callable<Void>() {
        @Override
        public Void call() throws Exception {
            final RowIterator it = data[0].iterator();
            BufferedDataContainer container = null;
            int count = 0, chunks = 0;
            while (true) {
                if ((count++ % m_chunkSize == 0) || !it.hasNext()) {
                    exec.checkCanceled();
                    if (container != null) {
                        container.close();
                        final BufferedDataContainer temp = container;
                        chunks++;
                        final int temp2 = chunks;
                        futures.add(m_workers.submit(new Callable<BufferedDataContainer[]>() {
                            @Override
                            public BufferedDataContainer[] call() throws Exception {
                                ExecutionMonitor subProg = exec.createSilentSubProgress((m_chunkSize > max) ? 1 : m_chunkSize / max);
                                exec.setMessage("Processing chunk " + temp2);
                                BufferedDataContainer[] result = new BufferedDataContainer[outSpecs.length];
                                for (int i = 0; i < outSpecs.length; i++) {
                                    result[i] = exec.createDataContainer(outSpecs[i], true, 0);
                                }
                                executeByChunk(temp.getTable(), additionalTables, result, subProg);
                                for (DataContainer c : result) {
                                    c.close();
                                }
                                exec.setProgress(temp2 * m_chunkSize / max);
                                return result;
                            }
                        }));
                    }
                    if (!it.hasNext()) {
                        break;
                    }
                    container = exec.createDataContainer(data[0].getDataTableSpec());
                }
                container.addRowToTable(it.next());
            }
            return null;
        }
    };
    try {
        m_workers.runInvisible(submitter);
    } catch (IllegalThreadStateException ex) {
        // this node was not started by a thread from a thread pool;
        // this is odd, but may happen
        submitter.call();
    }
    final BufferedDataTable[][] tempTables = new BufferedDataTable[outSpecs.length][futures.size()];
    int k = 0;
    for (Future<BufferedDataContainer[]> results : futures) {
        try {
            exec.checkCanceled();
        } catch (CanceledExecutionException ex) {
            for (Future<BufferedDataContainer[]> cancel : futures) {
                cancel.cancel(true);
            }
            throw ex;
        }
        final BufferedDataContainer[] temp = results.get();
        if ((temp == null) || (temp.length != getNrOutPorts())) {
            throw new IllegalStateException("Invalid result. Execution failed, reason: data is null or number of outputs wrong.");
        }
        for (int i = 0; i < temp.length; i++) {
            tempTables[i][k] = temp[i].getTable();
        }
        k++;
    }
    final BufferedDataTable[] resultTables = new BufferedDataTable[outSpecs.length];
    for (int i = 0; i < resultTables.length; i++) {
        resultTables[i] = exec.createConcatenateTable(exec, tempTables[i]);
    }
    return resultTables;
}
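
The submitter splits the input table into chunks of m_chunkSize rows, hands each closed chunk to the thread pool, and the main thread later joins the futures in submission order, so the concatenated result preserves the original row order. The same pattern in plain java.util.concurrent terms (ChunkedExecutor and mapInChunks are illustrative names, not KNIME API):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.function.Function;

final class ChunkedExecutor {
    static <I, O> List<O> mapInChunks(final List<I> input, final int chunkSize,
            final Function<List<I>, List<O>> worker) throws Exception {
        ExecutorService pool = Executors.newFixedThreadPool(
            Runtime.getRuntime().availableProcessors());
        try {
            // submit one task per fixed-size chunk
            List<Future<List<O>>> futures = new ArrayList<>();
            for (int i = 0; i < input.size(); i += chunkSize) {
                final List<I> chunk = input.subList(i, Math.min(i + chunkSize, input.size()));
                futures.add(pool.submit(() -> worker.apply(chunk)));
            }
            // join in submission order so the output keeps the input order
            List<O> out = new ArrayList<>();
            for (Future<List<O>> f : futures) {
                out.addAll(f.get()); // blocks until the chunk is done
            }
            return out;
        } finally {
            pool.shutdown();
        }
    }
}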
Use of org.knime.core.data.container.DataContainer in project knime-core by knime.
The class EntropyCalculator, method createScoreTable.
private static DataTable createScoreTable(final Map<RowKey, RowKey> referenceMap, final Map<RowKey, Set<RowKey>> clusteringMap) {
    ArrayList<DefaultRow> sortedRows = new ArrayList<DefaultRow>();
    // number of different clusters in the reference clustering,
    // used for normalization
    int clusterCardinalityInReference = new HashSet<RowKey>(referenceMap.values()).size();
    double normalization = Math.log(clusterCardinalityInReference) / Math.log(2.0);
    int totalSize = 0;
    for (Map.Entry<RowKey, Set<RowKey>> e : clusteringMap.entrySet()) {
        int size = e.getValue().size();
        DataCell sizeCell = new IntCell(size);
        totalSize += size;
        double entropy = entropy(referenceMap, e.getValue());
        DataCell entropyCell = new DoubleCell(entropy);
        DataCell normEntropy = new DoubleCell(entropy / normalization);
        DataCell quality = DataType.getMissingCell();
        RowKey clusterID = e.getKey();
        DefaultRow row = new DefaultRow(clusterID, sizeCell, entropyCell, normEntropy, quality);
        sortedRows.add(row);
    }
    // sort the rows by normalized entropy, ascending
    Collections.sort(sortedRows, new Comparator<DefaultRow>() {
        @Override
        public int compare(final DefaultRow o1, final DefaultRow o2) {
            double e1 = ((DoubleValue) o1.getCell(2)).getDoubleValue();
            double e2 = ((DoubleValue) o2.getCell(2)).getDoubleValue();
            return e1 < e2 ? -1 : e1 > e2 ? 1 : 0;
        }
    });
    DataRow[] rows = sortedRows.toArray(new DataRow[0]);
    DataTableSpec tableSpec = getScoreTableSpec();
    DataContainer container = new DataContainer(tableSpec);
    for (DataRow r : rows) {
        container.addRowToTable(r);
    }
    // the last row contains the overall quality values
    double entropy = getEntropy(referenceMap, clusteringMap);
    double quality = getQuality(referenceMap, clusteringMap);
    DataCell entropyCell = new DoubleCell(entropy);
    DataCell normEntropy = new DoubleCell(entropy / normalization);
    DataCell qualityCell = new DoubleCell(quality);
    DataCell size = new IntCell(totalSize);
    RowKey clusterID = new RowKey("Overall");
    int uniquifier = 1;
    while (clusteringMap.containsKey(clusterID)) {
        clusterID = new RowKey("Overall (#" + (uniquifier++) + ")");
    }
    DefaultRow row = new DefaultRow(clusterID, size, entropyCell, normEntropy, qualityCell);
    container.addRowToTable(row);
    container.close();
    return container.getTable();
}
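
The entropy of a single cluster measures how its members spread over the reference clusters: with p_i the fraction of members that belong to reference cluster i, the entropy is -sum(p_i * log2(p_i)), and dividing by log2 of the number of reference clusters (the normalization above) maps it into [0, 1]. A minimal sketch of that computation with plain string keys (EntropySketch is an illustrative stand-in for the entropy helper used above):

import java.util.HashMap;
import java.util.Map;

final class EntropySketch {
    static double entropy(final Map<String, String> referenceMap, final Iterable<String> clusterMembers) {
        // count how the cluster's members distribute over the reference clusters
        Map<String, Integer> counts = new HashMap<>();
        int total = 0;
        for (String member : clusterMembers) {
            counts.merge(referenceMap.get(member), 1, Integer::sum);
            total++;
        }
        // H = -sum(p * log2(p)) over the reference-cluster proportions
        double h = 0.0;
        for (int c : counts.values()) {
            double p = (double) c / total;
            h -= p * (Math.log(p) / Math.log(2.0));
        }
        return h;
    }
}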
Use of org.knime.core.data.container.DataContainer in project knime-core by knime.
The class HierarchicalClusterNodeModel, method execute.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] data, final ExecutionContext exec) throws Exception {
    // determine the indices of the selected columns
    List<String> includedCols = m_selectedColumns.getIncludeList();
    int[] selectedColIndices = new int[includedCols.size()];
    for (int count = 0; count < selectedColIndices.length; count++) {
        selectedColIndices[count] = data[0].getDataTableSpec().findColumnIndex(includedCols.get(count));
    }
    BufferedDataTable inputData = data[0];
    if (inputData.size() > 65500) {
        throw new RuntimeException("At most 65,500 patterns can be clustered");
    }
    DataTable outputData = null;
    if (DistanceFunction.Names.Manhattan.toString().equals(m_distFunctionName.getStringValue())) {
        m_distFunction = ManhattanDist.MANHATTEN_DISTANCE;
    } else {
        m_distFunction = EuclideanDist.EUCLIDEAN_DISTANCE;
    }
    // generate the initial clustering: every data point is its own cluster
    List<ClusterNode> clusters = initClusters(inputData, exec);
    // store the distance of each fusion step
    DataContainer fusionCont = exec.createDataContainer(createFusionSpec());
    int iterationStep = 0;
    final HalfFloatMatrix cache;
    if (m_cacheDistances.getBooleanValue()) {
        cache = new HalfFloatMatrix((int) inputData.size(), false);
        cache.fill(Float.NaN);
    } else {
        cache = null;
    }
    double max = inputData.size();
    // the number of clusters at the beginning equals the number of
    // data rows (each row is a cluster)
    int numberDataRows = clusters.size();
    while (clusters.size() > 1) {
        // check whether the requested number of clusters for the output table is reached
        if (m_numClustersForOutput.getIntValue() == clusters.size()) {
            outputData = createResultTable(inputData, clusters, exec);
        }
        exec.setProgress((numberDataRows - clusters.size()) / (double) numberDataRows, clusters.size() + " clusters left to merge.");
        iterationStep++;
        exec.setProgress(iterationStep / max, "Iteration " + iterationStep + ", " + clusters.size() + " clusters remaining");
        // find the pair of clusters with the smallest distance
        float currentSmallestDist = Float.MAX_VALUE;
        ClusterNode currentClosestCluster1 = null;
        ClusterNode currentClosestCluster2 = null;
        // subprogress for the pairwise distance loop
        double availableProgress = 1.0 / numberDataRows;
        ExecutionContext subexec = exec.createSubExecutionContext(availableProgress);
        for (int i = 0; i < clusters.size(); i++) {
            exec.checkCanceled();
            ClusterNode node1 = clusters.get(i);
            for (int j = i + 1; j < clusters.size(); j++) {
                final float dist;
                ClusterNode node2 = clusters.get(j);
                // single, average and complete linkage are supported
                if (m_linkageType.getStringValue().equals(Linkage.SINGLE.name())) {
                    dist = calculateSingleLinkageDist(node1, node2, cache, selectedColIndices);
                } else if (m_linkageType.getStringValue().equals(Linkage.AVERAGE.name())) {
                    dist = calculateAverageLinkageDist(node1, node2, cache, selectedColIndices);
                } else {
                    dist = calculateCompleteLinkageDist(node1, node2, cache, selectedColIndices);
                }
                if (dist < currentSmallestDist) {
                    currentClosestCluster1 = node1;
                    currentClosestCluster2 = node2;
                    currentSmallestDist = dist;
                }
            }
        }
        subexec.setProgress(1.0);
        // merge the two closest clusters into one
        ClusterNode newNode = new ClusterNode(currentClosestCluster1, currentClosestCluster2, currentSmallestDist);
        clusters.remove(currentClosestCluster1);
        clusters.remove(currentClosestCluster2);
        clusters.add(newNode);
        // store the distance of this fusion step
        fusionCont.addRowToTable(new DefaultRow(
            Integer.toString(clusters.size()),   // row key
            new IntCell(clusters.size()),        // x-axis of the scatter plot
            new DoubleCell(newNode.getDist()))); // y-axis of the scatter plot
    }
    if (clusters.size() > 0) {
        m_rootNode = clusters.get(0);
    }
    fusionCont.close();
    // create the output table if it was not created during the loop
    if (outputData == null) {
        outputData = createResultTable(inputData, clusters, exec);
    }
    m_dataArray = new DefaultDataArray(inputData, 1, (int) inputData.size());
    m_fusionTable = new DefaultDataArray(fusionCont.getTable(), 1, iterationStep);
    return new BufferedDataTable[] { exec.createBufferedDataTable(outputData, exec) };
}
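
The main loop is the textbook agglomerative algorithm: repeatedly scan all pairs of clusters for the smallest linkage distance, merge that pair, and record the fusion distance; without the optional distance cache this is O(n^3) overall. For reference, a minimal sketch of the single-linkage variant on raw double vectors (singleLinkage is an illustrative helper using Euclidean distance; the node's ClusterNode/DistanceFunction types are replaced by plain arrays):

// distance between two clusters under single linkage: the smallest
// pairwise Euclidean distance between a point of the first cluster
// and a point of the second
static double singleLinkage(final double[][] clusterA, final double[][] clusterB) {
    double best = Double.MAX_VALUE;
    for (double[] a : clusterA) {
        for (double[] b : clusterB) {
            double sum = 0.0;
            for (int d = 0; d < a.length; d++) {
                double diff = a[d] - b[d];
                sum += diff * diff;
            }
            best = Math.min(best, Math.sqrt(sum)); // keep the closest pair
        }
    }
    return best;
}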