Example 6 with DataContainer

Use of org.knime.core.data.container.DataContainer in project knime-core by knime.

From class ColorExtractNodeModel, method extractColorTable:

/**
 * @param nom the nominal color model whose value-to-color mapping is extracted
 * @return a table with one row per nominal value, listing its red, green,
 *         blue, alpha, and packed RGB components
 * @throws InvalidSettingsException if the model contains no nominal values
 */
private DataTable extractColorTable(final ColorModelNominal nom) throws InvalidSettingsException {
    DataType superType = null;
    for (DataCell c : nom) {
        if (superType == null) {
            superType = c.getType();
        } else {
            superType = DataType.getCommonSuperType(superType, c.getType());
        }
    }
    if (superType == null) {
        throw new InvalidSettingsException("No nominal values in model");
    }
    DataTableSpec spec = createSpec(superType);
    DataContainer cnt = new DataContainer(spec);
    int counter = 0;
    for (DataCell c : nom) {
        Color clr = nom.getColorAttr(c).getColor();
        DataRow row = new DefaultRow(RowKey.createRowKey(counter++), c,
            new IntCell(clr.getRed()), new IntCell(clr.getGreen()),
            new IntCell(clr.getBlue()), new IntCell(clr.getAlpha()),
            new IntCell(clr.getRGB()));
        cnt.addRowToTable(row);
    }
    cnt.close();
    return cnt.getTable();
}
Also used: DataTableSpec(org.knime.core.data.DataTableSpec) DataContainer(org.knime.core.data.container.DataContainer) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) Color(java.awt.Color) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell)
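
Shared pattern: every example on this page follows the same DataContainer lifecycle. Build a DataTableSpec, create the container, append rows, close it, and fetch the finished table via getTable(). A minimal sketch of that lifecycle (the column name "Value" and the three-row loop are placeholders, not taken from any snippet):

import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.DataTable;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.RowKey;
import org.knime.core.data.container.DataContainer;
import org.knime.core.data.def.DefaultRow;
import org.knime.core.data.def.IntCell;

static DataTable buildSmallTable() {
    // 1) define the table structure
    DataTableSpec spec = new DataTableSpec(
        new DataColumnSpecCreator("Value", IntCell.TYPE).createSpec());
    // 2) create the container and append rows
    DataContainer cnt = new DataContainer(spec);
    for (long i = 0; i < 3; i++) {
        cnt.addRowToTable(new DefaultRow(RowKey.createRowKey(i), new IntCell((int) i)));
    }
    // 3) close before reading; no rows may be added afterwards
    cnt.close();
    return cnt.getTable();
}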

Example 7 with DataContainer

Use of org.knime.core.data.container.DataContainer in project knime-core by knime.

From class LiftChartNodeModel, method execute:

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    ConvenienceMethods.checkTableSize(inData[0]);
    int predColIndex = inData[0].getDataTableSpec().findColumnIndex(m_responseColumn.getStringValue());
    List<String> inclList = new LinkedList<String>();
    inclList.add(m_probabilityColumn.getStringValue());
    boolean[] order = new boolean[] { false };
    SortedTable st = new SortedTable(inData[0], inclList, order, exec);
    long totalResponses = 0;
    double partWidth = Double.parseDouble(m_intervalWidth.getStringValue());
    int nrParts = (int) Math.ceil(100.0 / partWidth);
    List<Integer> positiveResponses = new LinkedList<Integer>();
    int rowIndex = 0;
    for (DataRow row : st) {
        if (row.getCell(predColIndex).isMissing()) {
            setWarningMessage("There are missing values." + " Please check your data.");
            continue;
        }
        String response = ((StringValue) row.getCell(predColIndex)).getStringValue().trim();
        if (response.equalsIgnoreCase(m_responseLabel.getStringValue())) {
            totalResponses++;
            positiveResponses.add(rowIndex);
        }
        rowIndex++;
    }
    int[] counter = new int[nrParts];
    int partWidthAbsolute = (int) Math.ceil(rowIndex / (double) nrParts);
    double avgResponse = (double) positiveResponses.size() / rowIndex;
    for (int rIndex : positiveResponses) {
        int index = rIndex / partWidthAbsolute;
        counter[index]++;
    }
    DataColumnSpec[] colSpec = new DataColumnSpec[3];
    colSpec[0] = new DataColumnSpecCreator("Lift", DoubleCell.TYPE).createSpec();
    colSpec[1] = new DataColumnSpecCreator("Baseline", DoubleCell.TYPE).createSpec();
    colSpec[2] = new DataColumnSpecCreator("Cumulative Lift", DoubleCell.TYPE).createSpec();
    DataTableSpec tableSpec = new DataTableSpec(colSpec);
    DataContainer cont = exec.createDataContainer(tableSpec);
    colSpec = new DataColumnSpec[2];
    colSpec[0] = new DataColumnSpecCreator("Actual", DoubleCell.TYPE).createSpec();
    colSpec[1] = new DataColumnSpecCreator("Baseline", DoubleCell.TYPE).createSpec();
    tableSpec = new DataTableSpec(colSpec);
    DataContainer responseCont = exec.createDataContainer(tableSpec);
    long cumulativeCounter = 0;
    responseCont.addRowToTable(new DefaultRow(new RowKey("0"), 0.0, 0.0));
    for (int i = 0; i < counter.length; i++) {
        cumulativeCounter += counter[i];
        double responseRate = (double) counter[i] / partWidthAbsolute;
        double lift = responseRate / avgResponse;
        double cumResponseRate = (double) cumulativeCounter / totalResponses;
        long number = partWidthAbsolute * (i + 1);
        // cap at the total row count: the ceil-rounded part width can overshoot
        if (number > rowIndex) {
            number = rowIndex;
        }
        double cumulativeLift = (double) cumulativeCounter / number;
        cumulativeLift /= avgResponse;
        double rowKey = ((i + 1) * partWidth);
        if (rowKey > 100) {
            rowKey = 100;
        }
        cont.addRowToTable(new DefaultRow(new RowKey("" + rowKey), lift, 1.0, cumulativeLift));
        double cumBaseline = (i + 1) * partWidth;
        if (cumBaseline > 100) {
            cumBaseline = 100;
        }
        responseCont.addRowToTable(new DefaultRow(new RowKey("" + rowKey), cumResponseRate * 100, cumBaseline));
    }
    cont.close();
    responseCont.close();
    m_dataArray[0] = new DefaultDataArray(cont.getTable(), 1, (int) cont.size());
    m_dataArray[1] = new DefaultDataArray(responseCont.getTable(), 1, (int) responseCont.size());
    return new BufferedDataTable[] { st.getBufferedDataTable() };
}
Also used: DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) RowKey(org.knime.core.data.RowKey) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) LinkedList(java.util.LinkedList) DataContainer(org.knime.core.data.container.DataContainer) DataColumnSpec(org.knime.core.data.DataColumnSpec) SortedTable(org.knime.base.data.sort.SortedTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DefaultRow(org.knime.core.data.def.DefaultRow)
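
Aside: the lift computation above reduces to bucketing rows that are already sorted by descending predicted probability and comparing each bucket's response rate to the overall rate. A standalone sketch of that arithmetic, detached from the KNIME types (the method name computeLift and the boolean-array input are illustrative assumptions):

// sortedResponses[i] is true if row i (in descending-probability order) is a positive response
static double[] computeLift(boolean[] sortedResponses, int nrParts) {
    int n = sortedResponses.length;
    int bucketSize = (int) Math.ceil(n / (double) nrParts);
    int totalPositives = 0;
    int[] positivesPerBucket = new int[nrParts];
    for (int i = 0; i < n; i++) {
        if (sortedResponses[i]) {
            totalPositives++;
            positivesPerBucket[i / bucketSize]++;
        }
    }
    double avgResponse = (double) totalPositives / n; // overall response rate
    double[] lift = new double[nrParts];
    for (int b = 0; b < nrParts; b++) {
        // lift of a bucket = its response rate divided by the overall rate
        lift[b] = ((double) positivesPerBucket[b] / bucketSize) / avgResponse;
    }
    return lift;
}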

Example 8 with DataContainer

Use of org.knime.core.data.container.DataContainer in project knime-core by knime.

From class AbstractParallelNodeModel, method execute:

/**
 * {@inheritDoc}
 */
@Override
protected final BufferedDataTable[] execute(final BufferedDataTable[] data, final ExecutionContext exec) throws Exception {
    final DataTableSpec[] outSpecs = prepareExecute(data);
    final List<Future<BufferedDataContainer[]>> futures = new ArrayList<>();
    final BufferedDataTable[] additionalTables = new BufferedDataTable[Math.max(0, data.length - 1)];
    System.arraycopy(data, 1, additionalTables, 0, additionalTables.length);
    // do some consistency checks to bail out as early as possible
    if (outSpecs == null) {
        throw new NullPointerException("Implementation Error: The " + "array of generated output table specs can't be null.");
    }
    if (outSpecs.length != getNrOutPorts()) {
        throw new IllegalStateException("Implementation Error: Number of" + " provided DataTableSpecs doesn't match number of output" + " ports");
    }
    for (DataTableSpec outSpec : outSpecs) {
        if (outSpec == null) {
            throw new IllegalStateException("Implementation Error: The" + " generated output DataTableSpec is null.");
        }
    }
    final double max = data[0].size();
    final Callable<Void> submitter = new Callable<Void>() {

        @Override
        public Void call() throws Exception {
            final RowIterator it = data[0].iterator();
            BufferedDataContainer container = null;
            int count = 0, chunks = 0;
            while (true) {
                if ((count++ % m_chunkSize == 0) || !it.hasNext()) {
                    exec.checkCanceled();
                    if (container != null) {
                        container.close();
                        final BufferedDataContainer temp = container;
                        chunks++;
                        final int temp2 = chunks;
                        futures.add(m_workers.submit(new Callable<BufferedDataContainer[]>() {

                            @Override
                            public BufferedDataContainer[] call() throws Exception {
                                ExecutionMonitor subProg = exec.createSilentSubProgress((m_chunkSize > max) ? 1 : m_chunkSize / max);
                                exec.setMessage("Processing chunk " + temp2);
                                BufferedDataContainer[] result = new BufferedDataContainer[outSpecs.length];
                                for (int i = 0; i < outSpecs.length; i++) {
                                    result[i] = exec.createDataContainer(outSpecs[i], true, 0);
                                }
                                executeByChunk(temp.getTable(), additionalTables, result, subProg);
                                for (DataContainer c : result) {
                                    c.close();
                                }
                                exec.setProgress(temp2 * m_chunkSize / max);
                                return result;
                            }
                        }));
                    }
                    if (!it.hasNext()) {
                        break;
                    }
                    container = exec.createDataContainer(data[0].getDataTableSpec());
                }
                container.addRowToTable(it.next());
            }
            return null;
        }
    };
    try {
        m_workers.runInvisible(submitter);
    } catch (IllegalThreadStateException ex) {
        // this node has not been started by a thread from a thread pool.
        // This is odd, but may happen
        submitter.call();
    }
    final BufferedDataTable[][] tempTables = new BufferedDataTable[outSpecs.length][futures.size()];
    int k = 0;
    for (Future<BufferedDataContainer[]> results : futures) {
        try {
            exec.checkCanceled();
        } catch (CanceledExecutionException ex) {
            for (Future<BufferedDataContainer[]> cancel : futures) {
                cancel.cancel(true);
            }
            throw ex;
        }
        final BufferedDataContainer[] temp = results.get();
        if ((temp == null) || (temp.length != getNrOutPorts())) {
            throw new IllegalStateException("Invalid result. Execution " + " failed, reason: data is null or number " + "of outputs wrong.");
        }
        for (int i = 0; i < temp.length; i++) {
            tempTables[i][k] = temp[i].getTable();
        }
        k++;
    }
    final BufferedDataTable[] resultTables = new BufferedDataTable[outSpecs.length];
    for (int i = 0; i < resultTables.length; i++) {
        resultTables[i] = exec.createConcatenateTable(exec, tempTables[i]);
    }
    return resultTables;
}
Also used: DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ArrayList(java.util.ArrayList) Callable(java.util.concurrent.Callable) DataContainer(org.knime.core.data.container.DataContainer) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) RowIterator(org.knime.core.data.RowIterator) BufferedDataTable(org.knime.core.node.BufferedDataTable) Future(java.util.concurrent.Future) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)
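
Aside: the submitter's chunking idiom generalizes beyond KNIME: consume items from an iterator and, whenever the buffer reaches the chunk size, hand the filled buffer to a worker and start a fresh one. A generic sketch (the names submitInChunks and processChunk are illustrative, not from the snippet):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.function.Consumer;

static <T> void submitInChunks(Iterator<T> it, int chunkSize,
        ExecutorService workers, Consumer<List<T>> processChunk) {
    List<T> buffer = new ArrayList<>();
    while (it.hasNext()) {
        buffer.add(it.next());
        if (buffer.size() == chunkSize || !it.hasNext()) {
            final List<T> chunk = buffer;        // hand off the filled buffer
            workers.submit(() -> processChunk.accept(chunk));
            buffer = new ArrayList<>();          // start a fresh chunk
        }
    }
}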

Example 9 with DataContainer

Use of org.knime.core.data.container.DataContainer in project knime-core by knime.

From class EntropyCalculator, method createScoreTable:

private static DataTable createScoreTable(final Map<RowKey, RowKey> referenceMap, final Map<RowKey, Set<RowKey>> clusteringMap) {
    ArrayList<DefaultRow> sortedRows = new ArrayList<DefaultRow>();
    // number of different clusters in reference clustering, used for
    // normalization
    int clusterCardinalityInReference = (new HashSet<RowKey>(referenceMap.values())).size();
    double normalization = Math.log(clusterCardinalityInReference) / Math.log(2.0);
    int totalSize = 0;
    for (Map.Entry<RowKey, Set<RowKey>> e : clusteringMap.entrySet()) {
        int size = e.getValue().size();
        DataCell sizeCell = new IntCell(size);
        totalSize += size;
        double entropy = entropy(referenceMap, e.getValue());
        DataCell entropyCell = new DoubleCell(entropy);
        DataCell normEntropy = new DoubleCell(entropy / normalization);
        DataCell quality = DataType.getMissingCell();
        RowKey clusterID = e.getKey();
        DefaultRow row = new DefaultRow(clusterID, sizeCell, entropyCell, normEntropy, quality);
        sortedRows.add(row);
    }
    Collections.sort(sortedRows, new Comparator<DefaultRow>() {

        @Override
        public int compare(final DefaultRow o1, final DefaultRow o2) {
            double e1 = ((DoubleValue) o1.getCell(2)).getDoubleValue();
            double e2 = ((DoubleValue) o2.getCell(2)).getDoubleValue();
            return Double.compare(e1, e2);
        }
    });
    DataRow[] rows = sortedRows.toArray(new DataRow[0]);
    DataTableSpec tableSpec = getScoreTableSpec();
    DataContainer container = new DataContainer(tableSpec);
    for (DataRow r : rows) {
        container.addRowToTable(r);
    }
    // last row contains overall quality values
    double entropy = getEntropy(referenceMap, clusteringMap);
    double quality = getQuality(referenceMap, clusteringMap);
    DataCell entropyCell = new DoubleCell(entropy);
    DataCell normEntropy = new DoubleCell(entropy / normalization);
    DataCell qualityCell = new DoubleCell(quality);
    DataCell size = new IntCell(totalSize);
    RowKey clusterID = new RowKey("Overall");
    int uniquifier = 1;
    while (clusteringMap.containsKey(clusterID)) {
        clusterID = new RowKey("Overall (#" + (uniquifier++) + ")");
    }
    DefaultRow row = new DefaultRow(clusterID, size, entropyCell, normEntropy, qualityCell);
    container.addRowToTable(row);
    container.close();
    return container.getTable();
}
Also used: DataTableSpec(org.knime.core.data.DataTableSpec) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) DataContainer(org.knime.core.data.container.DataContainer) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
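
Aside: the entropy(...) helper called above is not shown in this example. A plausible sketch under the conventional definition, which matches the log-base-2 normalization used in createScoreTable: Shannon entropy, in bits, of the reference-cluster labels found inside one cluster. This is an assumption, not the actual knime-core implementation:

import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.knime.core.data.RowKey;

static double entropy(Map<RowKey, RowKey> referenceMap, Set<RowKey> cluster) {
    // count how often each reference cluster occurs within this cluster
    Map<RowKey, Integer> counts = new HashMap<>();
    for (RowKey row : cluster) {
        counts.merge(referenceMap.get(row), 1, Integer::sum);
    }
    // H = -sum p * log2(p) over the reference-label distribution
    double h = 0.0;
    for (int c : counts.values()) {
        double p = (double) c / cluster.size();
        h -= p * (Math.log(p) / Math.log(2.0));
    }
    return h;
}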

Example 10 with DataContainer

Use of org.knime.core.data.container.DataContainer in project knime-core by knime.

From class HierarchicalClusterNodeModel, method execute:

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] data, final ExecutionContext exec) throws Exception {
    // determine the indices of the selected columns
    List<String> includedCols = m_selectedColumns.getIncludeList();
    int[] selectedColIndices = new int[includedCols.size()];
    for (int count = 0; count < selectedColIndices.length; count++) {
        selectedColIndices[count] = data[0].getDataTableSpec().findColumnIndex(includedCols.get(count));
    }
    BufferedDataTable inputData = data[0];
    if (inputData.size() > 65500) {
        throw new RuntimeException("At most 65,500 patterns can be clustered");
    }
    DataTable outputData = null;
    if (DistanceFunction.Names.Manhattan.toString().equals(m_distFunctionName.getStringValue())) {
        m_distFunction = ManhattanDist.MANHATTEN_DISTANCE;
    } else {
        m_distFunction = EuclideanDist.EUCLIDEAN_DISTANCE;
    }
    // generate initial clustering
    // which means that every data point is one cluster
    List<ClusterNode> clusters = initClusters(inputData, exec);
    // store the distance per each fusion step
    DataContainer fusionCont = exec.createDataContainer(createFusionSpec());
    int iterationStep = 0;
    final HalfFloatMatrix cache;
    if (m_cacheDistances.getBooleanValue()) {
        cache = new HalfFloatMatrix((int) inputData.size(), false);
        cache.fill(Float.NaN);
    } else {
        cache = null;
    }
    double max = inputData.size();
    // the number of clusters at the beginning is equal to the number
    // of data rows (each row is a cluster)
    int numberDataRows = clusters.size();
    while (clusters.size() > 1) {
        // check whether the requested number of output clusters has been reached
        if (m_numClustersForOutput.getIntValue() == clusters.size()) {
            outputData = createResultTable(inputData, clusters, exec);
        }
        exec.setProgress((numberDataRows - clusters.size()) / (double) numberDataRows, clusters.size() + " clusters left to merge.");
        iterationStep++;
        exec.setProgress(iterationStep / max, "Iteration " + iterationStep + ", " + clusters.size() + " clusters remaining");
        // calculate distance between all clusters
        float currentSmallestDist = Float.MAX_VALUE;
        ClusterNode currentClosestCluster1 = null;
        ClusterNode currentClosestCluster2 = null;
        // subprogress for loop
        double availableProgress = (1.0 / numberDataRows);
        ExecutionContext subexec = exec.createSubExecutionContext(availableProgress);
        for (int i = 0; i < clusters.size(); i++) {
            exec.checkCanceled();
            ClusterNode node1 = clusters.get(i);
            for (int j = i + 1; j < clusters.size(); j++) {
                final float dist;
                ClusterNode node2 = clusters.get(j);
                // single, average, and complete linkage are supported
                if (m_linkageType.getStringValue().equals(Linkage.SINGLE.name())) {
                    dist = calculateSingleLinkageDist(node1, node2, cache, selectedColIndices);
                } else if (m_linkageType.getStringValue().equals(Linkage.AVERAGE.name())) {
                    dist = calculateAverageLinkageDist(node1, node2, cache, selectedColIndices);
                } else {
                    dist = calculateCompleteLinkageDist(node1, node2, cache, selectedColIndices);
                }
                if (dist < currentSmallestDist) {
                    currentClosestCluster1 = node1;
                    currentClosestCluster2 = node2;
                    currentSmallestDist = dist;
                }
            }
        }
        subexec.setProgress(1.0);
        // make one cluster of the two closest
        ClusterNode newNode = new ClusterNode(currentClosestCluster1, currentClosestCluster2, currentSmallestDist);
        clusters.remove(currentClosestCluster1);
        clusters.remove(currentClosestCluster2);
        clusters.add(newNode);
        // store the distance of each fusion step
        fusionCont.addRowToTable(new DefaultRow(
            Integer.toString(clusters.size()),   // row key
            new IntCell(clusters.size()),        // x-axis of the scatter plotter
            new DoubleCell(newNode.getDist()))); // y-axis of the scatter plotter
    }
    if (clusters.size() > 0) {
        m_rootNode = clusters.get(0);
    }
    fusionCont.close();
    // if the output table was not created during merging (e.g. the requested
    // cluster count was never reached), create it from the remaining clusters
    if (outputData == null) {
        outputData = createResultTable(inputData, clusters, exec);
    }
    m_dataArray = new DefaultDataArray(inputData, 1, (int) inputData.size());
    m_fusionTable = new DefaultDataArray(fusionCont.getTable(), 1, iterationStep);
    return new BufferedDataTable[] { exec.createBufferedDataTable(outputData, exec) };
}
Also used: DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) IntCell(org.knime.core.data.def.IntCell) DataContainer(org.knime.core.data.container.DataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) HalfFloatMatrix(org.knime.base.util.HalfFloatMatrix) DefaultRow(org.knime.core.data.def.DefaultRow)
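
Aside: the linkage helpers (calculateSingleLinkageDist and its siblings) are likewise not shown. As an illustration only, single linkage is commonly defined as the minimum pairwise distance between rows of the two clusters; a sketch under that assumption, with the distance parameter standing in for the configured DistanceFunction restricted to the selected columns:

import java.util.List;
import java.util.function.BiFunction;
import org.knime.core.data.DataRow;

static float singleLinkage(List<DataRow> cluster1, List<DataRow> cluster2,
        BiFunction<DataRow, DataRow, Float> distance) {
    // single linkage: minimum distance over all cross-cluster row pairs
    float min = Float.MAX_VALUE;
    for (DataRow r1 : cluster1) {
        for (DataRow r2 : cluster2) {
            min = Math.min(min, distance.apply(r1, r2));
        }
    }
    return min;
}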

Aggregations

DataContainer (org.knime.core.data.container.DataContainer): 35 usages
DataTableSpec (org.knime.core.data.DataTableSpec): 25 usages
DefaultRow (org.knime.core.data.def.DefaultRow): 21 usages
DataRow (org.knime.core.data.DataRow): 19 usages
DataCell (org.knime.core.data.DataCell): 17 usages
BufferedDataTable (org.knime.core.node.BufferedDataTable): 15 usages
RowKey (org.knime.core.data.RowKey): 10 usages
ArrayList (java.util.ArrayList): 9 usages
DoubleCell (org.knime.core.data.def.DoubleCell): 9 usages
IntCell (org.knime.core.data.def.IntCell): 8 usages
LinkedHashMap (java.util.LinkedHashMap): 7 usages
DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator): 7 usages
HashSet (java.util.HashSet): 6 usages
DataColumnSpec (org.knime.core.data.DataColumnSpec): 6 usages
RowIterator (org.knime.core.data.RowIterator): 6 usages
StringCell (org.knime.core.data.def.StringCell): 6 usages
Map (java.util.Map): 5 usages
Set (java.util.Set): 5 usages
SortedTable (org.knime.base.data.sort.SortedTable): 5 usages
DataTable (org.knime.core.data.DataTable): 5 usages