Search in sources:

Example 66 with DefaultRow

Use of org.knime.core.data.def.DefaultRow in the project knime-core by KNIME.

From the class EntropyCalculator, method createScoreTable:

/**
 * Creates the per-cluster score table: one row per cluster carrying its size,
 * entropy, and normalized entropy (quality is missing for individual clusters),
 * sorted ascending by normalized entropy, followed by a final "Overall" summary
 * row that additionally carries the overall quality value.
 *
 * @param referenceMap maps each data row key to its cluster in the reference clustering
 * @param clusteringMap maps each cluster key to the set of row keys it contains
 * @return the assembled score table
 */
private static DataTable createScoreTable(final Map<RowKey, RowKey> referenceMap, final Map<RowKey, Set<RowKey>> clusteringMap) {
    ArrayList<DefaultRow> sortedRows = new ArrayList<DefaultRow>();
    // number of different clusters in reference clustering, used for
    // normalization
    int clusterCardinalityInReference = (new HashSet<RowKey>(referenceMap.values())).size();
    // log2 of the reference cardinality. NOTE(review): if the reference
    // clustering contains a single cluster this is 0 and the normalized
    // entropy below becomes NaN/Infinity -- confirm callers never pass that.
    double normalization = Math.log(clusterCardinalityInReference) / Math.log(2.0);
    int totalSize = 0;
    for (Map.Entry<RowKey, Set<RowKey>> e : clusteringMap.entrySet()) {
        int size = e.getValue().size();
        DataCell sizeCell = new IntCell(size);
        totalSize += size;
        double entropy = entropy(referenceMap, e.getValue());
        DataCell entropyCell = new DoubleCell(entropy);
        DataCell normEntropy = new DoubleCell(entropy / normalization);
        // per-cluster quality is undefined; only the "Overall" row has a value
        DataCell quality = DataType.getMissingCell();
        RowKey clusterID = e.getKey();
        DefaultRow row = new DefaultRow(clusterID, sizeCell, entropyCell, normEntropy, quality);
        sortedRows.add(row);
    }
    // Sort ascending by the normalized-entropy column (cell index 2).
    // Double.compare replaces the original hand-rolled "<"/">" chain, which
    // returned 0 for any comparison involving NaN and thereby violated the
    // Comparator contract (Collections.sort may fail with "Comparison method
    // violates its general contract!" on such input).
    Collections.sort(sortedRows, new Comparator<DefaultRow>() {

        @Override
        public int compare(final DefaultRow o1, final DefaultRow o2) {
            double e1 = ((DoubleValue) o1.getCell(2)).getDoubleValue();
            double e2 = ((DoubleValue) o2.getCell(2)).getDoubleValue();
            return Double.compare(e1, e2);
        }
    });
    DataRow[] rows = sortedRows.toArray(new DataRow[0]);
    DataTableSpec tableSpec = getScoreTableSpec();
    DataContainer container = new DataContainer(tableSpec);
    for (DataRow r : rows) {
        container.addRowToTable(r);
    }
    // last row contains overall quality values
    double entropy = getEntropy(referenceMap, clusteringMap);
    double quality = getQuality(referenceMap, clusteringMap);
    DataCell entropyCell = new DoubleCell(entropy);
    DataCell normEntropy = new DoubleCell(entropy / normalization);
    DataCell qualityCell = new DoubleCell(quality);
    DataCell size = new IntCell(totalSize);
    // make the summary row key unique with respect to the cluster keys
    RowKey clusterID = new RowKey("Overall");
    int uniquifier = 1;
    while (clusteringMap.containsKey(clusterID)) {
        clusterID = new RowKey("Overall (#" + (uniquifier++) + ")");
    }
    DefaultRow row = new DefaultRow(clusterID, size, entropyCell, normEntropy, qualityCell);
    container.addRowToTable(row);
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) DataContainer(org.knime.core.data.container.DataContainer) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 67 with DefaultRow

Use of org.knime.core.data.def.DefaultRow in the project knime-core by KNIME.

From the class PolyRegLearnerNodeModel, method execute:

/**
 * {@inheritDoc}
 *
 * Learns a polynomial regression model on the selected columns. On success,
 * returns the PMML model, the input table with appended prediction/error
 * columns, and the coefficient statistics table. If the model cannot be
 * specified (e.g. singular matrix), emits a warning, fills the statistics
 * table and view data with placeholder values, and still returns all ports.
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getDataTableSpec();
    final int colCount = inSpec.getNumColumns();
    String[] selectedCols = computeSelectedColumns(inSpec);
    Set<String> hash = new HashSet<String>(Arrays.asList(selectedCols));
    m_colSelected = new boolean[colCount];
    for (int i = 0; i < colCount; i++) {
        m_colSelected[i] = hash.contains(inTable.getDataTableSpec().getColumnSpec(i).getName());
    }
    final int rowCount = inTable.getRowCount();
    // the view needs the selected columns plus the target column
    String[] temp = new String[m_columnNames.length + 1];
    System.arraycopy(m_columnNames, 0, temp, 0, m_columnNames.length);
    temp[temp.length - 1] = m_settings.getTargetColumn();
    FilterColumnTable filteredTable = new FilterColumnTable(inTable, temp);
    final DataArray rowContainer = new DefaultDataArray(filteredTable, 1, m_settings.getMaxRowsForView());
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inData[1] : null;
    PortObjectSpec[] outputSpec = configure((inPMMLPort == null) ? new PortObjectSpec[] { inData[0].getSpec(), null } : new PortObjectSpec[] { inData[0].getSpec(), inPMMLPort.getSpec() });
    Learner learner = new Learner((PMMLPortObjectSpec) outputSpec[0], 0d, m_settings.getMissingValueHandling() == MissingValueHandling.fail, m_settings.getDegree());
    try {
        PolyRegContent polyRegContent = learner.perform(inTable, exec);
        m_betas = fillBeta(polyRegContent);
        m_meanValues = polyRegContent.getMeans();
        ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
        crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
        PortObject[] bdt = new PortObject[] { createPMMLModel(inPMMLPort, inSpec), exec.createColumnRearrangeTable(inTable, crea, exec.createSilentSubExecutionContext(.2)), polyRegContent.createTablePortObject(exec.createSubExecutionContext(0.2)) };
        m_squaredError /= rowCount;
        if (polyRegContent.getWarningMessage() != null) {
            setWarningMessage(polyRegContent.getWarningMessage());
        }
        double[] stdErrors = PolyRegViewData.mapToArray(polyRegContent.getStandardErrors(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptStdErr());
        double[] tValues = PolyRegViewData.mapToArray(polyRegContent.getTValues(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptTValue());
        double[] pValues = PolyRegViewData.mapToArray(polyRegContent.getPValues(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptPValue());
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, stdErrors, tValues, pValues, m_squaredError, polyRegContent.getAdjustedRSquared(), m_columnNames, m_settings.getDegree(), m_settings.getTargetColumn(), rowContainer);
        return bdt;
    } catch (ModelSpecificationException e) {
        // Prepend any pre-existing warning to the exception message.
        // BUG FIX: the original expression
        //   cond ? (origWarning + "\n") : "" + e.getMessage()
        // bound e.getMessage() to the else-branch only, so the exception
        // message was silently dropped whenever a warning already existed.
        final String origWarning = getWarningMessage();
        final String prefix = (origWarning != null && !origWarning.isEmpty()) ? (origWarning + "\n") : "";
        final String warning = prefix + e.getMessage();
        setWarningMessage(warning);
        final ExecutionContext subExec = exec.createSubExecutionContext(.1);
        // build a placeholder statistics table with missing error/t/p values
        final BufferedDataContainer empty = subExec.createDataContainer(STATS_SPEC);
        int rowIdx = 1;
        for (final String column : m_columnNames) {
            for (int d = 1; d <= m_settings.getDegree(); ++d) {
                empty.addRowToTable(new DefaultRow("Row" + rowIdx++, new StringCell(column), new IntCell(d), new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(), DataType.getMissingCell()));
            }
        }
        empty.addRowToTable(new DefaultRow("Row" + rowIdx, new StringCell("Intercept"), new IntCell(0), new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(), DataType.getMissingCell()));
        double[] nans = new double[m_columnNames.length * m_settings.getDegree() + 1];
        Arrays.fill(nans, Double.NaN);
        m_betas = new double[nans.length];
        // Mean only for the linear tags
        m_meanValues = new double[nans.length / m_settings.getDegree()];
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, nans, nans, nans, m_squaredError, Double.NaN, m_columnNames, m_settings.getDegree(), m_settings.getTargetColumn(), rowContainer);
        empty.close();
        ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
        crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
        BufferedDataTable rearrangerTable = exec.createColumnRearrangeTable(inTable, crea, exec.createSubProgress(0.6));
        PMMLPortObject model = createPMMLModel(inPMMLPort, inTable.getDataTableSpec());
        PortObject[] bdt = new PortObject[] { model, rearrangerTable, empty.getTable() };
        return bdt;
    }
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) DoubleCell(org.knime.core.data.def.DoubleCell) FilterColumnTable(org.knime.base.data.filter.column.FilterColumnTable) DataArray(org.knime.base.node.util.DataArray) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ModelSpecificationException(org.apache.commons.math3.stat.regression.ModelSpecificationException) IntCell(org.knime.core.data.def.IntCell) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) HashSet(java.util.HashSet) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) StringCell(org.knime.core.data.def.StringCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 68 with DefaultRow

Use of org.knime.core.data.def.DefaultRow in the project knime-core by KNIME.

From the class HierarchicalClusterNodeModel, method execute:

/**
 * {@inheritDoc}
 *
 * Performs agglomerative (bottom-up) hierarchical clustering: starts with one
 * cluster per input row and repeatedly merges the two closest clusters until a
 * single root remains, recording the fusion distance of every merge step.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] data, final ExecutionContext exec) throws Exception {
    // determine the indices of the selected columns
    List<String> inlcludedCols = m_selectedColumns.getIncludeList();
    int[] selectedColIndices = new int[inlcludedCols.size()];
    for (int count = 0; count < selectedColIndices.length; count++) {
        selectedColIndices[count] = data[0].getDataTableSpec().findColumnIndex(inlcludedCols.get(count));
    }
    BufferedDataTable inputData = data[0];
    // hard row limit; presumably tied to the int-indexed HalfFloatMatrix
    // distance cache below -- TODO confirm the exact origin of 65,500
    if (inputData.size() > 65500) {
        throw new RuntimeException("At most 65,500 patterns can be clustered");
    }
    DataTable outputData = null;
    // pick the distance function by its configured name; anything other than
    // Manhattan falls back to Euclidean
    if (DistanceFunction.Names.Manhattan.toString().equals(m_distFunctionName.getStringValue())) {
        m_distFunction = ManhattanDist.MANHATTEN_DISTANCE;
    } else {
        m_distFunction = EuclideanDist.EUCLIDEAN_DISTANCE;
    }
    // generate initial clustering
    // which means that every data point is one cluster
    List<ClusterNode> clusters = initClusters(inputData, exec);
    // store the distance per each fusion step
    DataContainer fusionCont = exec.createDataContainer(createFusionSpec());
    int iterationStep = 0;
    // optional symmetric cache of pairwise distances; NaN marks "not yet
    // computed" entries
    final HalfFloatMatrix cache;
    if (m_cacheDistances.getBooleanValue()) {
        cache = new HalfFloatMatrix((int) inputData.size(), false);
        cache.fill(Float.NaN);
    } else {
        cache = null;
    }
    double max = inputData.size();
    // the number of clusters at the beginning is equal to the number
    // of data rows (each row is a cluster)
    int numberDataRows = clusters.size();
    // main agglomeration loop: each pass merges exactly one pair of clusters,
    // so the cluster count drops by one per iteration
    while (clusters.size() > 1) {
        // checks if number clusters to generate output table is reached
        if (m_numClustersForOutput.getIntValue() == clusters.size()) {
            outputData = createResultTable(inputData, clusters, exec);
        }
        exec.setProgress((numberDataRows - clusters.size()) / (double) numberDataRows, clusters.size() + " clusters left to merge.");
        iterationStep++;
        exec.setProgress(iterationStep / max, "Iteration " + iterationStep + ", " + clusters.size() + " clusters remaining");
        // calculate distance between all clusters
        float currentSmallestDist = Float.MAX_VALUE;
        ClusterNode currentClosestCluster1 = null;
        ClusterNode currentClosestCluster2 = null;
        // subprogress for loop
        double availableProgress = (1.0 / numberDataRows);
        ExecutionContext subexec = exec.createSubExecutionContext(availableProgress);
        // exhaustive O(k^2) scan over all cluster pairs to find the closest
        // pair under the configured linkage
        for (int i = 0; i < clusters.size(); i++) {
            exec.checkCanceled();
            ClusterNode node1 = clusters.get(i);
            for (int j = i + 1; j < clusters.size(); j++) {
                final float dist;
                ClusterNode node2 = clusters.get(j);
                // only single, average, and complete (the default branch)
                // linkage are supported
                if (m_linkageType.getStringValue().equals(Linkage.SINGLE.name())) {
                    dist = calculateSingleLinkageDist(node1, node2, cache, selectedColIndices);
                } else if (m_linkageType.getStringValue().equals(Linkage.AVERAGE.name())) {
                    dist = calculateAverageLinkageDist(node1, node2, cache, selectedColIndices);
                } else {
                    dist = calculateCompleteLinkageDist(node1, node2, cache, selectedColIndices);
                }
                if (dist < currentSmallestDist) {
                    currentClosestCluster1 = node1;
                    currentClosestCluster2 = node2;
                    currentSmallestDist = dist;
                }
            }
        }
        subexec.setProgress(1.0);
        // make one cluster of the two closest
        ClusterNode newNode = new ClusterNode(currentClosestCluster1, currentClosestCluster2, currentSmallestDist);
        clusters.remove(currentClosestCluster1);
        clusters.remove(currentClosestCluster2);
        clusters.add(newNode);
        // store the distance per each fusion step
        fusionCont.addRowToTable(new DefaultRow(// row key
        Integer.toString(clusters.size()), // x-axis scatter plotter
        new IntCell(clusters.size()), // y-axis scatter plotter
        new DoubleCell(newNode.getDist())));
    // // print number clusters and their data points
    // LOGGER.debug("Iteration " + iterationStep + ":");
    // LOGGER.debug(" Number Clusters: " + clusters.size());
    // printClustersDataRows(clusters);
    }
    // the single remaining cluster is the dendrogram root
    if (clusters.size() > 0) {
        m_rootNode = clusters.get(0);
    }
    fusionCont.close();
    // if there was no input data create an empty output data
    if (outputData == null) {
        outputData = createResultTable(inputData, clusters, exec);
    }
    // keep the input rows and the fusion steps for the view
    m_dataArray = new DefaultDataArray(inputData, 1, (int) inputData.size());
    m_fusionTable = new DefaultDataArray(fusionCont.getTable(), 1, iterationStep);
    return new BufferedDataTable[] { exec.createBufferedDataTable(outputData, exec) };
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) IntCell(org.knime.core.data.def.IntCell) DataContainer(org.knime.core.data.container.DataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) BufferedDataTable(org.knime.core.node.BufferedDataTable) HalfFloatMatrix(org.knime.base.util.HalfFloatMatrix) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 69 with DefaultRow

Use of org.knime.core.data.def.DefaultRow in the project knime-core by KNIME.

From the class DecTreePredictorNodeModel, method execute:

/**
 * {@inheritDoc}
 *
 * Applies the PMML decision tree from the model port to every row of the
 * input table, appending the predicted class (and, optionally, the class
 * distribution columns) to each row. The first rows (up to the configured
 * maximum) are also registered with the tree for HiLite support.
 */
@Override
public PortObject[] execute(final PortObject[] inPorts, final ExecutionContext exec) throws CanceledExecutionException, Exception {
    exec.setMessage("Decision Tree Predictor: Loading predictor...");
    PMMLPortObject port = (PMMLPortObject) inPorts[INMODELPORT];
    List<Node> models = port.getPMMLValue().getModels(PMMLModelType.TreeModel);
    if (models.isEmpty()) {
        String msg = "Decision Tree evaluation failed: " + "No tree model found.";
        LOGGER.error(msg);
        throw new RuntimeException(msg);
    }
    PMMLDecisionTreeTranslator trans = new PMMLDecisionTreeTranslator();
    port.initializeModelTranslator(trans);
    DecisionTree decTree = trans.getDecisionTree();
    decTree.resetColorInformation();
    BufferedDataTable inData = (BufferedDataTable) inPorts[INDATAPORT];
    // get column with color information (first column with a color handler)
    String colorColumn = null;
    for (DataColumnSpec s : inData.getDataTableSpec()) {
        if (s.getColorHandler() != null) {
            colorColumn = s.getName();
            break;
        }
    }
    decTree.setColorColumn(colorColumn);
    exec.setMessage("Decision Tree Predictor: start execution.");
    PortObjectSpec[] inSpecs = new PortObjectSpec[] { inPorts[0].getSpec(), inPorts[1].getSpec() };
    DataTableSpec outSpec = createOutTableSpec(inSpecs);
    BufferedDataContainer outData = exec.createDataContainer(outSpec);
    long coveredPattern = 0;
    long nrPattern = 0;
    long rowCount = 0;
    final long numberRows = inData.size();
    exec.setMessage("Classifying...");
    List<String> predictionValues = getPredictionStrings((PMMLPortObjectSpec) inPorts[INMODELPORT].getSpec());
    for (DataRow thisRow : inData) {
        DataCell cl = null;
        LinkedHashMap<String, Double> classDistrib = null;
        try {
            Pair<DataCell, LinkedHashMap<DataCell, Double>> pair = decTree.getWinnerAndClasscounts(thisRow, inData.getDataTableSpec());
            cl = pair.getFirst();
            LinkedHashMap<DataCell, Double> classCounts = pair.getSecond();
            classDistrib = getDistribution(classCounts);
            if (coveredPattern < m_maxNumCoveredPattern.getIntValue()) {
                // remember this one for HiLite support
                decTree.addCoveredPattern(thisRow, inData.getDataTableSpec());
                coveredPattern++;
            } else {
                // too many patterns for HiLite - at least remember color
                decTree.addCoveredColor(thisRow, inData.getDataTableSpec());
            }
            nrPattern++;
        } catch (Exception e) {
            // FIX: log with the throwable so the stack trace is preserved;
            // the original logged only e.getMessage() before rethrowing
            LOGGER.error("Decision Tree evaluation failed: " + e.getMessage(), e);
            throw e;
        }
        if (cl == null) {
            LOGGER.error("Decision Tree evaluation failed: result empty");
            throw new Exception("Decision Tree evaluation failed.");
        }
        // copy the input cells, then append distribution columns (if enabled)
        // and the winner class in the last column
        DataCell[] newCells = new DataCell[outSpec.getNumColumns()];
        int numInCells = thisRow.getNumCells();
        for (int i = 0; i < numInCells; i++) {
            newCells[i] = thisRow.getCell(i);
        }
        if (m_showDistribution.getBooleanValue()) {
            assert predictionValues.size() >= newCells.length - 1 - numInCells : "Could not determine the prediction values: " + newCells.length + "; " + numInCells + "; " + predictionValues;
            for (int i = numInCells; i < newCells.length - 1; i++) {
                String predClass = predictionValues.get(i - numInCells);
                if (classDistrib != null && classDistrib.get(predClass) != null) {
                    newCells[i] = new DoubleCell(classDistrib.get(predClass));
                } else {
                    // class never predicted for this row: probability 0
                    newCells[i] = new DoubleCell(0.0);
                }
            }
        }
        newCells[newCells.length - 1] = cl;
        outData.addRowToTable(new DefaultRow(thisRow.getKey(), newCells));
        rowCount++;
        // throttle progress updates to every 100 rows
        if (rowCount % 100 == 0) {
            exec.setProgress(rowCount / (double) numberRows, "Classifying... Row " + rowCount + " of " + numberRows);
        }
        exec.checkCanceled();
    }
    if (coveredPattern < nrPattern) {
        // let the user know that we did not store all available pattern
        // for HiLiting.
        this.setWarningMessage("Tree only stored first " + m_maxNumCoveredPattern.getIntValue() + " (of " + nrPattern + ") rows for HiLiting!");
    }
    outData.close();
    m_decTree = decTree;
    exec.setMessage("Decision Tree Predictor: end execution.");
    return new BufferedDataTable[] { outData.getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLDecisionTreeTranslator(org.knime.base.node.mine.decisiontree2.PMMLDecisionTreeTranslator) DoubleCell(org.knime.core.data.def.DoubleCell) Node(org.w3c.dom.Node) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) DecisionTree(org.knime.base.node.mine.decisiontree2.model.DecisionTree) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 70 with DefaultRow

Use of org.knime.core.data.def.DefaultRow in the project knime-core by KNIME.

From the class PCANodeModel, method createCovarianceTable:

/**
 * Creates a data table from a covariance matrix: one output row per matrix
 * row, with one DoubleCell per entry, keyed by the corresponding input
 * column name.
 *
 * @param exec
 *            execution context used to create the container
 * @param m
 *            covariance matrix
 * @param inputColumnNames
 *            names of input columns the matrix was created from
 * @return buffered table holding the covariance values
 */
public static BufferedDataTable createCovarianceTable(final ExecutionContext exec, final double[][] m, final String[] inputColumnNames) {
    final BufferedDataContainer container = exec.createDataContainer(createCovarianceMatrixSpec(inputColumnNames));
    int rowIndex = 0;
    for (final double[] matrixRow : m) {
        // wrap each covariance value in a DoubleCell
        final DataCell[] rowCells = new DataCell[inputColumnNames.length];
        for (int col = 0; col < matrixRow.length; col++) {
            rowCells[col] = new DoubleCell(matrixRow[col]);
        }
        container.addRowToTable(new DefaultRow(inputColumnNames[rowIndex], rowCells));
        rowIndex++;
    }
    container.close();
    return container.getTable();
}
Also used : BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DoubleCell(org.knime.core.data.def.DoubleCell) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

DefaultRow (org.knime.core.data.def.DefaultRow)207 DataCell (org.knime.core.data.DataCell)165 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)94 DataTableSpec (org.knime.core.data.DataTableSpec)92 DataRow (org.knime.core.data.DataRow)88 RowKey (org.knime.core.data.RowKey)80 DoubleCell (org.knime.core.data.def.DoubleCell)66 StringCell (org.knime.core.data.def.StringCell)65 BufferedDataTable (org.knime.core.node.BufferedDataTable)56 IntCell (org.knime.core.data.def.IntCell)46 ArrayList (java.util.ArrayList)26 DataType (org.knime.core.data.DataType)26 DataColumnSpec (org.knime.core.data.DataColumnSpec)22 DataContainer (org.knime.core.data.container.DataContainer)21 HashSet (java.util.HashSet)18 LinkedHashMap (java.util.LinkedHashMap)17 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)16 LinkedHashSet (java.util.LinkedHashSet)14 DoubleValue (org.knime.core.data.DoubleValue)14 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)14