Search in sources:

Example 36 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class ClassAttributeModel method createDataRows.

/**
 * {@inheritDoc}
 *
 * <p>Writes one row per class value, in the order returned by
 * {@code AttributeModel.sortCollection}. Every row consists of the attribute name, a missing
 * cell, the class value, the record count of that class value, the missing-value count (only
 * when {@code ignoreMissing} is {@code false}) and two trailing missing cells. No row is
 * written when the sorted class values are {@code null}.</p>
 */
@Override
void createDataRows(final ExecutionMonitor exec, final BufferedDataContainer dc, final boolean ignoreMissing, final AtomicInteger rowId) throws CanceledExecutionException {
    final List<String> classValues = AttributeModel.sortCollection(m_recsCounterByClassVal.keySet());
    if (classValues == null) {
        return;
    }
    final StringCell nameCell = new StringCell(getAttributeName());
    // the missing-value count column exists only when missing values are not ignored
    final int cellCount = ignoreMissing ? 6 : 7;
    for (final String classValue : classValues) {
        final StringCell classCell = new StringCell(classValue);
        final DataCell[] rowCells = new DataCell[cellCount];
        int col = 0;
        rowCells[col++] = nameCell;
        rowCells[col++] = DataType.getMissingCell();
        rowCells[col++] = classCell;
        rowCells[col++] = new IntCell(getNoOfRecs4ClassValue(classValue));
        if (!ignoreMissing) {
            rowCells[col++] = new IntCell(getNoOfMissingVals());
        }
        rowCells[col++] = DataType.getMissingCell();
        rowCells[col] = DataType.getMissingCell();
        // the row id counter is advanced only once the cells of the row are complete
        final RowKey key = RowKey.createRowKey(rowId.getAndIncrement());
        dc.addRowToTable(new DefaultRow(key, rowCells));
    }
}
Also used : StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) LinkedList(java.util.LinkedList) IntCell(org.knime.core.data.def.IntCell)

Example 37 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class NominalAttributeModel method createDataRows.

/**
 * {@inheritDoc}
 *
 * <p>Writes one row per combination of attribute value (outer loop) and class value (inner
 * loop), both in the order returned by {@code AttributeModel.sortCollection}. Every row
 * consists of the attribute name, the attribute value, the class value, the number of rows
 * with that attribute value for the class, the missing-value count of the class (only when
 * {@code ignoreMissing} is {@code false}) and two trailing missing cells. No row is written
 * when either sorted collection is {@code null}.</p>
 */
@Override
void createDataRows(final ExecutionMonitor exec, final BufferedDataContainer dc, final boolean ignoreMissing, final AtomicInteger rowId) throws CanceledExecutionException {
    final List<String> sortedClassVal = AttributeModel.sortCollection(m_classValues.keySet());
    if (sortedClassVal == null) {
        return;
    }
    final List<String> sortedAttrValues = AttributeModel.sortCollection(m_attributeVals);
    if (sortedAttrValues == null) {
        // sortCollection can evidently return null (its result is checked for the class
        // values as well); without this guard the loop below would throw a
        // NullPointerException instead of simply producing no rows
        return;
    }
    final StringCell attributeNameCell = new StringCell(getAttributeName());
    for (final String attrVal : sortedAttrValues) {
        final StringCell attributeValueCell = new StringCell(attrVal);
        for (final String classVal : sortedClassVal) {
            final StringCell classCell = new StringCell(classVal);
            final NominalClassValue classValue = m_classValues.get(classVal);
            final List<DataCell> cells = new LinkedList<>();
            cells.add(attributeNameCell);
            cells.add(attributeValueCell);
            cells.add(classCell);
            cells.add(new IntCell(classValue.getNoOfRows4AttributeValue(attrVal)));
            if (!ignoreMissing) {
                // the missing-value count column exists only when missing values are not ignored
                cells.add(new IntCell(classValue.getNoOfMissingValueRecs()));
            }
            cells.add(DataType.getMissingCell());
            cells.add(DataType.getMissingCell());
            dc.addRowToTable(new DefaultRow(RowKey.createRowKey(rowId.getAndIncrement()), cells.toArray(new DataCell[0])));
        }
    }
}
Also used : StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) LinkedList(java.util.LinkedList) IntCell(org.knime.core.data.def.IntCell)

Example 38 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class EntropyCalculator method createScoreTable.

/**
 * Builds the score table for a clustering compared against a reference clustering.
 *
 * <p>The table holds one row per entry of {@code clusteringMap} (keyed by the cluster id) with
 * the cluster size, its entropy, the entropy divided by log2 of the number of distinct
 * reference clusters, and a missing quality cell. These rows are sorted ascending by the
 * normalized entropy (cell index 2). A final row keyed "Overall" (made unique against the
 * cluster ids if necessary) carries the total size, overall entropy, its normalized value
 * and the overall quality.</p>
 *
 * @param referenceMap maps a row to its cluster in the reference clustering
 * @param clusteringMap maps a cluster id to the rows contained in that cluster
 * @return the table assembled from the per-cluster rows and the overall row
 */
private static DataTable createScoreTable(final Map<RowKey, RowKey> referenceMap, final Map<RowKey, Set<RowKey>> clusteringMap) {
    // the number of distinct clusters in the reference clustering drives the normalization
    final int referenceClusterCount = new HashSet<RowKey>(referenceMap.values()).size();
    final double log2OfReferenceClusters = Math.log(referenceClusterCount) / Math.log(2.0);
    final ArrayList<DefaultRow> clusterRows = new ArrayList<DefaultRow>();
    int patternCount = 0;
    for (Map.Entry<RowKey, Set<RowKey>> cluster : clusteringMap.entrySet()) {
        final Set<RowKey> members = cluster.getValue();
        final int memberCount = members.size();
        patternCount += memberCount;
        final double clusterEntropy = entropy(referenceMap, members);
        final DataCell memberCountCell = new IntCell(memberCount);
        final DataCell clusterEntropyCell = new DoubleCell(clusterEntropy);
        final DataCell normalizedCell = new DoubleCell(clusterEntropy / log2OfReferenceClusters);
        // no quality value is computed per cluster
        final DataCell noQuality = DataType.getMissingCell();
        clusterRows.add(new DefaultRow(cluster.getKey(), memberCountCell, clusterEntropyCell, normalizedCell, noQuality));
    }
    Collections.sort(clusterRows, new Comparator<DefaultRow>() {

        @Override
        public int compare(final DefaultRow left, final DefaultRow right) {
            // cell 2 is the normalized entropy
            final double leftValue = ((DoubleValue) left.getCell(2)).getDoubleValue();
            final double rightValue = ((DoubleValue) right.getCell(2)).getDoubleValue();
            if (leftValue < rightValue) {
                return -1;
            }
            if (leftValue > rightValue) {
                return 1;
            }
            return 0;
        }
    });
    final DataContainer container = new DataContainer(getScoreTableSpec());
    for (DataRow clusterRow : clusterRows) {
        container.addRowToTable(clusterRow);
    }
    // the last row contains the overall quality values
    final double overallEntropy = getEntropy(referenceMap, clusteringMap);
    final double overallQuality = getQuality(referenceMap, clusteringMap);
    final DataCell overallEntropyCell = new DoubleCell(overallEntropy);
    final DataCell overallNormalizedCell = new DoubleCell(overallEntropy / log2OfReferenceClusters);
    final DataCell overallQualityCell = new DoubleCell(overallQuality);
    final DataCell patternCountCell = new IntCell(patternCount);
    // pick a key for the overall row that does not collide with any cluster id
    RowKey overallKey = new RowKey("Overall");
    for (int suffix = 1; clusteringMap.containsKey(overallKey); suffix++) {
        overallKey = new RowKey("Overall (#" + suffix + ")");
    }
    container.addRowToTable(new DefaultRow(overallKey, patternCountCell, overallEntropyCell, overallNormalizedCell, overallQualityCell));
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Set(java.util.Set) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) DataContainer(org.knime.core.data.container.DataContainer) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet)

Example 39 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class PolyRegLearnerNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Runs the learner on the first input table and returns three port objects: the PMML
 * model, the input table with the column produced by {@code getCellFactory(...)} appended,
 * and a statistics table. If the learner throws a {@code ModelSpecificationException}, the
 * exception message is appended to the node's warning message and a placeholder statistics
 * table is returned instead (one row per column and degree plus an "Intercept" row, each
 * with a 0.0 value cell and missing cells for the remaining statistics); the view data is
 * then filled with NaN statistics.</p>
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getDataTableSpec();
    final int colCount = inSpec.getNumColumns();
    String[] selectedCols = computeSelectedColumns(inSpec);
    Set<String> hash = new HashSet<String>(Arrays.asList(selectedCols));
    // flag, per input column index, whether the column is among the selected ones
    m_colSelected = new boolean[colCount];
    for (int i = 0; i < colCount; i++) {
        m_colSelected[i] = hash.contains(inTable.getDataTableSpec().getColumnSpec(i).getName());
    }
    final int rowCount = inTable.getRowCount();
    // the rows kept for the view contain the learning columns followed by the target column
    String[] temp = new String[m_columnNames.length + 1];
    System.arraycopy(m_columnNames, 0, temp, 0, m_columnNames.length);
    temp[temp.length - 1] = m_settings.getTargetColumn();
    FilterColumnTable filteredTable = new FilterColumnTable(inTable, temp);
    final DataArray rowContainer = new DefaultDataArray(filteredTable, 1, m_settings.getMaxRowsForView());
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inData[1] : null;
    PortObjectSpec[] outputSpec = configure((inPMMLPort == null) ? new PortObjectSpec[] { inData[0].getSpec(), null } : new PortObjectSpec[] { inData[0].getSpec(), inPMMLPort.getSpec() });
    Learner learner = new Learner((PMMLPortObjectSpec) outputSpec[0], 0d, m_settings.getMissingValueHandling() == MissingValueHandling.fail, m_settings.getDegree());
    try {
        PolyRegContent polyRegContent = learner.perform(inTable, exec);
        m_betas = fillBeta(polyRegContent);
        m_meanValues = polyRegContent.getMeans();
        ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
        crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
        PortObject[] bdt = new PortObject[] { createPMMLModel(inPMMLPort, inSpec), exec.createColumnRearrangeTable(inTable, crea, exec.createSilentSubExecutionContext(.2)), polyRegContent.createTablePortObject(exec.createSubExecutionContext(0.2)) };
        // NOTE(review): m_squaredError is presumably accumulated while the rearranged table
        // is created above; here it is turned into a per-row average - confirm
        m_squaredError /= rowCount;
        if (polyRegContent.getWarningMessage() != null) {
            setWarningMessage(polyRegContent.getWarningMessage());
        }
        double[] stdErrors = PolyRegViewData.mapToArray(polyRegContent.getStandardErrors(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptStdErr());
        double[] tValues = PolyRegViewData.mapToArray(polyRegContent.getTValues(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptTValue());
        double[] pValues = PolyRegViewData.mapToArray(polyRegContent.getPValues(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptPValue());
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, stdErrors, tValues, pValues, m_squaredError, polyRegContent.getAdjustedRSquared(), m_columnNames, m_settings.getDegree(), m_settings.getTargetColumn(), rowContainer);
        return bdt;
    } catch (ModelSpecificationException e) {
        final String origWarning = getWarningMessage();
        // Build the prefix separately: the conditional operator binds weaker than '+', so
        // writing "cond ? a : "" + msg" would drop the exception message whenever a
        // previous warning exists.
        final String warningPrefix = (origWarning != null && !origWarning.isEmpty()) ? (origWarning + "\n") : "";
        final String warning = warningPrefix + e.getMessage();
        setWarningMessage(warning);
        final ExecutionContext subExec = exec.createSubExecutionContext(.1);
        final BufferedDataContainer empty = subExec.createDataContainer(STATS_SPEC);
        int rowIdx = 1;
        // placeholder statistics: one row per (column, degree) pair ...
        for (final String column : m_columnNames) {
            for (int d = 1; d <= m_settings.getDegree(); ++d) {
                empty.addRowToTable(new DefaultRow("Row" + rowIdx++, new StringCell(column), new IntCell(d), new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(), DataType.getMissingCell()));
            }
        }
        // ... followed by the intercept row
        empty.addRowToTable(new DefaultRow("Row" + rowIdx, new StringCell("Intercept"), new IntCell(0), new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(), DataType.getMissingCell()));
        double[] nans = new double[m_columnNames.length * m_settings.getDegree() + 1];
        Arrays.fill(nans, Double.NaN);
        m_betas = new double[nans.length];
        // Mean only for the linear tags
        m_meanValues = new double[nans.length / m_settings.getDegree()];
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, nans, nans, nans, m_squaredError, Double.NaN, m_columnNames, m_settings.getDegree(), m_settings.getTargetColumn(), rowContainer);
        empty.close();
        ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
        crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
        BufferedDataTable rearrangerTable = exec.createColumnRearrangeTable(inTable, crea, exec.createSubProgress(0.6));
        PMMLPortObject model = createPMMLModel(inPMMLPort, inTable.getDataTableSpec());
        PortObject[] bdt = new PortObject[] { model, rearrangerTable, empty.getTable() };
        return bdt;
    }
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) DoubleCell(org.knime.core.data.def.DoubleCell) FilterColumnTable(org.knime.base.data.filter.column.FilterColumnTable) DataArray(org.knime.base.node.util.DataArray) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ModelSpecificationException(org.apache.commons.math3.stat.regression.ModelSpecificationException) IntCell(org.knime.core.data.def.IntCell) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) HashSet(java.util.HashSet) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) StringCell(org.knime.core.data.def.StringCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 40 with IntCell

use of org.knime.core.data.def.IntCell in project knime-core by knime.

the class HierarchicalClusterNodeModel method execute.

/**
 * {@inheritDoc}
 *
 * <p>Clusters the rows of the first input table bottom-up: starting from the clusters
 * returned by {@code initClusters}, the two clusters with the smallest distance are merged
 * repeatedly until a single cluster remains. The distance of every merge is recorded in a
 * fusion table, and the result table is created at the moment the number of clusters equals
 * the configured number of output clusters (or after the loop if that never happened).</p>
 *
 * <p>Side effects visible in this method: sets {@code m_distFunction}, {@code m_rootNode}
 * (when at least one cluster exists), {@code m_dataArray} and {@code m_fusionTable}.</p>
 *
 * @throws RuntimeException if the input table has more than 65,500 rows
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] data, final ExecutionContext exec) throws Exception {
    // determine the indices of the selected columns
    List<String> inlcludedCols = m_selectedColumns.getIncludeList();
    int[] selectedColIndices = new int[inlcludedCols.size()];
    for (int count = 0; count < selectedColIndices.length; count++) {
        selectedColIndices[count] = data[0].getDataTableSpec().findColumnIndex(inlcludedCols.get(count));
    }
    BufferedDataTable inputData = data[0];
    // hard upper limit on the number of input rows
    if (inputData.size() > 65500) {
        throw new RuntimeException("At most 65,500 patterns can be clustered");
    }
    DataTable outputData = null;
    // Manhattan distance if selected by name, Euclidean distance in every other case
    if (DistanceFunction.Names.Manhattan.toString().equals(m_distFunctionName.getStringValue())) {
        m_distFunction = ManhattanDist.MANHATTEN_DISTANCE;
    } else {
        m_distFunction = EuclideanDist.EUCLIDEAN_DISTANCE;
    }
    // generate initial clustering
    // which means that every data point is one cluster
    List<ClusterNode> clusters = initClusters(inputData, exec);
    // store the distance per each fusion step
    DataContainer fusionCont = exec.createDataContainer(createFusionSpec());
    int iterationStep = 0;
    // optional distance cache, initialized with NaN; null when caching is disabled
    final HalfFloatMatrix cache;
    if (m_cacheDistances.getBooleanValue()) {
        cache = new HalfFloatMatrix((int) inputData.size(), false);
        cache.fill(Float.NaN);
    } else {
        cache = null;
    }
    // denominator for the per-iteration progress below
    double max = inputData.size();
    // the number of clusters at the beginning is equal to the number
    // of data rows (each row is a cluster)
    int numberDataRows = clusters.size();
    while (clusters.size() > 1) {
        // checks if number clusters to generate output table is reached
        if (m_numClustersForOutput.getIntValue() == clusters.size()) {
            outputData = createResultTable(inputData, clusters, exec);
        }
        // NOTE(review): this progress update is immediately superseded by the second
        // setProgress call two statements below
        exec.setProgress((numberDataRows - clusters.size()) / (double) numberDataRows, clusters.size() + " clusters left to merge.");
        iterationStep++;
        exec.setProgress(iterationStep / max, "Iteration " + iterationStep + ", " + clusters.size() + " clusters remaining");
        // calculate distance between all clusters
        float currentSmallestDist = Float.MAX_VALUE;
        ClusterNode currentClosestCluster1 = null;
        ClusterNode currentClosestCluster2 = null;
        // subprogress for loop
        double availableProgress = (1.0 / numberDataRows);
        // the sub context is only used to report completion after the pair search
        ExecutionContext subexec = exec.createSubExecutionContext(availableProgress);
        // examine every unordered pair (i < j) of the current clusters
        for (int i = 0; i < clusters.size(); i++) {
            exec.checkCanceled();
            ClusterNode node1 = clusters.get(i);
            for (int j = i + 1; j < clusters.size(); j++) {
                final float dist;
                ClusterNode node2 = clusters.get(j);
                // pick the distance computation by linkage type: single, average,
                // and complete linkage for any other setting
                if (m_linkageType.getStringValue().equals(Linkage.SINGLE.name())) {
                    dist = calculateSingleLinkageDist(node1, node2, cache, selectedColIndices);
                } else if (m_linkageType.getStringValue().equals(Linkage.AVERAGE.name())) {
                    dist = calculateAverageLinkageDist(node1, node2, cache, selectedColIndices);
                } else {
                    dist = calculateCompleteLinkageDist(node1, node2, cache, selectedColIndices);
                }
                // strict comparison: among equally distant pairs the first one found is kept
                if (dist < currentSmallestDist) {
                    currentClosestCluster1 = node1;
                    currentClosestCluster2 = node2;
                    currentSmallestDist = dist;
                }
            }
        }
        subexec.setProgress(1.0);
        // NOTE(review): if no pair satisfied dist < Float.MAX_VALUE (e.g. every distance is
        // NaN), both closest-cluster references are still null at this point - TODO confirm
        // how ClusterNode and the removals below behave in that case
        // make one cluster of the two closest
        ClusterNode newNode = new ClusterNode(currentClosestCluster1, currentClosestCluster2, currentSmallestDist);
        clusters.remove(currentClosestCluster1);
        clusters.remove(currentClosestCluster2);
        clusters.add(newNode);
        // store the distance per each fusion step
        fusionCont.addRowToTable(new DefaultRow(// row key
        Integer.toString(clusters.size()), // x-axis scatter plotter
        new IntCell(clusters.size()), // y-axis scatter plotter
        new DoubleCell(newNode.getDist())));
    // // print number clusters and their data points
    // LOGGER.debug("Iteration " + iterationStep + ":");
    // LOGGER.debug(" Number Clusters: " + clusters.size());
    // printClustersDataRows(clusters);
    }
    // the remaining cluster (if any) becomes the root node
    if (clusters.size() > 0) {
        m_rootNode = clusters.get(0);
    }
    fusionCont.close();
    // if there was no input data create an empty output data
    // (also reached whenever the requested cluster count was never hit inside the loop)
    if (outputData == null) {
        outputData = createResultTable(inputData, clusters, exec);
    }
    m_dataArray = new DefaultDataArray(inputData, 1, (int) inputData.size());
    // one fusion row was added per loop iteration, hence iterationStep rows
    m_fusionTable = new DefaultDataArray(fusionCont.getTable(), 1, iterationStep);
    return new BufferedDataTable[] { exec.createBufferedDataTable(outputData, exec) };
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) IntCell(org.knime.core.data.def.IntCell) DataContainer(org.knime.core.data.container.DataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) BufferedDataTable(org.knime.core.node.BufferedDataTable) HalfFloatMatrix(org.knime.base.util.HalfFloatMatrix) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

IntCell (org.knime.core.data.def.IntCell): 109; DataCell (org.knime.core.data.DataCell): 79; DoubleCell (org.knime.core.data.def.DoubleCell): 67; StringCell (org.knime.core.data.def.StringCell): 55; DefaultRow (org.knime.core.data.def.DefaultRow): 46; DataRow (org.knime.core.data.DataRow): 33; DataTableSpec (org.knime.core.data.DataTableSpec): 21; RowKey (org.knime.core.data.RowKey): 21; ArrayList (java.util.ArrayList): 20; DataType (org.knime.core.data.DataType): 20; LongCell (org.knime.core.data.def.LongCell): 14; BufferedDataContainer (org.knime.core.node.BufferedDataContainer): 14; DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator): 12; BufferedDataTable (org.knime.core.node.BufferedDataTable): 12; Test (org.junit.Test): 11; DataColumnSpec (org.knime.core.data.DataColumnSpec): 11; ColumnRearranger (org.knime.core.data.container.ColumnRearranger): 9; InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 9; DataContainer (org.knime.core.data.container.DataContainer): 8; DateAndTimeValue (org.knime.core.data.date.DateAndTimeValue): 8