Search in sources :

Example 6 with CellFactory

use of org.knime.core.data.container.CellFactory in project knime-core by knime.

the class FuzzyClusterNodeModel method execute.

/**
 * Clusters the first input table with the configured algorithm variant and
 * returns the original rows extended by the columns produced by the
 * {@link ClusterMembershipFactory}, together with a PMML port object that
 * carries the cluster centres.
 *
 * OUTPORT 0 = original data rows plus the appended membership columns,
 * OUTPORT 1 = PMML cluster model
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inputTable = (BufferedDataTable) inData[0];
    // forget the results of any previous run
    m_clusters = null;
    m_betweenClusterVariation = Double.NaN;
    m_withinClusterVariation = null;

    // choose the algorithm variant: without noise handling, or with noise
    // handling parameterised by lambda (delta is calculated) or by delta
    if (!m_noise) {
        m_fcmAlgo = m_memory
            ? new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier)
            : new FCMAlgorithm(m_nrClusters, m_fuzzifier);
    } else if (m_calculateDelta) {
        m_fcmAlgo = m_memory
            ? new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier, m_calculateDelta, m_lambda)
            : new FCMAlgorithm(m_nrClusters, m_fuzzifier, m_calculateDelta, m_lambda);
    } else {
        m_fcmAlgo = m_memory
            ? new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier, m_calculateDelta, m_delta)
            : new FCMAlgorithm(m_nrClusters, m_fuzzifier, m_calculateDelta, m_delta);
    }

    final int rowCount = inputTable.getRowCount();
    final DataTableSpec inSpec = inputTable.getDataTableSpec();
    final int columnCount = inSpec.getNumColumns();

    // split the input columns into those listed in m_list and the rest
    final List<String> learningCols = new LinkedList<String>();
    final List<String> ignoreCols = new LinkedList<String>();
    final int[] columns = new int[m_list.size()];
    int included = 0;
    for (int col = 0; col < columnCount; col++) {
        final String name = inSpec.getColumnSpec(col).getName();
        if (!m_list.contains(name)) {
            ignoreCols.add(name);
            continue;
        }
        columns[included++] = col;
        learningCols.add(name);
    }

    // the algorithm only sees the included columns
    final ColumnRearranger filter = new ColumnRearranger(inSpec);
    filter.keepOnly(columns);
    final BufferedDataTable filteredTable = exec.createColumnRearrangeTable(inputTable, filter, exec);
    // dimension of the feature space
    final int dimension = filteredTable.getDataTableSpec().getNumColumns();

    final Random random = new Random();
    if (m_useRandomSeed) {
        random.setSeed(m_randomSeed);
    }
    m_fcmAlgo.init(rowCount, dimension, filteredTable, random);

    // iterate until the reported change drops to 1e-7 or below, or the
    // maximum number of iterations is reached
    double totalChange = Double.MAX_VALUE;
    for (int iteration = 0; (totalChange > 1e-7) && (iteration < m_maxNrIterations); iteration++) {
        exec.checkCanceled();
        exec.setProgress((double) iteration / (double) m_maxNrIterations, "Iteration " + iteration + " Total change of prototypes: " + totalChange);
        totalChange = m_fcmAlgo.doOneIteration(exec);
    }

    if (m_measures) {
        final double[][] data;
        if (m_fcmAlgo instanceof FCMAlgorithmMemory) {
            // the in-memory variant already holds the data as doubles
            data = ((FCMAlgorithmMemory) m_fcmAlgo).getConvertedData();
        } else {
            // otherwise convert the filtered table; missing cells become 0
            data = new double[rowCount][m_fcmAlgo.getDimension()];
            int rowIdx = 0;
            for (final DataRow row : filteredTable) {
                for (int j = 0; j < row.getNumCells(); j++) {
                    data[rowIdx][j] = row.getCell(j).isMissing() ? 0 : ((DoubleValue) row.getCell(j)).getDoubleValue();
                }
                rowIdx++;
            }
        }
        m_fcmmeasures = new FCMQualityMeasures(m_fcmAlgo.getClusterCentres(), m_fcmAlgo.getweightMatrix(), data, m_fuzzifier);
    }

    // append the membership columns to the unfiltered input table
    final ColumnRearranger appender = new ColumnRearranger(inSpec);
    final CellFactory membershipFactory = new ClusterMembershipFactory(m_fcmAlgo);
    appender.append(membershipFactory);
    final BufferedDataTable result = exec.createColumnRearrangeTable(inputTable, appender, exec);

    // don't write out the noise cluster: copy every centre except the last
    double[][] clusterCentres = m_fcmAlgo.getClusterCentres();
    if (m_noise) {
        final double[][] withoutNoise = new double[clusterCentres.length - 1][];
        for (int c = 0; c < withoutNoise.length; c++) {
            withoutNoise[c] = clusterCentres[c].clone();
        }
        clusterCentres = withoutNoise;
    }

    exec.setMessage("Creating PMML cluster model...");
    // the PMML input is optional
    final PMMLPortObject inPMMLPort = m_enablePMMLInput ? (PMMLPortObject) inData[1] : null;
    final PMMLPortObjectSpec inPMMLSpec = (inPMMLPort == null) ? null : inPMMLPort.getSpec();
    final PMMLPortObjectSpec pmmlOutSpec = createPMMLPortObjectSpec(inPMMLSpec, inSpec, learningCols);
    final PMMLPortObject outPMMLPort = new PMMLPortObject(pmmlOutSpec, inPMMLPort, inSpec);
    final LinkedHashSet<String> learningFields = new LinkedHashSet<String>(pmmlOutSpec.getLearningFields());
    outPMMLPort.addModelTranslater(new PMMLClusterTranslator(ComparisonMeasure.squaredEuclidean, m_nrClusters, clusterCentres, null, learningFields));
    return new PortObject[] { result, outPMMLPort };
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) DataRow(org.knime.core.data.DataRow) LinkedList(java.util.LinkedList) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) Random(java.util.Random) PMMLClusterTranslator(org.knime.base.node.mine.cluster.PMMLClusterTranslator) DoubleValue(org.knime.core.data.DoubleValue) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) BufferedDataTable(org.knime.core.node.BufferedDataTable) CellFactory(org.knime.core.data.container.CellFactory) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PortObject(org.knime.core.node.port.PortObject)

Example 7 with CellFactory

use of org.knime.core.data.container.CellFactory in project knime-core by knime.

the class PCAApplyNodeModel method execute.

/**
 * Projects the rows of the data input with the eigenvectors of the PCA
 * model input and appends the resulting columns; the original input columns
 * are removed when that option is set.
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final PCAModelPortObject model = (PCAModelPortObject) inData[MODEL_INPORT];
    final int dimensions = m_dimSelection.getNeededDimensions();
    if (dimensions == -1) {
        throw new IllegalArgumentException("Number of dimensions not correct configured");
    }

    // optional up-front scan: reject the table if any input column holds a
    // missing value
    if (m_failOnMissingValues.getBooleanValue()) {
        for (final DataRow row : (DataTable) inData[DATA_INPORT]) {
            for (final int columnIndex : m_inputColumnIndices) {
                if (row.getCell(columnIndex).isMissing()) {
                    throw new IllegalArgumentException("data table contains missing values");
                }
            }
        }
    }

    final Matrix eigenvectors = EigenValue.getSortedEigenVectors(model.getEigenVectors(), model.getEigenvalues(), dimensions);
    final DataTableSpec inSpec = (DataTableSpec) inData[DATA_INPORT].getSpec();
    final DataColumnSpec[] appendedSpecs = PCANodeModel.createAddTableSpec(inSpec, dimensions);

    final CellFactory projection = new CellFactory() {

        @Override
        public DataColumnSpec[] getColumnSpecs() {
            return appendedSpecs;
        }

        @Override
        public DataCell[] getCells(final DataRow row) {
            return PCANodeModel.convertInputRow(eigenvectors, row, model.getCenter(), m_inputColumnIndices, dimensions, m_failOnMissingValues.getBooleanValue());
        }

        @Override
        public void setProgress(final int curRowNr, final int rowCount, final RowKey lastKey, final ExecutionMonitor texec) {
            texec.setProgress((double) curRowNr / rowCount, "converting input row " + curRowNr + " of " + rowCount);
        }
    };

    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    rearranger.append(projection);
    if (m_removeOriginalCols.getBooleanValue()) {
        rearranger.remove(m_inputColumnNames);
    }
    return new PortObject[] { exec.createColumnRearrangeTable((BufferedDataTable) inData[DATA_INPORT], rearranger, exec) };
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) RowKey(org.knime.core.data.RowKey) DataRow(org.knime.core.data.DataRow) Matrix(Jama.Matrix) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataCell(org.knime.core.data.DataCell) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) CellFactory(org.knime.core.data.container.CellFactory) PortObject(org.knime.core.node.port.PortObject)

Example 8 with CellFactory

use of org.knime.core.data.container.CellFactory in project knime-core by knime.

the class CellReplacerNodeModel method createColumnRearranger.

/**
 * Builds the rearranger that replaces the values of the target column (or
 * appends a new column) with values looked up in the dictionary table.
 *
 * The dictionary is read into a map lazily, on the first call for a cell.
 *
 * @param spec spec of the table whose target column is processed
 * @param dictSpec spec of the dictionary table
 * @param dictTable the dictionary table; only read when the first cell is
 *            requested
 * @param dictionaryInitExec monitor used for progress and cancellation
 *            while the dictionary is read
 * @return the configured rearranger
 * @throws InvalidSettingsException if no target column or new column name
 *             is set, or a configured column does not exist
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec spec, final DataTableSpec dictSpec, final BufferedDataTable dictTable, final ExecutionMonitor dictionaryInitExec) throws InvalidSettingsException {
    // --- target column -------------------------------------------------
    final String targetCol = m_targetColModel.getStringValue();
    if (targetCol == null || targetCol.isEmpty()) {
        throw new InvalidSettingsException("No target column selected");
    }
    final int targetColIndex = spec.findColumnIndex(targetCol);
    if (targetColIndex < 0) {
        throw new InvalidSettingsException("No such column \"" + targetCol + "\"");
    }
    final DataColumnSpec targetColSpec = spec.getColumnSpec(targetColIndex);

    // --- dictionary lookup (input) column ------------------------------
    final int dictInputColIndex = dictSpec.findColumnIndex(m_dictInputColModel.getStringValue());
    final boolean dictInputIsCollection;
    if (m_dictInputColModel.useRowID()) {
        dictInputIsCollection = false;
    } else if (dictInputColIndex >= 0) {
        dictInputIsCollection = dictSpec.getColumnSpec(dictInputColIndex).getType().isCollectionType();
    } else {
        throw new InvalidSettingsException("No such column \"" + m_dictInputColModel.getStringValue() + "\"");
    }

    // --- dictionary replacement (output) column ------------------------
    final int dictOutputColIndex = dictSpec.findColumnIndex(m_dictOutputColModel.getStringValue());
    final DataType dictOutputColType;
    if (m_dictOutputColModel.useRowID()) {
        dictOutputColType = StringCell.TYPE;
    } else if (dictOutputColIndex < 0) {
        throw new InvalidSettingsException("No such column \"" + m_dictOutputColModel.getStringValue() + "\"");
    } else {
        dictOutputColType = dictSpec.getColumnSpec(dictOutputColIndex).getType();
    }

    // when unmatched cells keep their input value, the output type has to
    // cover both the dictionary output type and the target column type
    final NoMatchPolicy noMatchPolicy = getNoMatchPolicy();
    final DataType outputType;
    switch(noMatchPolicy) {
        case Input:
            outputType = DataType.getCommonSuperType(dictOutputColType, targetColSpec.getType());
            break;
        default:
            outputType = dictOutputColType;
            break;
    }

    // --- name of the produced column -----------------------------------
    final String newColName;
    if (!m_appendColumnModel.getBooleanValue()) {
        newColName = targetColSpec.getName();
    } else {
        final String requestedName = m_appendColumnNameModel.getStringValue();
        if (requestedName == null || requestedName.isEmpty()) {
            throw new InvalidSettingsException("No new column name given");
        }
        newColName = DataTableSpec.getUniqueColumnName(spec, requestedName);
    }

    final CellFactory replacer = new SingleCellFactory(new DataColumnSpecCreator(newColName, outputType).createSpec()) {

        /** Lookup map from search cell to replacement cell, filled on first use. */
        private Map<DataCell, DataCell> m_dictionaryMap;

        @Override
        public DataCell getCell(final DataRow row) {
            try {
                loadDictionaryIfNeeded();
            } catch (CanceledExecutionException e) {
                // cancellation done by the framework
                return DataType.getMissingCell();
            }
            final DataCell original = row.getCell(targetColIndex);
            final DataCell replacement = m_dictionaryMap.get(original);
            if (replacement != null) {
                return replacement;
            }
            switch(noMatchPolicy) {
                case Input:
                    return original;
                default:
                    return DataType.getMissingCell();
            }
        }

        /** Reads the dictionary table into the map unless the map already exists. */
        private void loadDictionaryIfNeeded() throws CanceledExecutionException {
            if (m_dictionaryMap != null) {
                return;
            }
            m_dictionaryMap = new HashMap<DataCell, DataCell>();
            int rowsRead = 0;
            final double rowCount = dictTable.size();
            for (final DataRow dictRow : dictTable) {
                // progress is computed from the number of rows read before
                // this one; the message shows the 1-based row number
                final double progress = rowsRead / rowCount;
                rowsRead++;
                dictionaryInitExec.setProgress(progress, "Reading dictionary into memory, row " + rowsRead);
                dictionaryInitExec.checkCanceled();
                // a negative index means the row ID is used instead of a column
                final DataCell output = dictOutputColIndex < 0 ? new StringCell(dictRow.getKey().getString()) : dictRow.getCell(dictOutputColIndex);
                final DataCell input = dictInputColIndex < 0 ? new StringCell(dictRow.getKey().getString()) : dictRow.getCell(dictInputColIndex);
                if (dictInputIsCollection && !input.isMissing()) {
                    // every element of a collection is its own search key
                    for (final DataCell element : (CollectionDataValue) input) {
                        register(element, output);
                    }
                } else {
                    register(input, output);
                }
            }
        }

        /** Stores one search/replacement pair and warns when the key was already present. */
        private void register(final DataCell input, final DataCell output) {
            if (m_dictionaryMap.put(input, output) != null) {
                setWarningMessage("Duplicate search key \"" + input + "\"");
            }
        }
    };

    final ColumnRearranger result = new ColumnRearranger(spec);
    if (m_appendColumnModel.getBooleanValue()) {
        result.append(replacer);
    } else {
        result.replace(replacer, targetColIndex);
    }
    return result;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) StringCell(org.knime.core.data.def.StringCell) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) HashMap(java.util.HashMap) Map(java.util.Map) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue)

Example 9 with CellFactory

use of org.knime.core.data.container.CellFactory in project knime-core by knime.

the class InteractiveHiLiteCollectorNodeModel method execute.

/**
 * Passes the input through unchanged when no annotations are stored;
 * otherwise appends one string column per annotation index (0 up to
 * {@code m_lastIndex}) holding the annotation stored for the row's key, or
 * a missing cell where none is stored.
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    m_data = inData[0];
    if (m_annotationMap.isEmpty()) {
        return new PortObject[] { m_data };
    }
    DataTableSpec inSpec = (DataTableSpec) m_data.getSpec();
    final DataColumnSpec[] cspecs = createSpecs(inSpec);
    ColumnRearranger cr = new ColumnRearranger(inSpec);
    cr.append(new CellFactory() {

        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell[] getCells(final DataRow row) {
            if (m_annotationMap.isEmpty()) {
                return new DataCell[0];
            }
            final DataCell[] cells = new DataCell[m_lastIndex + 1];
            // the annotations of a row do not depend on the column index,
            // so they are looked up once per row instead of once per cell
            final Map<Integer, String> annotations = m_annotationMap.get(row.getKey());
            for (int i = 0; i < cells.length; i++) {
                final String text = (annotations == null) ? null : annotations.get(i);
                cells[i] = (text == null) ? DataType.getMissingCell() : new StringCell(text);
            }
            return cells;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public DataColumnSpec[] getColumnSpecs() {
            return cspecs;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public void setProgress(final int curRowNr, final int rowCount, final RowKey lastKey, final ExecutionMonitor em) {
            em.setProgress((double) curRowNr / rowCount);
        }
    });
    return new BufferedDataTable[] { exec.createColumnRearrangeTable((BufferedDataTable) m_data, cr, exec) };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) RowKey(org.knime.core.data.RowKey) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) StringCell(org.knime.core.data.def.StringCell) BufferedDataTable(org.knime.core.node.BufferedDataTable) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) PortObject(org.knime.core.node.port.PortObject) CellFactory(org.knime.core.data.container.CellFactory) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Example 10 with CellFactory

use of org.knime.core.data.container.CellFactory in project knime-core by knime.

the class XML2PMMLNodeModel method createColRearranger.

/**
 * Builds the rearranger that converts the selected XML column into a PMML
 * column, either replacing the source column or appending a new one.
 *
 * Cells that cannot be converted increment {@code m_failCounter}; depending
 * on {@code m_failOnInvalid} the conversion then either throws or yields a
 * missing cell carrying the failure text.
 *
 * @param spec spec of the input table
 * @return the configured rearranger
 * @throws InvalidSettingsException if the selected column does not exist or
 *             is not compatible with {@link StringValue}
 */
private ColumnRearranger createColRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    if (m_xmlColumnName.getStringValue() == null) {
        guessDefaultXMLColumn(spec);
    }
    String xmlColName = m_xmlColumnName.getStringValue();
    String newColName = m_newColumnName.getStringValue();
    final int colIndex = spec.findColumnIndex(xmlColName);
    CheckUtils.checkSetting(colIndex >= 0, "Column: '%s' does not exist anymore.", xmlColName);
    final DataColumnSpec colSpec = spec.getColumnSpec(colIndex);
    CheckUtils.checkSetting(colSpec.getType().isCompatible(StringValue.class), "Selected column '%s' is not string/xml-compatible", xmlColName);
    DataColumnSpecCreator colSpecCreator;
    if (newColName != null && !m_replaceColumn.getBooleanValue()) {
        String newName = DataTableSpec.getUniqueColumnName(spec, newColName);
        colSpecCreator = new DataColumnSpecCreator(newName, PMMLCell.TYPE);
    } else {
        // reuse the source column's spec, but with the PMML type and
        // without its handlers and domain
        colSpecCreator = new DataColumnSpecCreator(colSpec);
        colSpecCreator.setType(PMMLCell.TYPE);
        colSpecCreator.removeAllHandlers();
        colSpecCreator.setDomain(null);
    }
    DataColumnSpec outColumnSpec = colSpecCreator.createSpec();
    ColumnRearranger rearranger = new ColumnRearranger(spec);
    CellFactory fac = new SingleCellFactory(outColumnSpec) {

        @Override
        public DataCell getCell(final DataRow row) {
            DataCell cell = row.getCell(colIndex);
            if (cell.isMissing()) {
                return DataType.getMissingCell();
            }
            PMMLDocument pmmlDoc = null;
            String failure = null;
            // kept so that the exception thrown below carries its cause
            XmlException parseError = null;
            try (LockedSupplier<Document> supplier = ((XMLValue<Document>) cell).getDocumentSupplier()) {
                XmlObject xmlDoc = XmlObject.Factory.parse(supplier.get().cloneNode(true));
                if (xmlDoc instanceof PMMLDocument) {
                    pmmlDoc = (PMMLDocument) xmlDoc;
                } else if (PMMLUtils.isOldKNIMEPMML(xmlDoc) || PMMLUtils.is4_1PMML(xmlDoc)) {
                    String updatedPMML = PMMLUtils.getUpdatedVersionAndNamespace(xmlDoc);
                    /* Parse the modified document and assign it to a
                     * PMMLDocument.*/
                    pmmlDoc = PMMLDocument.Factory.parse(updatedPMML);
                } else {
                    failure = "No valid PMML v 3.x/4.0/4.1 document";
                }
            } catch (XmlException e) {
                if (!m_failOnInvalid.getBooleanValue()) {
                    LOGGER.error("Invalid PMML in row " + row.getKey() + ": " + e.getMessage(), e);
                }
                parseError = e;
                // a null message must not be mistaken for "no failure"
                failure = e.getMessage() != null ? e.getMessage() : e.toString();
            }
            if (failure != null) {
                m_failCounter.incrementAndGet();
                if (m_failOnInvalid.getBooleanValue()) {
                    // parseError is null when the document parsed but is not PMML
                    throw new RuntimeException("Invalid PMML in row " + row.getKey() + ": " + failure, parseError);
                }
                return new MissingCell(failure);
            }
            try {
                return PMMLCellFactory.create(pmmlDoc.toString());
            } catch (Exception e) {
                // best effort: a cell that cannot be created becomes a
                // missing cell carrying the error text
                return new MissingCell(e.getMessage());
            }
        }
    };
    if (m_replaceColumn.getBooleanValue()) {
        rearranger.replace(fac, colIndex);
    } else {
        rearranger.append(fac);
    }
    return rearranger;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) PMMLDocument(org.dmg.pmml.PMMLDocument) Document(org.w3c.dom.Document) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) XmlException(org.apache.xmlbeans.XmlException) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) MissingCell(org.knime.core.data.MissingCell) XmlException(org.apache.xmlbeans.XmlException) DataCell(org.knime.core.data.DataCell) XmlObject(org.apache.xmlbeans.XmlObject) PMMLDocument(org.dmg.pmml.PMMLDocument) XMLValue(org.knime.core.data.xml.XMLValue) StringValue(org.knime.core.data.StringValue) PMMLCellFactory(org.knime.core.data.xml.PMMLCellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Aggregations

CellFactory (org.knime.core.data.container.CellFactory)26 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)23 DataColumnSpec (org.knime.core.data.DataColumnSpec)20 DataRow (org.knime.core.data.DataRow)20 DataCell (org.knime.core.data.DataCell)13 SingleCellFactory (org.knime.core.data.container.SingleCellFactory)13 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)12 RowKey (org.knime.core.data.RowKey)8 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)8 DataTableSpec (org.knime.core.data.DataTableSpec)7 BufferedDataTable (org.knime.core.node.BufferedDataTable)7 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)7 StringCell (org.knime.core.data.def.StringCell)6 PortObject (org.knime.core.node.port.PortObject)6 Matrix (Jama.Matrix)3 ArrayList (java.util.ArrayList)3 LinkedHashMap (java.util.LinkedHashMap)3 Map (java.util.Map)3 DataType (org.knime.core.data.DataType)3 DoubleValue (org.knime.core.data.DoubleValue)3