Search in sources :

Example 56 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class ClusterNodeModel method createAppendedSpec.

/**
 * Builds the output spec: the input spec followed by one additional string
 * column that carries the cluster label.
 *
 * @param originalSpec the spec of the input table
 * @return the input spec extended by the cluster label column
 */
private DataTableSpec createAppendedSpec(final DataTableSpec originalSpec) {
    // the domain of the new column lists one label per cluster
    final int clusterCount = m_nrOfClusters.getIntValue();
    final DataCell[] clusterLabels = new DataCell[clusterCount];
    for (int idx = 0; idx < clusterCount; idx++) {
        clusterLabels[idx] = new StringCell(CLUSTER + idx);
    }
    // find a column name that is not yet taken in the input spec:
    // "Cluster", then "Cluster_1", "Cluster_2", ... (fixes bug #1022)
    String columnName = "Cluster";
    for (int suffix = 1; originalSpec.getColumnSpec(columnName) != null; suffix++) {
        columnName = "Cluster_" + suffix;
    }
    // assemble the column spec including its domain
    final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnName, StringCell.TYPE);
    specCreator.setDomain(new DataColumnDomainCreator(clusterLabels).createDomain());
    final DataTableSpec appendedPart = new DataTableSpec(specCreator.createSpec());
    // concatenate input spec and the single-column spec
    return new DataTableSpec(originalSpec, appendedPart);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataColumnSpec(org.knime.core.data.DataColumnSpec) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString)

Example 57 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class PMMLDecisionTreeTranslator method getClassCount.

/**
 * Collects the score distribution of a PMML tree node into a map from class
 * value (as string cell) to record count. Entries are inserted in the order
 * in which the node's score distribution array lists them.
 *
 * @param node the PMML node whose score distribution is read
 * @return insertion-ordered map of class value to record count
 */
private LinkedHashMap<DataCell, Double> getClassCount(final Node node) {
    final LinkedHashMap<DataCell, Double> countsPerClass = new LinkedHashMap<DataCell, Double>();
    for (ScoreDistribution distribution : node.getScoreDistributionArray()) {
        final String classValue = distribution.getValue();
        final Double count = distribution.getRecordCount();
        final DataCell classCell = new StringCell(classValue);
        countsPerClass.put(classCell, count);
    }
    return countsPerClass;
}
Also used : ScoreDistribution(org.dmg.pmml.ScoreDistributionDocument.ScoreDistribution) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) LinkedHashMap(java.util.LinkedHashMap)

Example 58 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class CellSplitterCellFactory method tokenizeAndCreateCollectionsCell.

/**
 * Tokenizes the string representation of the given data cell and returns
 * an array of data cells. The array contains only one data cell, which
 * is a collection cell. Whether it is a List or Set cell is specified in
 * the settings. The collection cell contains string cells. For each
 * token one string cell is created.
 * <p>
 * A missing input cell yields a missing cell, unless the settings ask for
 * an empty string instead; in that case a collection holding only the
 * empty string cell is returned, using the same List/Set choice as for
 * regular input.
 *
 * @param inputCell the cell to tokenize (its string representation)
 * @return An array containing exactly one cell: a collection cell storing
 * one string cell per token, or a missing cell for missing input.
 * @since 2.6
 */
private DataCell[] tokenizeAndCreateCollectionsCell(final DataCell inputCell) {
    DataCell[] result = new DataCell[1];
    // missing value handling
    if (inputCell.isMissing()) {
        if (m_settings.isUseEmptyString()) {
            Collection<DataCell> emptyColl = new ArrayList<DataCell>(1);
            emptyColl.add(EMPTY_STRINGCELL);
            // honour the configured collection kind so that the replacement
            // for missing input matches the cells created for regular input
            if (m_settings.isOutputAsList()) {
                result[0] = CollectionCellFactory.createListCell(emptyColl);
            } else {
                result[0] = CollectionCellFactory.createSetCell(emptyColl);
            }
        } else {
            result[0] = DataType.getMissingCell();
        }
        return result;
    }
    final String inputString = getInputString(inputCell);
    // init the tokenizer
    StringReader inputReader = new StringReader(inputString);
    Tokenizer tokenizer = prepareTokenizer(inputReader);
    // one string cell per token, optionally trimmed
    Collection<DataCell> strColl = new ArrayList<DataCell>();
    String token = null;
    while ((token = tokenizer.nextToken()) != null) {
        if (m_settings.isTrim()) {
            token = token.trim();
        }
        strColl.add(new StringCell(token));
    }
    if (m_settings.isOutputAsList()) {
        result[0] = CollectionCellFactory.createListCell(strColl);
    } else {
        result[0] = CollectionCellFactory.createSetCell(strColl);
    }
    return result;
}
Also used : StringCell(org.knime.core.data.def.StringCell) ArrayList(java.util.ArrayList) StringReader(java.io.StringReader) DataCell(org.knime.core.data.DataCell) Tokenizer(org.knime.core.util.tokenizer.Tokenizer)

Example 59 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class ColumnAutoTypeCasterNodeModel method execute.

/**
 * {@inheritDoc}
 * <p>
 * Scans the included columns of the first input table (all rows, or only
 * the first rows when quick scan is enabled), derives a target type per
 * column and returns two tables: the input with the included columns
 * converted, and a table listing per column the final type and the row
 * that determined it. For an empty input the first table is the input
 * itself and the second table has no rows.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    double progress = 0;
    final BufferedDataTable data = inData[0];
    BufferedDataTable outTable = inData[0];
    final DataTableSpec inSpec = data.getDataTableSpec();
    final String[] incls = m_conf.applyTo(inSpec).getIncludes();
    final DataType[] types = new DataType[incls.length];
    final double max = incls.length + data.size();
    final String[] colNames = { "Column name", "Final column type", "Row determining final column type" };
    final DataType[] colTypes = new DataType[] { StringCell.TYPE, StringCell.TYPE, StringCell.TYPE };
    BufferedDataContainer reasonsCon = exec.createDataContainer(new DataTableSpec(colNames, colTypes));
    setReasons(new String[incls.length][3]);
    if (data.size() > 0) {
        // empty table check
        SimpleDateFormat dateFormat = new SimpleDateFormat(m_dateFormat);
        long numberOfRows = m_quickScan ? Math.min(m_numberOfRows, data.size()) : data.size();
        // the column positions do not change while scanning, so look them up once
        final int[] colIndices = new int[incls.length];
        for (int i = 0; i < incls.length; i++) {
            colIndices[i] = inSpec.findColumnIndex(incls[i]);
        }
        // Iterate explicitly so that the very iterator used for scanning is
        // closed, both when the quick scan stops early and when the scan is
        // aborted by an exception (e.g. cancellation).
        final org.knime.core.data.container.CloseableRowIterator rowIt = data.iterator();
        try {
            while (0 < numberOfRows-- && rowIt.hasNext()) {
                final DataRow row = rowIt.next();
                for (int i = 0; i < incls.length; i++) {
                    // guess for each cell in each column the best matching datatype
                    DataCell c = row.getCell(colIndices[i]);
                    if (!c.isMissing() && c.toString().equals(m_missValPat)) {
                        // cells equal to the missing value pattern do not influence the type
                        continue;
                    }
                    DataType newType = typeGuesser(c, dateFormat);
                    if (types[i] != null) {
                        DataType toSet = setType(types[i], newType);
                        if (!toSet.equals(types[i])) {
                            // this row changed the column's type: remember it as the reason
                            m_reasons[i][2] = row.getKey().getString();
                            m_reasons[i][1] = toSet.toString();
                            m_reasons[i][0] = incls[i];
                        }
                        types[i] = toSet;
                    } else {
                        // first cell seen for this column
                        types[i] = newType;
                        String r = row.getKey().toString();
                        r += m_quickScan ? (" based on a quickscan.") : "";
                        m_reasons[i][2] = r;
                        m_reasons[i][1] = newType.toString();
                        m_reasons[i][0] = incls[i];
                    }
                    exec.checkCanceled();
                }
                exec.checkCanceled();
                progress++;
                exec.setProgress(progress / max);
            }
        } finally {
            rowIt.close();
        }
        for (int i = 0; i < types.length; i++) {
            // if a column only contains missing cells, or no type was derived because every
            // scanned cell matched the missing value pattern, set the column type to StringCell
            if (types[i] == null || types[i].equals(DataType.getMissingCell().getType())) {
                types[i] = StringCell.TYPE;
            }
        }
        ColumnRearranger arrange = new ColumnRearranger(inSpec);
        for (int i = 0; i < incls.length; i++) {
            final int colIdx = colIndices[i];
            final DataType type = types[i];
            DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(incls[i], types[i]);
            DataColumnSpec colSpec = colSpecCreator.createSpec();
            if (type.equals(DateAndTimeCell.TYPE)) {
                arrange.replace(createDateAndTimeConverter(colIdx, colSpec), colIdx);
            } else if (type.equals(LongCell.TYPE)) {
                arrange.replace(createLongConverter(colIdx, colSpec), colIdx);
            } else {
                arrange.replace(createNumberConverter(colIdx, type, colSpec), colIdx);
            }
            progress++;
            exec.setProgress(progress / max);
            exec.checkCanceled();
        }
        outTable = exec.createColumnRearrangeTable(data, arrange, exec);
        for (int i = 0; i < m_reasons.length; i++) {
            DataCell[] row = new DataCell[m_reasons[i].length];
            for (int j = 0; j < m_reasons[i].length; j++) {
                // entries stay null when no reason was recorded for the column
                final String reason = m_reasons[i][j];
                row[j] = reason == null ? DataType.getMissingCell() : new StringCell(reason);
            }
            reasonsCon.addRowToTable(new DefaultRow(RowKey.createRowKey((long) i), row));
        }
    }
    reasonsCon.close();
    BufferedDataTable outReasons = reasonsCon.getTable();
    return new BufferedDataTable[] { outTable, outReasons };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataRow(org.knime.core.data.DataRow) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) StringCell(org.knime.core.data.def.StringCell) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) SimpleDateFormat(java.text.SimpleDateFormat)

Example 60 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class CellReplacerNodeModel method createColumnRearranger.

/**
 * Creates the rearranger that looks up each value of the target column in
 * the dictionary table and writes the dictionary's output value either into
 * the target column or into a newly appended column. The dictionary is read
 * into memory on the first call to the cell factory.
 *
 * @param spec spec of the table to process
 * @param dictSpec spec of the dictionary table
 * @param dictTable the dictionary table, iterated when the first cell is computed
 * @param dictionaryInitExec monitor receiving progress while the dictionary is read
 * @return the configured column rearranger
 * @throws InvalidSettingsException if the target column, a dictionary column
 *             or the new column name is not set or cannot be found
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec spec, final DataTableSpec dictSpec, final BufferedDataTable dictTable, final ExecutionMonitor dictionaryInitExec) throws InvalidSettingsException {
    // --- target column ---
    final String targetCol = m_targetColModel.getStringValue();
    if (targetCol == null || targetCol.isEmpty()) {
        throw new InvalidSettingsException("No target column selected");
    }
    final int targetColIndex = spec.findColumnIndex(targetCol);
    if (targetColIndex < 0) {
        throw new InvalidSettingsException("No such column \"" + targetCol + "\"");
    }
    final DataColumnSpec targetColSpec = spec.getColumnSpec(targetColIndex);

    // --- dictionary lookup (input) column; the row ID may be used instead ---
    final int dictInputColIndex = dictSpec.findColumnIndex(m_dictInputColModel.getStringValue());
    final boolean dictInputIsCollection;
    if (m_dictInputColModel.useRowID()) {
        dictInputIsCollection = false;
    } else {
        if (dictInputColIndex < 0) {
            throw new InvalidSettingsException("No such column \"" + m_dictInputColModel.getStringValue() + "\"");
        }
        dictInputIsCollection = dictSpec.getColumnSpec(dictInputColIndex).getType().isCollectionType();
    }

    // --- dictionary replacement (output) column; the row ID may be used instead ---
    final int dictOutputColIndex = dictSpec.findColumnIndex(m_dictOutputColModel.getStringValue());
    final DataType dictOutputColType;
    if (m_dictOutputColModel.useRowID()) {
        dictOutputColType = StringCell.TYPE;
    } else {
        if (dictOutputColIndex < 0) {
            throw new InvalidSettingsException("No such column \"" + m_dictOutputColModel.getStringValue() + "\"");
        }
        dictOutputColType = dictSpec.getColumnSpec(dictOutputColIndex).getType();
    }

    // --- type of the result column ---
    final NoMatchPolicy noMatchPolicy = getNoMatchPolicy();
    final DataType outputType;
    switch(noMatchPolicy) {
        case Input:
            // unmatched cells are passed through, so the type must cover both columns
            outputType = DataType.getCommonSuperType(dictOutputColType, targetColSpec.getType());
            break;
        default:
            outputType = dictOutputColType;
            break;
    }

    // --- name of the result column ---
    final boolean appendColumn = m_appendColumnModel.getBooleanValue();
    final String newColName;
    if (appendColumn) {
        final String requestedName = m_appendColumnNameModel.getStringValue();
        if (requestedName == null || requestedName.isEmpty()) {
            throw new InvalidSettingsException("No new column name given");
        }
        newColName = DataTableSpec.getUniqueColumnName(spec, requestedName);
    } else {
        newColName = targetColSpec.getName();
    }

    final DataColumnSpec resultColSpec = new DataColumnSpecCreator(newColName, outputType).createSpec();
    final CellFactory factory = new SingleCellFactory(resultColSpec) {

        /** Lookup key to replacement value; created on first use. */
        private Map<DataCell, DataCell> m_replacements;

        @Override
        public DataCell getCell(final DataRow row) {
            try {
                loadDictionaryIfNeeded();
            } catch (CanceledExecutionException e) {
                // cancellation done by the framework
                return DataType.getMissingCell();
            }
            final DataCell original = row.getCell(targetColIndex);
            final DataCell replacement = m_replacements.get(original);
            if (replacement != null) {
                return replacement;
            }
            // no dictionary entry for this cell
            switch(noMatchPolicy) {
                case Input:
                    return original;
                default:
                    return DataType.getMissingCell();
            }
        }

        /** Reads the whole dictionary table into the map unless that already happened. */
        private void loadDictionaryIfNeeded() throws CanceledExecutionException {
            if (m_replacements != null) {
                return;
            }
            m_replacements = new HashMap<DataCell, DataCell>();
            final double rowCount = dictTable.size();
            int rowsRead = 0;
            for (DataRow dictRow : dictTable) {
                // fraction is based on the rows completed so far, the message is 1-based
                final double fraction = rowsRead / rowCount;
                rowsRead++;
                dictionaryInitExec.setProgress(fraction, "Reading dictionary into memory, row " + rowsRead);
                dictionaryInitExec.checkCanceled();
                // a negative index means the row ID is used in place of a column
                final DataCell output;
                if (dictOutputColIndex < 0) {
                    output = new StringCell(dictRow.getKey().getString());
                } else {
                    output = dictRow.getCell(dictOutputColIndex);
                }
                final DataCell input;
                if (dictInputColIndex < 0) {
                    input = new StringCell(dictRow.getKey().getString());
                } else {
                    input = dictRow.getCell(dictInputColIndex);
                }
                if (dictInputIsCollection && !input.isMissing()) {
                    // every element of the collection is a search key of its own
                    for (DataCell element : (CollectionDataValue) input) {
                        register(element, output);
                    }
                } else {
                    register(input, output);
                }
            }
        }

        /** Stores one key/value pair and warns if the key was already present. */
        private void register(final DataCell input, final DataCell output) {
            final DataCell previous = m_replacements.put(input, output);
            if (previous != null) {
                setWarningMessage("Duplicate search key \"" + input + "\"");
            }
        }
    };

    final ColumnRearranger result = new ColumnRearranger(spec);
    if (appendColumn) {
        result.append(factory);
    } else {
        result.replace(factory, targetColIndex);
    }
    return result;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) StringCell(org.knime.core.data.def.StringCell) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) HashMap(java.util.HashMap) Map(java.util.Map) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue)

Aggregations

StringCell (org.knime.core.data.def.StringCell)176 DataCell (org.knime.core.data.DataCell)130 DoubleCell (org.knime.core.data.def.DoubleCell)67 DefaultRow (org.knime.core.data.def.DefaultRow)65 IntCell (org.knime.core.data.def.IntCell)55 DataRow (org.knime.core.data.DataRow)52 DataTableSpec (org.knime.core.data.DataTableSpec)49 ArrayList (java.util.ArrayList)41 DataColumnSpec (org.knime.core.data.DataColumnSpec)37 RowKey (org.knime.core.data.RowKey)36 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)26 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)26 DataType (org.knime.core.data.DataType)22 LinkedHashSet (java.util.LinkedHashSet)21 BufferedDataTable (org.knime.core.node.BufferedDataTable)20 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)19 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)16 LinkedHashMap (java.util.LinkedHashMap)15 Test (org.junit.Test)15 HashMap (java.util.HashMap)11