Search in sources :

Example 56 with StringCell

Use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class ClusterNodeModel method createAppendedSpec.

private DataTableSpec createAppendedSpec(final DataTableSpec originalSpec) {
    // determine the possible values of the appended column
    DataCell[] possibleValues = new DataCell[m_nrOfClusters.getIntValue()];
    for (int i = 0; i < m_nrOfClusters.getIntValue(); i++) {
        DataCell key = new StringCell(CLUSTER + i);
        possibleValues[i] = key;
    // create the domain
    // 1) guess an unused name for the new column (fixes bug #1022)
    String colNameGuess = "Cluster";
    int uniqueNr = 0;
    while (originalSpec.getColumnSpec(colNameGuess) != null) {
        colNameGuess = "Cluster_" + uniqueNr;
    // 2) create spec
    DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(possibleValues);
    DataColumnSpecCreator creator = new DataColumnSpecCreator(colNameGuess, StringCell.TYPE);
    // create the appended column spec
    DataColumnSpec labelColSpec = creator.createSpec();
    return new DataTableSpec(originalSpec, new DataTableSpec(labelColSpec));
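Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataColumnSpec(org.knime.core.data.DataColumnSpec) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString)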

Example 57 with StringCell

Use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class PMMLDecisionTreeTranslator method getClassCount.

private LinkedHashMap<DataCell, Double> getClassCount(final Node node) {
    LinkedHashMap<DataCell, Double> knimeScoreDistribution = new LinkedHashMap<DataCell, Double>();
    ScoreDistribution[] pmmlScoreDistArray = node.getScoreDistributionArray();
    for (ScoreDistribution sd : pmmlScoreDistArray) {
        String category = sd.getValue();
        Double recordCount = sd.getRecordCount();
        knimeScoreDistribution.put(new StringCell(category), recordCount);
    return knimeScoreDistribution;
Also used : ScoreDistribution(org.dmg.pmml.ScoreDistributionDocument.ScoreDistribution) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) LinkedHashMap(java.util.LinkedHashMap)

Example 58 with StringCell

Use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class CellSplitterCellFactory method tokenizeAndCreateCollectionsCell.

/**
 * Tokenizes the string representation of the given data cell and returns
 * an array of data cells. The array contains only one data cell, which
 * is a collection cell. Whether it is a List or Set cell is specified in
 * the settings. The collection cell contains string cells. For each
 * token one string cell is created.
 *
 * @param inputCell the cell to tokenize (its string representation)
 * @return An array containing exactly one collection cell, storing string
 * cells. For each token one string cell.
 * @since 2.6
 */
private DataCell[] tokenizeAndCreateCollectionsCell(final DataCell inputCell) {
    DataCell[] result = new DataCell[1];
    // missing value handling
    if (inputCell.isMissing()) {
        Arrays.fill(result, DataType.getMissingCell());
        if (m_settings.isUseEmptyString()) {
            Collection<DataCell> strColl = new ArrayList<DataCell>(1);
            result[0] = CollectionCellFactory.createListCell(strColl);
        return result;
    final String inputString = getInputString(inputCell);
    // init the tokenizer
    StringReader inputReader = new StringReader(inputString);
    Tokenizer tokenizer = prepareTokenizer(inputReader);
    Collection<DataCell> strColl = new ArrayList<DataCell>();
    String token = null;
    while ((token = tokenizer.nextToken()) != null) {
        if (m_settings.isTrim()) {
            token = token.trim();
        strColl.add(new StringCell(token));
    if (m_settings.isOutputAsList()) {
        result[0] = CollectionCellFactory.createListCell(strColl);
    } else {
        result[0] = CollectionCellFactory.createSetCell(strColl);
    return result;
Also used : StringCell(org.knime.core.data.def.StringCell) ArrayList(java.util.ArrayList) StringReader(java.io.StringReader) DataCell(org.knime.core.data.DataCell) Tokenizer(org.knime.core.util.tokenizer.Tokenizer)

Example 59 with StringCell

Use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class ColumnAutoTypeCasterNodeModel method execute.

/**
 * {@inheritDoc}
 */
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    double progress = 0;
    final BufferedDataTable data = inData[0];
    BufferedDataTable outTable = inData[0];
    final String[] incls = m_conf.applyTo(data.getDataTableSpec()).getIncludes();
    final DataType[] types = new DataType[incls.length];
    final double max = incls.length + data.size();
    final String[] colNames = { "Column name", "Final column type", "Row determining final column type" };
    final DataType[] colTypes = new DataType[] { StringCell.TYPE, StringCell.TYPE, StringCell.TYPE };
    BufferedDataContainer reasonsCon = exec.createDataContainer(new DataTableSpec(colNames, colTypes));
    setReasons(new String[incls.length][3]);
    if (data.size() > 0) {
        // empty table check
        SimpleDateFormat dateFormat = new SimpleDateFormat(m_dateFormat);
        long numberOfRows = m_quickScan ? Math.min(m_numberOfRows, data.size()) : data.size();
        for (DataRow row : data) {
            if (!(0 < numberOfRows--)) {
            for (int i = 0; i < incls.length; i++) {
                // guess for each cell in each column the best matching datatype
                DataCell c = row.getCell(data.getDataTableSpec().findColumnIndex(incls[i]));
                if (!c.isMissing() && c.toString().equals(m_missValPat)) {
                DataType newType = typeGuesser(c, dateFormat);
                if (types[i] != null) {
                    DataType toSet = setType(types[i], newType);
                    if (!toSet.equals(types[i])) {
                        m_reasons[i][2] = row.getKey().getString();
                        m_reasons[i][1] = toSet.toString();
                        m_reasons[i][0] = incls[i];
                    types[i] = toSet;
                } else {
                    types[i] = newType;
                    String r = row.getKey().toString();
                    r += m_quickScan ? (" based on a quickscan.") : "";
                    m_reasons[i][2] = r;
                    m_reasons[i][1] = newType.toString();
                    m_reasons[i][0] = incls[i];
            exec.setProgress(progress / max);
        for (int i = 0; i < types.length; i++) {
            // if one column only contains missingCells than set column type to StringCell
            if (types[i].equals(DataType.getMissingCell().getType())) {
                types[i] = StringCell.TYPE;
        ColumnRearranger arrange = new ColumnRearranger(data.getDataTableSpec());
        for (int i = 0; i < incls.length; i++) {
            final int colIdx = data.getDataTableSpec().findColumnIndex(incls[i]);
            final DataType type = types[i];
            DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(incls[i], types[i]);
            DataColumnSpec colSpec = colSpecCreator.createSpec();
            if (type.equals(DateAndTimeCell.TYPE)) {
                arrange.replace(createDateAndTimeConverter(colIdx, colSpec), colIdx);
            } else if (type.equals(LongCell.TYPE)) {
                arrange.replace(createLongConverter(colIdx, colSpec), colIdx);
            } else {
                arrange.replace(createNumberConverter(colIdx, type, colSpec), colIdx);
            exec.setProgress(progress / max);
        outTable = exec.createColumnRearrangeTable(data, arrange, exec);
        for (int i = 0; i < m_reasons.length; i++) {
            DataCell[] row = new DataCell[m_reasons[i].length];
            for (int j = 0; j < m_reasons[i].length; j++) {
                row[j] = new StringCell(m_reasons[i][j]);
            reasonsCon.addRowToTable(new DefaultRow(RowKey.createRowKey((long) i), row));
    BufferedDataTable outReasons = reasonsCon.getTable();
    return new BufferedDataTable[] { outTable, outReasons };
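Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataRow(org.knime.core.data.DataRow) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) StringCell(org.knime.core.data.def.StringCell) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) SimpleDateFormat(java.text.SimpleDateFormat)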

Example 60 with StringCell

Use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class CellReplacerNodeModel method createColumnRearranger.

private ColumnRearranger createColumnRearranger(final DataTableSpec spec, final DataTableSpec dictSpec, final BufferedDataTable dictTable, final ExecutionMonitor dictionaryInitExec) throws InvalidSettingsException {
    String targetCol = m_targetColModel.getStringValue();
    if (targetCol == null || targetCol.length() == 0) {
        throw new InvalidSettingsException("No target column selected");
    final int targetColIndex = spec.findColumnIndex(targetCol);
    if (targetColIndex < 0) {
        throw new InvalidSettingsException("No such column \"" + targetCol + "\"");
    final DataColumnSpec targetColSpec = spec.getColumnSpec(targetColIndex);
    final int dictInputColIndex = dictSpec.findColumnIndex(m_dictInputColModel.getStringValue());
    final boolean dictInputIsCollection;
    if (m_dictInputColModel.useRowID()) {
        dictInputIsCollection = false;
    } else if (dictInputColIndex < 0) {
        throw new InvalidSettingsException("No such column \"" + m_dictInputColModel.getStringValue() + "\"");
    } else {
        DataColumnSpec inS = dictSpec.getColumnSpec(dictInputColIndex);
        dictInputIsCollection = inS.getType().isCollectionType();
    final int dictOutputColIndex = dictSpec.findColumnIndex(m_dictOutputColModel.getStringValue());
    final DataType dictOutputColType;
    if (m_dictOutputColModel.useRowID()) {
        dictOutputColType = StringCell.TYPE;
    } else {
        if (dictOutputColIndex < 0) {
            throw new InvalidSettingsException("No such column \"" + m_dictOutputColModel.getStringValue() + "\"");
        dictOutputColType = dictSpec.getColumnSpec(dictOutputColIndex).getType();
    final NoMatchPolicy noMatchPolicy = getNoMatchPolicy();
    DataType outputType;
    switch(noMatchPolicy) {
        case Input:
            outputType = DataType.getCommonSuperType(dictOutputColType, targetColSpec.getType());
            outputType = dictOutputColType;
    String newColName;
    if (m_appendColumnModel.getBooleanValue()) {
        String newName = m_appendColumnNameModel.getStringValue();
        if (newName == null || newName.length() == 0) {
            throw new InvalidSettingsException("No new column name given");
        newColName = DataTableSpec.getUniqueColumnName(spec, newName);
    } else {
        newColName = targetColSpec.getName();
    DataColumnSpecCreator replaceSpecCreator = new DataColumnSpecCreator(newColName, outputType);
    CellFactory c = new SingleCellFactory(replaceSpecCreator.createSpec()) {

        private Map<DataCell, DataCell> m_dictionaryMap;

        public DataCell getCell(final DataRow row) {
            try {
            } catch (CanceledExecutionException e) {
                // cancellation done by the framework
                return DataType.getMissingCell();
            DataCell cell = row.getCell(targetColIndex);
            DataCell output = m_dictionaryMap.get(cell);
            if (output == null) {
                switch(noMatchPolicy) {
                    case Input:
                        return cell;
                        return DataType.getMissingCell();
            return output;

        private void ensureInitDictionaryMap() throws CanceledExecutionException {
            if (m_dictionaryMap == null) {
                m_dictionaryMap = new HashMap<DataCell, DataCell>();
                int i = 0;
                double rowCount = dictTable.size();
                for (DataRow r : dictTable) {
                    dictionaryInitExec.setProgress((i++) / rowCount, "Reading dictionary into memory, row " + i);
                    DataCell output = dictOutputColIndex < 0 ? new StringCell(r.getKey().getString()) : r.getCell(dictOutputColIndex);
                    DataCell input = dictInputColIndex < 0 ? new StringCell(r.getKey().getString()) : r.getCell(dictInputColIndex);
                    if (input.isMissing()) {
                        addSearchPair(input, output);
                    } else if (dictInputIsCollection) {
                        CollectionDataValue v = (CollectionDataValue) input;
                        for (DataCell element : v) {
                            addSearchPair(element, output);
                    } else {
                        addSearchPair(input, output);

        private void addSearchPair(final DataCell input, final DataCell output) {
            if (m_dictionaryMap.put(input, output) != null) {
                setWarningMessage("Duplicate search key \"" + input + "\"");
    ColumnRearranger result = new ColumnRearranger(spec);
    if (m_appendColumnModel.getBooleanValue()) {
    } else {
        result.replace(c, targetColIndex);
    return result;
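Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) StringCell(org.knime.core.data.def.StringCell) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) HashMap(java.util.HashMap) Map(java.util.Map) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue)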


StringCell ( DataCell ( DoubleCell ( DefaultRow ( IntCell ( DataRow ( DataTableSpec ( ArrayList (java.util.ArrayList)41 DataColumnSpec ( RowKey ( DataColumnSpecCreator ( BufferedDataContainer (org.knime.core.node.BufferedDataContainer)26 DataType ( LinkedHashSet (java.util.LinkedHashSet)21 BufferedDataTable (org.knime.core.node.BufferedDataTable)20 ColumnRearranger ( InvalidSettingsException (org.knime.core.node.InvalidSettingsException)16 LinkedHashMap (java.util.LinkedHashMap)15 Test (org.junit.Test)15 HashMap (java.util.HashMap)11