Search in sources :

Example 21 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class TreeNodeRegression method createDecisionTreeNode.

/**
 * Converts this regression tree node and, recursively, all of its children into the
 * {@link DecisionTreeNode} representation.
 *
 * <p>The node's mean ({@code m_mean}) is formatted with {@code DoubleFormat.formatDouble} and wrapped
 * in a {@link StringCell} that is used as the majority cell. The same cell is the single key of the
 * distribution map, where it is mapped to {@code m_totalSum}. A node without children is returned as
 * a {@link DecisionTreeNodeLeaf}; any other node is returned as a {@link DecisionTreeNodeSplitPMML}
 * carrying one predicate and one converted node per child.
 *
 * @param idGenerator source of node ids; a node draws its own id before any of its children do
 * @param metaData meta data used to look up the name of the split attribute of non-leaf nodes
 * @return the converted node (leaf or split node)
 */
public DecisionTreeNode createDecisionTreeNode(final MutableInteger idGenerator, final TreeMetaData metaData) {
    final DataCell meanCell = new StringCell(DoubleFormat.formatDouble(m_mean));
    final int childCount = getNrChildren();
    final LinkedHashMap<DataCell, Double> distribution = new LinkedHashMap<DataCell, Double>();
    distribution.put(meanCell, m_totalSum);

    // Leaf: nothing to recurse into.
    if (childCount == 0) {
        return new DecisionTreeNodeLeaf(idGenerator.inc(), meanCell, distribution);
    }

    // Reserve this node's id before descending so that parents are numbered before their children.
    final int nodeId = idGenerator.inc();
    final DecisionTreeNode[] convertedChildren = new DecisionTreeNode[childCount];
    final int splitIndex = getSplitAttributeIndex();
    assert splitIndex >= 0 : "non-leaf node has no split";
    final String splitName = metaData.getAttributeMetaData(splitIndex).getAttributeName();
    final PMMLPredicate[] predicates = new PMMLPredicate[childCount];

    int slot = 0;
    while (slot < childCount) {
        final TreeNodeRegression child = getChild(slot);
        predicates[slot] = child.getCondition().toPMMLPredicate();
        convertedChildren[slot] = child.createDecisionTreeNode(idGenerator, metaData);
        slot++;
    }
    return new DecisionTreeNodeSplitPMML(nodeId, meanCell, distribution, splitName, predicates, convertedChildren);
}
Also used : PMMLPredicate(org.knime.base.node.mine.decisiontree2.PMMLPredicate) LinkedHashMap(java.util.LinkedHashMap) DecisionTreeNodeLeaf(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeLeaf) StringCell(org.knime.core.data.def.StringCell) DecisionTreeNodeSplitPMML(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNodeSplitPMML) DataCell(org.knime.core.data.DataCell) DecisionTreeNode(org.knime.base.node.mine.decisiontree2.model.DecisionTreeNode)

Example 22 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class DBRowIteratorImpl method readArray.

/**
 * Reads the SQL {@code ARRAY} value of a column of the current result row and converts it into a
 * list cell of strings.
 *
 * <p>Every non-{@code null} element is converted with {@code toString()} into a {@link StringCell};
 * {@code null} elements become missing cells.
 *
 * @param i zero-based column index; the JDBC column {@code i + 1} is read
 * @return a missing cell if the column value was SQL {@code NULL} (or no array could be obtained),
 *         otherwise a list cell holding one cell per array element
 * @throws SQLException if the array cannot be retrieved or materialised
 */
protected DataCell readArray(final int i) throws SQLException {
    final Array array = m_result.getArray(i + 1);
    if (wasNull() || array == null) {
        return DataType.getMissingCell();
    }
    // JDBC leaves it to the driver whether an ARRAY of a primitive base type is materialised as
    // Object[] or as a primitive array (int[], double[], ...). A plain (Object[]) cast fails with a
    // ClassCastException for the latter, so the elements are accessed reflectively instead.
    // java.lang.reflect.Array is fully qualified because "Array" refers to java.sql.Array here.
    final Object vals = array.getArray();
    if (vals == null) {
        return DataType.getMissingCell();
    }
    final int length = java.lang.reflect.Array.getLength(vals);
    final Collection<DataCell> cells = new ArrayList<>(length);
    for (int idx = 0; idx < length; idx++) {
        // primitive elements are boxed by the reflective accessor, so toString() works uniformly
        final Object val = java.lang.reflect.Array.get(vals, idx);
        cells.add(val == null ? DataType.getMissingCell() : new StringCell(val.toString()));
    }
    return CollectionCellFactory.createListCell(cells);
}
Also used : Array(java.sql.Array) StringCell(org.knime.core.data.def.StringCell) ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell)

Example 23 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class PMMLDataDictionaryTranslator method addColSpecsForDataFields.

/**
 * Creates one column spec per data field of the PMML data dictionary, appends it to
 * {@code colSpecs} and records the field name in {@code m_dictFields}.
 *
 * <p>String columns get a domain consisting of the values listed for the field. Columns that are
 * not nominal but compatible with {@link DoubleValue} get a domain whose bounds are taken from the
 * field's first interval or, if no interval is defined, from the minimum and maximum of the listed
 * values. For all other columns the domain passed to the spec creator is {@code null}.
 *
 * @param pmmlDoc the PMML document to analyze
 * @param colSpecs the list to add the data column specs to
 * @throws IllegalArgumentException if a String field defines intervals, or if a listed value of a
 *             numeric field cannot be parsed as double
 */
private void addColSpecsForDataFields(final PMMLDocument pmmlDoc, final List<DataColumnSpec> colSpecs) {
    DataDictionary dict = pmmlDoc.getPMML().getDataDictionary();
    for (DataField dataField : dict.getDataFieldArray()) {
        String name = dataField.getName();
        DataType dataType = getKNIMEDataType(dataField.getDataType());
        DataColumnSpecCreator specCreator = new DataColumnSpecCreator(name, dataType);
        // remains null for nominal types other than String and for types that are neither
        // nominal nor double compatible
        DataColumnDomain domain = null;
        if (dataType.isCompatible(NominalValue.class)) {
            if (DataType.getType(StringCell.class).equals(dataType)) {
                domain = createStringDomain(dataField);
            }
        } else if (dataType.isCompatible(DoubleValue.class)) {
            domain = createNumericDomain(dataField, dataType);
        }
        specCreator.setDomain(domain);
        colSpecs.add(specCreator.createSpec());
        m_dictFields.add(name);
    }
}

/**
 * Builds the domain of a String column from the values listed for the data field.
 *
 * @param dataField the PMML data field of a String column
 * @return a domain containing one {@link StringCell} per listed value
 * @throws IllegalArgumentException if the field defines intervals
 */
private static DataColumnDomain createStringDomain(final DataField dataField) {
    if (dataField.getIntervalArray().length > 0) {
        throw new IllegalArgumentException("Intervals cannot be defined for Strings.");
    }
    Value[] valueArray = dataField.getValueArray();
    DataCell[] cells = new StringCell[valueArray.length];
    for (int j = 0; j < cells.length; j++) {
        cells[j] = new StringCell(valueArray[j].getValue());
    }
    return new DataColumnDomainCreator(cells).createDomain();
}

/**
 * Builds the domain of a numeric column from the first interval of the data field or, if there is
 * none, from the minimum and maximum of its listed values.
 *
 * @param dataField the PMML data field of a numeric column
 * @param dataType the KNIME type of the column; decides which cell type is used for the bounds
 * @return a domain with lower and upper bound if margins could be determined, otherwise an empty
 *         domain
 * @throws IllegalArgumentException if a listed value cannot be parsed as double
 */
private static DataColumnDomain createNumericDomain(final DataField dataField, final DataType dataType) {
    Double leftMargin = null;
    Double rightMargin = null;
    Interval[] intervalArray = dataField.getIntervalArray();
    if (intervalArray != null && intervalArray.length > 0) {
        // only the first interval is taken into account
        Interval interval = dataField.getIntervalArray(0);
        leftMargin = interval.getLeftMargin();
        rightMargin = interval.getRightMargin();
    } else {
        Value[] valueArray = dataField.getValueArray();
        if (valueArray != null && valueArray.length > 0) {
            // try to derive the bounds from the values
            List<Double> values = new ArrayList<Double>(valueArray.length);
            for (Value pmmlValue : valueArray) {
                String value = "";
                try {
                    value = pmmlValue.getValue();
                    values.add(Double.parseDouble(value));
                } catch (Exception e) {
                    // keep the original failure as cause so the parse problem stays diagnosable
                    throw new IllegalArgumentException("Skipping domain calculation. " + "Value \"" + value + "\" cannot be cast to double.", e);
                }
            }
            leftMargin = Collections.min(values);
            rightMargin = Collections.max(values);
        }
    }
    if (leftMargin == null || rightMargin == null) {
        return new DataColumnDomainCreator().createDomain();
    }
    // set the bounds of the domain; they stay null for types other than int and double
    DataCell lowerBound = null;
    DataCell upperBound = null;
    if (DataType.getType(IntCell.class).equals(dataType)) {
        lowerBound = new IntCell(leftMargin.intValue());
        upperBound = new IntCell(rightMargin.intValue());
    } else if (DataType.getType(DoubleCell.class).equals(dataType)) {
        lowerBound = new DoubleCell(leftMargin);
        upperBound = new DoubleCell(rightMargin);
    }
    return new DataColumnDomainCreator(lowerBound, upperBound).createDomain();
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataDictionary(org.dmg.pmml.DataDictionaryDocument.DataDictionary) IntCell(org.knime.core.data.def.IntCell) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataField(org.dmg.pmml.DataFieldDocument.DataField) StringCell(org.knime.core.data.def.StringCell) DoubleValue(org.knime.core.data.DoubleValue) NominalValue(org.knime.core.data.NominalValue) BooleanValue(org.knime.core.data.BooleanValue) IntValue(org.knime.core.data.IntValue) Value(org.dmg.pmml.ValueDocument.Value) DoubleValue(org.knime.core.data.DoubleValue) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) Interval(org.dmg.pmml.IntervalDocument.Interval)

Example 24 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class TreeNominalColumnDataTest method createPCATestData.

/**
 * Builds a small fixture consisting of one nominal attribute column ("test-col", values A to E) and
 * one nominal target column ("target-col", classes T1 to T3).
 *
 * <p>For every attribute value / class combination the number of generated rows is taken from a
 * fixed count table (60 rows per attribute value, 300 rows in total). Rows are generated attribute
 * value by attribute value and, within one attribute value, class by class.
 *
 * @param config the learner configuration handed to the attribute column creator
 * @return the attribute column data (attribute index set to 0) paired with the target column data
 */
private static Pair<TreeNominalColumnData, TreeTargetNominalColumnData> createPCATestData(final TreeEnsembleLearnerConfiguration config) {
    final DataColumnSpec attributeSpec = new DataColumnSpecCreator("test-col", StringCell.TYPE).createSpec();
    final String[] attVals = { "A", "B", "C", "D", "E" };
    final String[] classes = { "T1", "T2", "T3" };
    final TreeNominalColumnDataCreator attributeCreator = new TreeNominalColumnDataCreator(attributeSpec);

    final DataColumnSpecCreator targetSpecCreator = new DataColumnSpecCreator("target-col", StringCell.TYPE);
    final StringCell[] classCells = Arrays.stream(classes).distinct().map(StringCell::new).toArray(StringCell[]::new);
    targetSpecCreator.setDomain(new DataColumnDomainCreator(classCells).createDomain());
    final DataColumnSpec targetSpec = targetSpecCreator.createSpec();
    final TreeTargetColumnDataCreator targetCreator = new TreeTargetNominalColumnDataCreator(targetSpec);

    // classDistributions[a][c]: number of rows with attribute value attVals[a] and class classes[c]
    final int[][] classDistributions = { { 40, 10, 10 }, { 10, 40, 10 }, { 20, 30, 10 }, { 20, 15, 25 }, { 10, 5, 45 } };
    long nextRowIndex = 0;
    for (int a = 0; a < attVals.length; a++) {
        final int[] countsPerClass = classDistributions[a];
        for (int c = 0; c < classes.length; c++) {
            for (int remaining = countsPerClass[c]; remaining > 0; remaining--) {
                final RowKey key = RowKey.createRowKey(nextRowIndex);
                nextRowIndex++;
                attributeCreator.add(key, new StringCell(attVals[a]));
                targetCreator.add(key, new StringCell(classes[c]));
            }
        }
    }

    final TreeNominalColumnData attributeData = attributeCreator.createColumnData(0, config);
    attributeData.getMetaData().setAttributeIndex(0);
    final TreeTargetNominalColumnData targetData = (TreeTargetNominalColumnData) targetCreator.createColumnData();
    return Pair.create(attributeData, targetData);
}
Also used : Arrays(java.util.Arrays) RandomData(org.apache.commons.math.random.RandomData) RowKey(org.knime.core.data.RowKey) IsInstanceOf.instanceOf(org.hamcrest.core.IsInstanceOf.instanceOf) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) SplitCriterion(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration.SplitCriterion) DataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.DataMemberships) TreeNodeNominalCondition(org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalCondition) Pair(org.knime.core.util.Pair) Assert.assertThat(org.junit.Assert.assertThat) ColumnSamplingMode(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration.ColumnSamplingMode) TreeEnsembleLearnerConfiguration(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) NominalMultiwaySplitCandidate(org.knime.base.node.mine.treeensemble2.learner.NominalMultiwaySplitCandidate) SetLogic(org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalBinaryCondition.SetLogic) NominalBinarySplitCandidate(org.knime.base.node.mine.treeensemble2.learner.NominalBinarySplitCandidate) BigInteger(java.math.BigInteger) TreeNodeNominalBinaryCondition(org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalBinaryCondition) SplitCandidate(org.knime.base.node.mine.treeensemble2.learner.SplitCandidate) TreeType(org.knime.base.node.mine.treeensemble2.model.AbstractTreeEnsembleModel.TreeType) Assert.assertNotNull(org.junit.Assert.assertNotNull) IDataIndexManager(org.knime.base.node.mine.treeensemble2.data.memberships.IDataIndexManager) RootDataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.RootDataMemberships) 
Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultDataIndexManager(org.knime.base.node.mine.treeensemble2.data.memberships.DefaultDataIndexManager) Assert.assertNull(org.junit.Assert.assertNull) Assert.assertFalse(org.junit.Assert.assertFalse) StringCell(org.knime.core.data.def.StringCell) BitSet(java.util.BitSet) MissingValueHandling(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration.MissingValueHandling) Assert.assertEquals(org.junit.Assert.assertEquals) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) RowKey(org.knime.core.data.RowKey) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataColumnSpec(org.knime.core.data.DataColumnSpec) StringCell(org.knime.core.data.def.StringCell)

Example 25 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class BlobsInSetCellWorkflowTest method createBDT.

/**
 * {@inheritDoc}
 *
 * <p>Creates a table with a single list column named "Sequence" and {@code ROW_COUNT} rows. Each
 * row holds one list cell with {@code LIST_SIZE * 2} elements that alternate between a
 * {@link LargeBlobCell} (even positions) and a {@link StringCell} (odd positions).
 */
@Override
protected BufferedDataTable createBDT(final ExecutionContext exec) {
    final DataType listType = ListCell.getCollectionType(DataType.getType(DataCell.class));
    final DataTableSpec tableSpec = new DataTableSpec(new DataColumnSpecCreator("Sequence", listType).createSpec());
    final BufferedDataContainer container = exec.createDataContainer(tableSpec);

    final int elementsPerRow = LIST_SIZE * 2;
    int row = 0;
    while (row < ROW_COUNT) {
        final Collection<DataCell> elements = new ArrayList<DataCell>();
        for (int pos = 0; pos < elementsPerRow; pos++) {
            final String text = "Row_" + row + "; Cell index " + pos;
            // every other element is an ordinary string cell, the rest are blob cells
            if (pos % 2 != 0) {
                elements.add(new StringCell(text));
            } else {
                elements.add(new LargeBlobCell(text, LargeBlobCell.SIZE_OF_CELL));
            }
        }
        final ListCell listCell = CollectionCellFactory.createListCell(elements);
        final String rowName = "someName_" + row;
        container.addRowToTable(new DefaultRow(rowName, listCell));
        row++;
    }
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ListCell(org.knime.core.data.collection.ListCell) ArrayList(java.util.ArrayList) LargeBlobCell(org.knime.testing.data.blob.LargeBlobCell) StringCell(org.knime.core.data.def.StringCell) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Aggregations

StringCell (org.knime.core.data.def.StringCell)176 DataCell (org.knime.core.data.DataCell)130 DoubleCell (org.knime.core.data.def.DoubleCell)67 DefaultRow (org.knime.core.data.def.DefaultRow)65 IntCell (org.knime.core.data.def.IntCell)55 DataRow (org.knime.core.data.DataRow)52 DataTableSpec (org.knime.core.data.DataTableSpec)49 ArrayList (java.util.ArrayList)41 DataColumnSpec (org.knime.core.data.DataColumnSpec)37 RowKey (org.knime.core.data.RowKey)36 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)26 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)26 DataType (org.knime.core.data.DataType)22 LinkedHashSet (java.util.LinkedHashSet)21 BufferedDataTable (org.knime.core.node.BufferedDataTable)20 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)19 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)16 LinkedHashMap (java.util.LinkedHashMap)15 Test (org.junit.Test)15 HashMap (java.util.HashMap)11