use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class TreeNodeRegression method createDecisionTreeNode.
/**
 * Converts this regression tree node, together with all of its descendants, into the
 * {@link DecisionTreeNode} representation.
 *
 * <p>
 * The node's mean is formatted as a string and wrapped in a {@link StringCell} that serves as the
 * majority cell of the created node. The distribution map contains exactly one entry, mapping that cell
 * to the node's total sum.
 * </p>
 *
 * @param idGenerator counter supplying the node ids; it is incremented once for every created node, for
 *            a parent before any of its children
 * @param metaData meta data used to look up the name of the attribute this node splits on
 * @return a {@link DecisionTreeNodeLeaf} if this node has no children, otherwise a
 *         {@link DecisionTreeNodeSplitPMML} holding the converted children and their predicates
 */
public DecisionTreeNode createDecisionTreeNode(final MutableInteger idGenerator, final TreeMetaData metaData) {
    final DataCell meanCell = new StringCell(DoubleFormat.formatDouble(m_mean));
    final int childCount = getNrChildren();
    final LinkedHashMap<DataCell, Double> distribution = new LinkedHashMap<DataCell, Double>();
    distribution.put(meanCell, m_totalSum);

    // Leaf: nothing to recurse into.
    if (childCount == 0) {
        return new DecisionTreeNodeLeaf(idGenerator.inc(), meanCell, distribution);
    }

    // Draw this node's id before descending so that the parent id precedes the ids of its children.
    final int nodeId = idGenerator.inc();
    final DecisionTreeNode[] convertedChildren = new DecisionTreeNode[childCount];
    final int splitIndex = getSplitAttributeIndex();
    assert splitIndex >= 0 : "non-leaf node has no split";
    final String splitName = metaData.getAttributeMetaData(splitIndex).getAttributeName();
    final PMMLPredicate[] predicates = new PMMLPredicate[childCount];

    int childIdx = 0;
    while (childIdx < childCount) {
        final TreeNodeRegression child = getChild(childIdx);
        predicates[childIdx] = child.getCondition().toPMMLPredicate();
        convertedChildren[childIdx] = child.createDecisionTreeNode(idGenerator, metaData);
        childIdx++;
    }
    return new DecisionTreeNodeSplitPMML(nodeId, meanCell, distribution, splitName, predicates,
        convertedChildren);
}
use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class DBRowIteratorImpl method readArray.
/**
 * Reads the SQL ARRAY value of result column {@code i + 1} and converts it into a list cell whose
 * elements are the string representations of the array elements.
 *
 * <p>
 * {@code null} elements become missing cells. If the column value itself is SQL NULL, a missing cell is
 * returned instead of a list.
 * </p>
 *
 * @param i zero-based column index; the JDBC column {@code i + 1} is read
 * @return a list cell of {@link StringCell}s / missing cells, or a missing cell if the value was NULL
 * @throws SQLException if the array cannot be retrieved from the result set or materialized
 */
protected DataCell readArray(final int i) throws SQLException {
    final Array array = m_result.getArray(i + 1);
    if (wasNull() || array == null) {
        return DataType.getMissingCell();
    }
    // JDBC leaves it to the driver whether a base type that maps to a Java primitive is materialized as
    // a primitive array (e.g. int[]) or as Object[]. Casting to Object[] would fail with a
    // ClassCastException for the former, so the elements are accessed reflectively, which handles both.
    final Object vals = array.getArray();
    final int length = java.lang.reflect.Array.getLength(vals);
    final Collection<DataCell> cells = new ArrayList<>(length);
    for (int idx = 0; idx < length; idx++) {
        // primitives are returned boxed, so toString() yields their usual textual form
        final Object val = java.lang.reflect.Array.get(vals, idx);
        cells.add(val == null ? DataType.getMissingCell() : new StringCell(val.toString()));
    }
    return CollectionCellFactory.createListCell(cells);
}
use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class PMMLDataDictionaryTranslator method addColSpecsForDataFields.
/**
 * Creates one data column spec for every data field of the PMML data dictionary, appends it to
 * {@code colSpecs} and records the field name in {@code m_dictFields}.
 *
 * <p>
 * Domain handling depends on the KNIME type the field maps to:
 * </p>
 * <ul>
 * <li>String columns: the listed values become the possible values of the domain.</li>
 * <li>Columns compatible with {@link DoubleValue}: the bounds are taken from the first interval if one is
 * defined, otherwise from the minimum and maximum of the listed values. Bound cells are only created for
 * int and double columns.</li>
 * <li>Anything else: no domain is derived ({@code null} is passed to the spec creator).</li>
 * </ul>
 *
 * @param pmmlDoc the PMML document to analyze
 * @param colSpecs the list to add the data column specs to
 * @throws IllegalArgumentException if a string field defines intervals, or if a listed value of a
 *             numeric field cannot be read as a double
 */
private void addColSpecsForDataFields(final PMMLDocument pmmlDoc, final List<DataColumnSpec> colSpecs) {
    DataDictionary dict = pmmlDoc.getPMML().getDataDictionary();
    for (DataField dataField : dict.getDataFieldArray()) {
        String name = dataField.getName();
        DataType dataType = getKNIMEDataType(dataField.getDataType());
        DataColumnSpecCreator specCreator = new DataColumnSpecCreator(name, dataType);
        DataColumnDomain domain = null;
        if (dataType.isCompatible(NominalValue.class)) {
            Value[] valueArray = dataField.getValueArray();
            if (DataType.getType(StringCell.class).equals(dataType)) {
                if (dataField.getIntervalArray().length > 0) {
                    throw new IllegalArgumentException("Intervals cannot be defined for Strings.");
                }
                // the listed values are the possible values of the nominal domain
                DataCell[] cells = new StringCell[valueArray.length];
                for (int j = 0; j < cells.length; j++) {
                    cells[j] = new StringCell(valueArray[j].getValue());
                }
                domain = new DataColumnDomainCreator(cells).createDomain();
            }
        } else if (dataType.isCompatible(DoubleValue.class)) {
            Double leftMargin = null;
            Double rightMargin = null;
            Interval[] intervalArray = dataField.getIntervalArray();
            if (intervalArray != null && intervalArray.length > 0) {
                // only the first interval is considered
                Interval interval = dataField.getIntervalArray(0);
                leftMargin = interval.getLeftMargin();
                rightMargin = interval.getRightMargin();
            } else if (dataField.getValueArray() != null && dataField.getValueArray().length > 0) {
                // try to derive the bounds from the values
                Value[] valueArray = dataField.getValueArray();
                List<Double> values = new ArrayList<Double>();
                for (int j = 0; j < valueArray.length; j++) {
                    String value = "";
                    try {
                        value = valueArray[j].getValue();
                        values.add(Double.parseDouble(value));
                    } catch (Exception e) {
                        // keep the original failure as cause so the root problem stays visible
                        throw new IllegalArgumentException(
                            "Skipping domain calculation. " + "Value \"" + value + "\" cannot be cast to double.",
                            e);
                    }
                }
                leftMargin = Collections.min(values);
                rightMargin = Collections.max(values);
            }
            if (leftMargin != null && rightMargin != null) {
                // set the bounds of the domain if available
                DataCell lowerBound = null;
                DataCell upperBound = null;
                if (DataType.getType(IntCell.class).equals(dataType)) {
                    lowerBound = new IntCell(leftMargin.intValue());
                    upperBound = new IntCell(rightMargin.intValue());
                } else if (DataType.getType(DoubleCell.class).equals(dataType)) {
                    lowerBound = new DoubleCell(leftMargin);
                    upperBound = new DoubleCell(rightMargin);
                }
                // for other numeric types both bounds stay null
                domain = new DataColumnDomainCreator(lowerBound, upperBound).createDomain();
            } else {
                domain = new DataColumnDomainCreator().createDomain();
            }
        }
        specCreator.setDomain(domain);
        colSpecs.add(specCreator.createSpec());
        m_dictFields.add(name);
    }
}
use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class TreeNominalColumnDataTest method createPCATestData.
/**
 * Builds the test fixture consisting of a nominal attribute column ("test-col", values A to E) and a
 * nominal target column ("target-col", classes T1 to T3).
 *
 * <p>
 * For every attribute value / class combination the fixed distribution table determines how many rows
 * with that combination are added. Row keys are generated from a running counter.
 * </p>
 *
 * @param config the learner configuration passed on when creating the attribute column data
 * @return pair of the attribute column data (attribute index set to 0) and the target column data
 */
private static Pair<TreeNominalColumnData, TreeTargetNominalColumnData> createPCATestData(final TreeEnsembleLearnerConfiguration config) {
    final DataColumnSpec attributeSpec = new DataColumnSpecCreator("test-col", StringCell.TYPE).createSpec();
    final String[] attributeValues = { "A", "B", "C", "D", "E" };
    final String[] classNames = { "T1", "T2", "T3" };
    final TreeNominalColumnDataCreator attributeCreator = new TreeNominalColumnDataCreator(attributeSpec);

    // The target domain lists every distinct class as a possible value.
    final DataColumnSpecCreator targetSpecCreator = new DataColumnSpecCreator("target-col", StringCell.TYPE);
    final StringCell[] classCells =
        Arrays.stream(classNames).distinct().map(StringCell::new).toArray(StringCell[]::new);
    targetSpecCreator.setDomain(new DataColumnDomainCreator(classCells).createDomain());
    final DataColumnSpec targetSpec = targetSpecCreator.createSpec();
    final TreeTargetColumnDataCreator targetCreator = new TreeTargetNominalColumnDataCreator(targetSpec);

    // rowsPerCombination[v][c] = number of rows having attribute value v and class c
    final int[][] rowsPerCombination =
        { { 40, 10, 10 }, { 10, 40, 10 }, { 20, 30, 10 }, { 20, 15, 25 }, { 10, 5, 45 } };
    long nextRowIndex = 0;
    for (int valueIdx = 0; valueIdx < attributeValues.length; valueIdx++) {
        final int[] rowsPerClass = rowsPerCombination[valueIdx];
        for (int classIdx = 0; classIdx < classNames.length; classIdx++) {
            int remaining = rowsPerClass[classIdx];
            while (remaining > 0) {
                final RowKey rowKey = RowKey.createRowKey(nextRowIndex);
                nextRowIndex++;
                attributeCreator.add(rowKey, new StringCell(attributeValues[valueIdx]));
                targetCreator.add(rowKey, new StringCell(classNames[classIdx]));
                remaining--;
            }
        }
    }

    final TreeNominalColumnData attributeData = attributeCreator.createColumnData(0, config);
    attributeData.getMetaData().setAttributeIndex(0);
    final TreeTargetNominalColumnData targetData = (TreeTargetNominalColumnData) targetCreator.createColumnData();
    return Pair.create(attributeData, targetData);
}
use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class BlobsInSetCellWorkflowTest method createBDT.
/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable createBDT(final ExecutionContext exec) {
    // Single column "Sequence" holding a list of arbitrary data cells.
    final DataType listType = ListCell.getCollectionType(DataType.getType(DataCell.class));
    final DataColumnSpec sequenceSpec = new DataColumnSpecCreator("Sequence", listType).createSpec();
    final BufferedDataContainer container = exec.createDataContainer(new DataTableSpec(sequenceSpec));

    final int elementsPerList = LIST_SIZE * 2;
    for (int row = 0; row < ROW_COUNT; row++) {
        final Collection<DataCell> elements = new ArrayList<DataCell>();
        for (int pos = 0; pos < elementsPerList; pos++) {
            final String text = "Row_" + row + "; Cell index " + pos;
            // Even positions hold a blob cell, odd positions an ordinary string cell.
            final DataCell element;
            if (pos % 2 != 0) {
                element = new StringCell(text);
            } else {
                element = new LargeBlobCell(text, LargeBlobCell.SIZE_OF_CELL);
            }
            elements.add(element);
        }
        final String rowId = "someName_" + row;
        final ListCell listCell = CollectionCellFactory.createListCell(elements);
        container.addRowToTable(new DefaultRow(rowId, listCell));
    }
    container.close();
    return container.getTable();
}
Aggregations