use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class ClusterNodeModel method createAppendedSpec.
/**
 * Builds the output table spec: the given spec followed by one additional
 * string column whose domain enumerates one label per cluster.
 *
 * @param originalSpec the spec the new column is appended to
 * @return a spec consisting of all original columns plus the label column
 */
private DataTableSpec createAppendedSpec(final DataTableSpec originalSpec) {
    // one possible value per cluster: CLUSTER + index
    final int clusterCount = m_nrOfClusters.getIntValue();
    final DataCell[] clusterLabels = new DataCell[clusterCount];
    for (int idx = 0; idx < clusterCount; idx++) {
        clusterLabels[idx] = new StringCell(CLUSTER + idx);
    }

    // pick a column name that is not yet taken in the original spec
    // ("Cluster", then "Cluster_1", "Cluster_2", ...; fixes bug #1022)
    String labelColName = "Cluster";
    for (int suffix = 1; originalSpec.getColumnSpec(labelColName) != null; suffix++) {
        labelColName = "Cluster_" + suffix;
    }

    // string column carrying the enumerated labels as its domain
    final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(labelColName, StringCell.TYPE);
    specCreator.setDomain(new DataColumnDomainCreator(clusterLabels).createDomain());
    final DataTableSpec appendedSpec = new DataTableSpec(specCreator.createSpec());

    return new DataTableSpec(originalSpec, appendedSpec);
}
use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class PMMLDecisionTreeTranslator method getClassCount.
/**
 * Collects the score distribution entries of the given node into a map from
 * category (as a string cell) to record count, keeping the order in which
 * the node reports them.
 *
 * @param node the node whose score distribution array is read
 * @return insertion-ordered map of category cell to record count
 */
private LinkedHashMap<DataCell, Double> getClassCount(final Node node) {
    final ScoreDistribution[] distributions = node.getScoreDistributionArray();
    final LinkedHashMap<DataCell, Double> countsByCategory = new LinkedHashMap<DataCell, Double>();
    for (int idx = 0; idx < distributions.length; idx++) {
        final ScoreDistribution entry = distributions[idx];
        final DataCell categoryCell = new StringCell(entry.getValue());
        countsByCategory.put(categoryCell, entry.getRecordCount());
    }
    return countsByCategory;
}
use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class CellSplitterCellFactory method tokenizeAndCreateCollectionsCell.
/**
 * Tokenizes the string representation of the given data cell and returns
 * an array of data cells. The array contains only one data cell, which
 * is a collection cell. Whether it is a List or Set cell is specified in
 * the settings. The collection cell contains string cells. For each
 * token one string cell is created.
 *
 * <p>A missing input cell yields a missing cell, unless the settings ask
 * for an empty string instead; in that case the result is a collection
 * holding a single empty string cell. That collection honors the list/set
 * setting as well (it was previously always created as a list).
 *
 * @param inputCell the cell to tokenize (its string representation)
 * @return An array containing exactly one cell: a collection cell storing
 *         one string cell per token, or a missing cell as described above.
 * @since 2.6
 */
private DataCell[] tokenizeAndCreateCollectionsCell(final DataCell inputCell) {
    final Collection<DataCell> strColl;
    if (inputCell.isMissing()) {
        // missing value handling
        if (!m_settings.isUseEmptyString()) {
            return new DataCell[] {DataType.getMissingCell()};
        }
        strColl = new ArrayList<DataCell>(1);
        strColl.add(EMPTY_STRINGCELL);
    } else {
        final String inputString = getInputString(inputCell);
        // init the tokenizer
        final StringReader inputReader = new StringReader(inputString);
        final Tokenizer tokenizer = prepareTokenizer(inputReader);
        strColl = new ArrayList<DataCell>();
        String token;
        while ((token = tokenizer.nextToken()) != null) {
            if (m_settings.isTrim()) {
                token = token.trim();
            }
            strColl.add(new StringCell(token));
        }
    }

    // single place deciding the collection kind, so that the empty-string
    // replacement and the regular tokens always produce the same cell type
    final DataCell[] result = new DataCell[1];
    if (m_settings.isOutputAsList()) {
        result[0] = CollectionCellFactory.createListCell(strColl);
    } else {
        result[0] = CollectionCellFactory.createSetCell(strColl);
    }
    return result;
}
use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class ColumnAutoTypeCasterNodeModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>Scans the included columns of the first input table (all rows, or only
 * the first rows when quick scan is enabled) to determine one target type
 * per column, replaces these columns by converted ones and returns the
 * converted table together with a table listing, per column, the final type
 * and the row that determined it.
 *
 * <p>Columns for which no type could be determined (every scanned cell
 * matched the missing value pattern, or no row was scanned) fall back to the
 * string type; reason entries that were never set are written as missing
 * cells.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    double progress = 0;
    final BufferedDataTable data = inData[0];
    BufferedDataTable outTable = inData[0];
    final DataTableSpec inSpec = data.getDataTableSpec();
    final String[] incls = m_conf.applyTo(inSpec).getIncludes();
    final DataType[] types = new DataType[incls.length];
    final double max = incls.length + data.size();
    final String[] colNames = { "Column name", "Final column type", "Row determining final column type" };
    final DataType[] colTypes = new DataType[] { StringCell.TYPE, StringCell.TYPE, StringCell.TYPE };
    BufferedDataContainer reasonsCon = exec.createDataContainer(new DataTableSpec(colNames, colTypes));
    setReasons(new String[incls.length][3]);
    if (data.size() > 0) {
        // empty table check
        // resolve the column positions once instead of once per cell
        final int[] colIndices = new int[incls.length];
        for (int i = 0; i < incls.length; i++) {
            colIndices[i] = inSpec.findColumnIndex(incls[i]);
        }
        SimpleDateFormat dateFormat = new SimpleDateFormat(m_dateFormat);
        long numberOfRows = m_quickScan ? Math.min(m_numberOfRows, data.size()) : data.size();
        // explicit iterator so that the very iterator used for scanning is
        // closed, also when the quick scan stops before the end of the table
        final org.knime.core.data.container.CloseableRowIterator rowIt = data.iterator();
        try {
            while (numberOfRows-- > 0 && rowIt.hasNext()) {
                final DataRow row = rowIt.next();
                for (int i = 0; i < incls.length; i++) {
                    // guess for each cell in each column the best matching datatype
                    DataCell c = row.getCell(colIndices[i]);
                    if (!c.isMissing() && c.toString().equals(m_missValPat)) {
                        // cells equal to the missing value pattern do not influence the type
                        continue;
                    }
                    DataType newType = typeGuesser(c, dateFormat);
                    if (types[i] != null) {
                        DataType toSet = setType(types[i], newType);
                        if (!toSet.equals(types[i])) {
                            // this row changed the column type: remember it as the reason
                            m_reasons[i][2] = row.getKey().getString();
                            m_reasons[i][1] = toSet.toString();
                            m_reasons[i][0] = incls[i];
                        }
                        types[i] = toSet;
                    } else {
                        // first typed cell of this column
                        types[i] = newType;
                        String r = row.getKey().toString();
                        r += m_quickScan ? (" based on a quickscan.") : "";
                        m_reasons[i][2] = r;
                        m_reasons[i][1] = newType.toString();
                        m_reasons[i][0] = incls[i];
                    }
                    exec.checkCanceled();
                }
                exec.checkCanceled();
                progress++;
                exec.setProgress(progress / max);
            }
        } finally {
            rowIt.close();
        }
        final DataType missingType = DataType.getMissingCell().getType();
        for (int i = 0; i < types.length; i++) {
            // if one column only contains missingCells (or no type could be
            // determined at all) than set column type to StringCell
            if (types[i] == null || types[i].equals(missingType)) {
                types[i] = StringCell.TYPE;
            }
        }
        ColumnRearranger arrange = new ColumnRearranger(inSpec);
        for (int i = 0; i < incls.length; i++) {
            final int colIdx = colIndices[i];
            final DataType type = types[i];
            DataColumnSpecCreator colSpecCreator = new DataColumnSpecCreator(incls[i], type);
            DataColumnSpec colSpec = colSpecCreator.createSpec();
            if (type.equals(DateAndTimeCell.TYPE)) {
                arrange.replace(createDateAndTimeConverter(colIdx, colSpec), colIdx);
            } else if (type.equals(LongCell.TYPE)) {
                arrange.replace(createLongConverter(colIdx, colSpec), colIdx);
            } else {
                arrange.replace(createNumberConverter(colIdx, type, colSpec), colIdx);
            }
            progress++;
            exec.setProgress(progress / max);
            exec.checkCanceled();
        }
        outTable = exec.createColumnRearrangeTable(data, arrange, exec);
        for (int i = 0; i < m_reasons.length; i++) {
            DataCell[] row = new DataCell[m_reasons[i].length];
            for (int j = 0; j < m_reasons[i].length; j++) {
                // entries stay null for columns whose type was never determined
                final String reason = m_reasons[i][j];
                if (reason == null) {
                    row[j] = DataType.getMissingCell();
                } else {
                    row[j] = new StringCell(reason);
                }
            }
            reasonsCon.addRowToTable(new DefaultRow(RowKey.createRowKey((long) i), row));
        }
    }
    reasonsCon.close();
    BufferedDataTable outReasons = reasonsCon.getTable();
    return new BufferedDataTable[] { outTable, outReasons };
}
use of org.knime.core.data.def.StringCell in project knime-core by knime.
the class CellReplacerNodeModel method createColumnRearranger.
/**
 * Creates the rearranger that looks up each value of the target column in a
 * dictionary and writes the dictionary's output value either into the target
 * column or into a newly appended column.
 *
 * <p>The dictionary is read from {@code dictTable} on the first call to the
 * cell factory. Search keys come from the configured dictionary input column
 * (each element separately if that column is a collection) or from the row
 * ID; replacement values come from the configured output column or the row
 * ID. Values without a dictionary match are either kept or replaced by a
 * missing cell, depending on the no-match policy.
 *
 * @param spec spec of the table containing the target column
 * @param dictSpec spec of the dictionary table
 * @param dictTable the dictionary table; only read when cells are computed
 * @param dictionaryInitExec receives progress and is checked for
 *            cancellation while the dictionary is read
 * @return the configured rearranger
 * @throws InvalidSettingsException if the target column, a dictionary column
 *             or the name of the column to append is missing or unknown
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec spec, final DataTableSpec dictSpec, final BufferedDataTable dictTable, final ExecutionMonitor dictionaryInitExec) throws InvalidSettingsException {
    // --- target column -------------------------------------------------
    final String targetColName = m_targetColModel.getStringValue();
    if (targetColName == null || targetColName.isEmpty()) {
        throw new InvalidSettingsException("No target column selected");
    }
    final int targetColIndex = spec.findColumnIndex(targetColName);
    if (targetColIndex < 0) {
        throw new InvalidSettingsException("No such column \"" + targetColName + "\"");
    }
    final DataColumnSpec targetColSpec = spec.getColumnSpec(targetColIndex);

    // --- dictionary search column (a negative index means: use row ID) ---
    final String dictInputColName = m_dictInputColModel.getStringValue();
    final int dictInputColIndex = dictSpec.findColumnIndex(dictInputColName);
    final boolean dictInputIsCollection;
    if (m_dictInputColModel.useRowID()) {
        dictInputIsCollection = false;
    } else {
        if (dictInputColIndex < 0) {
            throw new InvalidSettingsException("No such column \"" + dictInputColName + "\"");
        }
        dictInputIsCollection = dictSpec.getColumnSpec(dictInputColIndex).getType().isCollectionType();
    }

    // --- dictionary replacement column (a negative index means: use row ID) ---
    final String dictOutputColName = m_dictOutputColModel.getStringValue();
    final int dictOutputColIndex = dictSpec.findColumnIndex(dictOutputColName);
    final DataType dictOutputColType;
    if (m_dictOutputColModel.useRowID()) {
        dictOutputColType = StringCell.TYPE;
    } else {
        if (dictOutputColIndex < 0) {
            throw new InvalidSettingsException("No such column \"" + dictOutputColName + "\"");
        }
        dictOutputColType = dictSpec.getColumnSpec(dictOutputColIndex).getType();
    }

    // --- result type: unmatched input values may end up in the output ---
    final NoMatchPolicy noMatchPolicy = getNoMatchPolicy();
    DataType outputType = dictOutputColType;
    switch (noMatchPolicy) {
        case Input:
            outputType = DataType.getCommonSuperType(dictOutputColType, targetColSpec.getType());
            break;
        default:
            break;
    }
    final boolean keepInputIfUnmatched = noMatchPolicy == NoMatchPolicy.Input;

    // --- name of the result column ---------------------------------------
    final boolean appendColumn = m_appendColumnModel.getBooleanValue();
    final String resultColName;
    if (appendColumn) {
        final String requestedName = m_appendColumnNameModel.getStringValue();
        if (requestedName == null || requestedName.isEmpty()) {
            throw new InvalidSettingsException("No new column name given");
        }
        resultColName = DataTableSpec.getUniqueColumnName(spec, requestedName);
    } else {
        resultColName = targetColSpec.getName();
    }

    final DataColumnSpec resultColSpec = new DataColumnSpecCreator(resultColName, outputType).createSpec();
    final CellFactory replacerFactory = new SingleCellFactory(resultColSpec) {

        /** Search key to replacement; filled on first use. */
        private Map<DataCell, DataCell> m_lookup;

        @Override
        public DataCell getCell(final DataRow row) {
            try {
                loadDictionaryIfNeeded();
            } catch (CanceledExecutionException e) {
                // cancellation done by the framework
                return DataType.getMissingCell();
            }
            final DataCell searchKey = row.getCell(targetColIndex);
            final DataCell replacement = m_lookup.get(searchKey);
            if (replacement != null) {
                return replacement;
            }
            if (keepInputIfUnmatched) {
                return searchKey;
            }
            return DataType.getMissingCell();
        }

        /** Reads the dictionary table into the lookup map on the first call. */
        private void loadDictionaryIfNeeded() throws CanceledExecutionException {
            if (m_lookup != null) {
                return;
            }
            m_lookup = new HashMap<DataCell, DataCell>();
            final double totalRows = dictTable.size();
            int rowsRead = 0;
            for (DataRow dictRow : dictTable) {
                // progress uses the zero-based count, the message the one-based row number
                final int rowNumber = rowsRead + 1;
                dictionaryInitExec.setProgress(rowsRead / totalRows, "Reading dictionary into memory, row " + rowNumber);
                rowsRead = rowNumber;
                dictionaryInitExec.checkCanceled();

                final DataCell replacement;
                if (dictOutputColIndex < 0) {
                    replacement = new StringCell(dictRow.getKey().getString());
                } else {
                    replacement = dictRow.getCell(dictOutputColIndex);
                }
                final DataCell searchKey;
                if (dictInputColIndex < 0) {
                    searchKey = new StringCell(dictRow.getKey().getString());
                } else {
                    searchKey = dictRow.getCell(dictInputColIndex);
                }

                if (dictInputIsCollection && !searchKey.isMissing()) {
                    // every element of the collection is a search key of its own
                    for (DataCell element : (CollectionDataValue) searchKey) {
                        register(element, replacement);
                    }
                } else {
                    // plain cell, or a missing cell (which is not unpacked)
                    register(searchKey, replacement);
                }
            }
        }

        /** Stores one pair and warns if the key was already present. */
        private void register(final DataCell searchKey, final DataCell replacement) {
            final DataCell previous = m_lookup.put(searchKey, replacement);
            if (previous != null) {
                setWarningMessage("Duplicate search key \"" + searchKey + "\"");
            }
        }
    };

    final ColumnRearranger rearranger = new ColumnRearranger(spec);
    if (appendColumn) {
        rearranger.append(replacerFactory);
    } else {
        rearranger.replace(replacerFactory, targetColIndex);
    }
    return rearranger;
}
Aggregations