Search in sources :

Example 46 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class CollectionSplitNodeModel method refineTypes.

/**
 * Replaces the spec of the argument table so that each column reported by
 * the cell factory carries the type and domain the factory determined.
 * All other columns keep their original spec.
 *
 * @param table the table whose spec is to be replaced
 * @param fac supplies the affected column specs together with their
 *            common types and domains (arrays are read index-aligned)
 * @param exec used to create the spec-replaced table
 * @return the result of {@code exec.createSpecReplacerTable} for the new spec
 */
private BufferedDataTable refineTypes(final BufferedDataTable table, final SplitCellFactory fac, final ExecutionContext exec) {
    final DataTableSpec oldSpec = table.getDataTableSpec();
    final int columnCount = oldSpec.getNumColumns();
    final DataColumnSpec[] refinedSpecs = new DataColumnSpec[columnCount];
    // remember the position of every column by name, start from a copy of the old specs
    final HashMap<String, Integer> positionByName = new HashMap<String, Integer>();
    for (int pos = 0; pos < columnCount; pos++) {
        final DataColumnSpec current = oldSpec.getColumnSpec(pos);
        positionByName.put(current.getName(), pos);
        refinedSpecs[pos] = current;
    }
    final DataColumnSpec[] replacedSpecs = fac.getColumnSpecs();
    final DataType[] commonTypes = fac.getCommonTypes();
    final DataColumnDomain[] columnDomains = fac.getDomains();
    int k = 0;
    for (DataColumnSpec replaced : replacedSpecs) {
        // look up where the factory column lives in the table
        final int target = positionByName.get(replaced.getName());
        final DataColumnSpecCreator refiner = new DataColumnSpecCreator(refinedSpecs[target]);
        refiner.setType(commonTypes[k]);
        refiner.setDomain(columnDomains[k]);
        refinedSpecs[target] = refiner.createSpec();
        k++;
    }
    return exec.createSpecReplacerTable(table, new DataTableSpec(oldSpec.getName(), refinedSpecs));
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) HashMap(java.util.HashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomain(org.knime.core.data.DataColumnDomain) DataType(org.knime.core.data.DataType)

Example 47 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class CollectionSplitNodeModel method countNewColumns.

/**
 * Scans all rows of the argument table to find the largest number of
 * elements in the target collection column, then creates that many new
 * column specs with names that do not clash with existing columns.
 *
 * @param table the input table
 * @param exec receives progress updates and is polled for cancellation
 * @return one freshly created column spec per collection position
 * @throws InvalidSettingsException declared for the target column lookup
 * @throws CanceledExecutionException if execution was canceled while scanning
 */
private DataColumnSpec[] countNewColumns(final BufferedDataTable table, final ExecutionMonitor exec) throws InvalidSettingsException, CanceledExecutionException {
    final DataTableSpec inSpec = table.getDataTableSpec();
    final long totalRows = table.size();
    final int targetColIndex = getTargetColIndex(inSpec);
    long processed = 0;
    int widest = 0;
    for (DataRow row : table) {
        final DataCell cell = row.getCell(targetColIndex);
        if (!cell.isMissing()) {
            final int elementCount = ((CollectionDataValue) cell).size();
            if (elementCount > widest) {
                widest = elementCount;
            }
        }
        // fraction uses the zero-based count, the message the one-based count
        final double fraction = processed / (double) totalRows;
        processed++;
        exec.setProgress(fraction, "Determining maximum element count, row \"" + row.getKey() + "\" (" + processed + "/" + totalRows + ")");
        exec.checkCanceled();
    }
    // names that are already taken and must not be reused
    final HashSet<String> takenNames = new HashSet<String>();
    for (DataColumnSpec existing : inSpec) {
        takenNames.add(existing.getName());
    }
    final DataColumnSpec targetSpec = inSpec.getColumnSpec(targetColIndex);
    if (m_settings.isReplaceInputColumn()) {
        // the replaced column's name is free again
        takenNames.remove(targetSpec.getName());
    }
    final DataType elementType = targetSpec.getType().getCollectionElementType();
    final DataColumnSpec[] result = new DataColumnSpec[widest];
    for (int pos = 0; pos < result.length; pos++) {
        final String stem = "Split Value " + (pos + 1);
        String candidate = stem;
        // append "(#n)" with increasing n until the name is unique
        for (int suffix = 1; !takenNames.add(candidate); suffix++) {
            candidate = stem + "(#" + suffix + ")";
        }
        result[pos] = new DataColumnSpecCreator(candidate, elementType).createSpec();
    }
    return result;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataCell(org.knime.core.data.DataCell) DataType(org.knime.core.data.DataType) CollectionDataValue(org.knime.core.data.collection.CollectionDataValue) HashSet(java.util.HashSet)

Example 48 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class RuleEngineNodeModel method createRearranger.

/**
 * Creates a column rearranger that appends one column containing, per row,
 * the outcome of the first matching rule or, if no rule matches, the
 * default label (a literal or the value of a referenced column).
 *
 * <p>The type of the appended column is the common super type of all rule
 * outcome types and the default label type. If that common type only
 * offers {@link DataValue}, or no type could be collected at all, the
 * column is of string type.
 *
 * @param inSpec the spec of the input table
 * @param rules the rules, checked in list order
 * @return the rearranger with the new column appended
 * @throws InvalidSettingsException if the default label is configured as a
 *             column reference but is shorter than three characters, is
 *             not enclosed in {@code $}, or names a column that does not
 *             exist in {@code inSpec}
 */
private ColumnRearranger createRearranger(final DataTableSpec inSpec, final List<Rule> rules) throws InvalidSettingsException {
    ColumnRearranger crea = new ColumnRearranger(inSpec);
    String newColName = DataTableSpec.getUniqueColumnName(inSpec, m_settings.getNewColName());
    final int defaultLabelColumnIndex;
    if (m_settings.getDefaultLabelIsColumn()) {
        if (m_settings.getDefaultLabel().length() < 3) {
            throw new InvalidSettingsException("Default label is not a column reference");
        }
        if (!m_settings.getDefaultLabel().startsWith("$") || !m_settings.getDefaultLabel().endsWith("$")) {
            throw new InvalidSettingsException("Column references in default label must be enclosed in $");
        }
        // strip the enclosing '$' characters
        String colRef = m_settings.getDefaultLabel().substring(1, m_settings.getDefaultLabel().length() - 1);
        defaultLabelColumnIndex = inSpec.findColumnIndex(colRef);
        if (defaultLabelColumnIndex == -1) {
            throw new InvalidSettingsException("Column '" + m_settings.getDefaultLabel() + "' for default label does not exist in input table");
        }
    } else {
        defaultLabelColumnIndex = -1;
    }
    // determine output type
    List<DataType> types = new ArrayList<DataType>();
    // add outcome column types
    for (Rule r : rules) {
        Object outcome = r.getOutcome();
        if (outcome instanceof ColumnReference) {
            types.add(((ColumnReference) outcome).spec.getType());
        } else if (outcome instanceof Double) {
            types.add(DoubleCell.TYPE);
        } else if (outcome instanceof Integer) {
            types.add(IntCell.TYPE);
        } else if (outcome.toString().length() > 0) {
            types.add(StringCell.TYPE);
        }
    }
    // add the type of the default label: column type, or int/double/string for a literal
    if (defaultLabelColumnIndex >= 0) {
        types.add(inSpec.getColumnSpec(defaultLabelColumnIndex).getType());
    } else if (m_settings.getDefaultLabel().length() > 0) {
        try {
            Integer.parseInt(m_settings.getDefaultLabel());
            types.add(IntCell.TYPE);
        } catch (NumberFormatException ex) {
            try {
                Double.parseDouble(m_settings.getDefaultLabel());
                types.add(DoubleCell.TYPE);
            } catch (NumberFormatException ex1) {
                types.add(StringCell.TYPE);
            }
        }
    }
    final DataType outType;
    if (types.size() > 0) {
        DataType temp = types.get(0);
        for (int i = 1; i < types.size(); i++) {
            temp = DataType.getCommonSuperType(temp, types.get(i));
        }
        if ((temp.getValueClasses().size() == 1) && temp.getValueClasses().contains(DataValue.class)) {
            // a non-native type, we replace it with string
            temp = StringCell.TYPE;
        }
        outType = temp;
    } else {
        outType = StringCell.TYPE;
    }
    DataColumnSpec cs = new DataColumnSpecCreator(newColName, outType).createSpec();
    crea.append(new SingleCellFactory(cs) {

        /**
         * Converts a cell taken from an input column to a string cell if
         * the output column is of string type and the cell is neither
         * missing nor already of string type; otherwise returns it as is.
         */
        private DataCell toOutputCell(final DataCell cell) {
            if (outType.equals(StringCell.TYPE) && !cell.isMissing() && !cell.getType().equals(StringCell.TYPE)) {
                return new StringCell(cell.toString());
            }
            return cell;
        }

        @Override
        public DataCell getCell(final DataRow row) {
            // the first matching rule determines the result
            for (Rule r : rules) {
                if (r.matches(row)) {
                    Object outcome = r.getOutcome();
                    if (outcome instanceof ColumnReference) {
                        return toOutputCell(row.getCell(((ColumnReference) outcome).index));
                    } else if (outType.equals(IntCell.TYPE)) {
                        return new IntCell((Integer) outcome);
                    } else if (outType.equals(DoubleCell.TYPE)) {
                        // when int and double outcomes are mixed the output type can be
                        // double while this outcome is an Integer; a (Double) cast would fail
                        return new DoubleCell(((Number) outcome).doubleValue());
                    } else {
                        return new StringCell(outcome.toString());
                    }
                }
            }
            // no rule matched: fall back to the default label
            if (defaultLabelColumnIndex >= 0) {
                // missing cells are passed through unchanged, same as for column-reference outcomes
                return toOutputCell(row.getCell(defaultLabelColumnIndex));
            } else if (m_settings.getDefaultLabel().length() > 0) {
                String l = m_settings.getDefaultLabel();
                if (outType.equals(StringCell.TYPE)) {
                    return new StringCell(l);
                }
                try {
                    int i = Integer.parseInt(l);
                    return new IntCell(i);
                } catch (NumberFormatException ex) {
                    try {
                        double d = Double.parseDouble(l);
                        return new DoubleCell(d);
                    } catch (NumberFormatException ex1) {
                        return new StringCell(l);
                    }
                }
            } else {
                return DataType.getMissingCell();
            }
        }
    });
    return crea;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) DataValue(org.knime.core.data.DataValue) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) StringCell(org.knime.core.data.def.StringCell) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) ColumnReference(org.knime.base.node.rules.Rule.ColumnReference)

Example 49 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class AbstractTreeEnsembleModel method createNominalNumericPredictorRecord.

/**
 * Builds a predictor record from a row, mapping each training column name
 * to a value: {@code PredictorRecord.NULL} for missing cells, unknown
 * nominal values and NaN numbers; the assigned integer for known nominal
 * values; the double value for numeric cells.
 *
 * @param filterRow the row to convert; cells are read at the same indices
 *            as the columns of {@code trainSpec}
 * @param trainSpec the spec providing column names and types
 * @return the record holding one entry per column, in column order
 * @throws IllegalStateException if a non-missing cell belongs to a column
 *             that is neither nominal nor numeric
 */
private PredictorRecord createNominalNumericPredictorRecord(final DataRow filterRow, final DataTableSpec trainSpec) {
    final int nrCols = trainSpec.getNumColumns();
    // size the map so that nrCols entries fit at a load factor of 0.75
    final int initialCapacity = (int) (nrCols / 0.75 + 1.0);
    final Map<String, Object> values = new LinkedHashMap<String, Object>(initialCapacity);
    for (int colIdx = 0; colIdx < nrCols; colIdx++) {
        final DataColumnSpec colSpec = trainSpec.getColumnSpec(colIdx);
        final String name = colSpec.getName();
        final DataType type = colSpec.getType();
        final DataCell cell = filterRow.getCell(colIdx);
        final Object recordValue;
        if (cell.isMissing()) {
            recordValue = PredictorRecord.NULL;
        } else if (type.isCompatible(NominalValue.class)) {
            final TreeNominalColumnMetaData nominalMeta = (TreeNominalColumnMetaData) m_metaData.getAttributeMetaData(colIdx);
            final NominalValueRepresentation[] knownValues = nominalMeta.getValues();
            final String text = cell.toString();
            // look up the integer the model assigned to this nominal value
            int assigned = -1;
            for (int v = 0; v < knownValues.length && assigned == -1; v++) {
                if (knownValues[v].getNominalValue().equals(text)) {
                    assigned = knownValues[v].getAssignedInteger();
                    break;
                }
            }
            if (assigned == -1) {
                // value is not known to the model: treat as missing value
                recordValue = PredictorRecord.NULL;
            } else {
                recordValue = Integer.valueOf(assigned);
            }
        } else if (type.isCompatible(DoubleValue.class)) {
            final double number = ((DoubleValue) cell).getDoubleValue();
            if (Double.isNaN(number)) {
                // NaNs are treated as missing values (bug AP-7169)
                recordValue = PredictorRecord.NULL;
            } else {
                recordValue = Double.valueOf(number);
            }
        } else {
            throw new IllegalStateException("Expected nominal or numeric column type for column \"" + name + "\" but got \"" + type + "\"");
        }
        values.put(name, recordValue);
    }
    return new PredictorRecord(values);
}
Also used : NominalValue(org.knime.core.data.NominalValue) NominalValueRepresentation(org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation) LinkedHashMap(java.util.LinkedHashMap) TreeNominalColumnMetaData(org.knime.base.node.mine.treeensemble2.data.TreeNominalColumnMetaData) DataColumnSpec(org.knime.core.data.DataColumnSpec) DoubleValue(org.knime.core.data.DoubleValue) PredictorRecord(org.knime.base.node.mine.treeensemble2.data.PredictorRecord) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell)

Example 50 with DataType

use of org.knime.core.data.DataType in project knime-core by knime.

the class RegressionTreeModelPortObjectSpec method calculateFilterIndices.

/**
 * Maps every column of the learn table spec to the index of the equally
 * named column in the given table spec and verifies type compatibility.
 *
 * <p>A column is rejected if the learn column is compatible with nominal,
 * numeric, bit vector or byte vector values while the corresponding data
 * column is not.
 *
 * @param testTableInput the spec of the table to check
 * @return for each learn column (in learn spec order) the index of the
 *         matching column in {@code testTableInput}
 * @throws InvalidSettingsException if a required column does not exist in
 *             {@code testTableInput} or is not of the expected type
 */
public int[] calculateFilterIndices(final DataTableSpec testTableInput) throws InvalidSettingsException {
    DataTableSpec learnSpec = getLearnTableSpec();
    // check existence and types of columns, create reordering
    final int nrLearnCols = learnSpec.getNumColumns();
    int[] result = new int[nrLearnCols];
    for (int i = 0; i < nrLearnCols; i++) {
        DataColumnSpec learnCol = learnSpec.getColumnSpec(i);
        final String colName = learnCol.getName();
        int dataColIndex = testTableInput.findColumnIndex(colName);
        if (dataColIndex < 0) {
            throw new InvalidSettingsException("Required data column \"" + colName + "\" does not exist in table");
        }
        DataColumnSpec dataCol = testTableInput.getColumnSpec(dataColIndex);
        // expected type
        DataType eType = learnCol.getType();
        // actual type
        DataType aType = dataCol.getType();
        // the checks are independent; if several fail the last one is reported
        String errorType = null;
        if (eType.isCompatible(NominalValue.class) && !aType.isCompatible(NominalValue.class)) {
            errorType = "nominal";
        }
        if (eType.isCompatible(DoubleValue.class) && !aType.isCompatible(DoubleValue.class)) {
            errorType = "numeric";
        }
        if (eType.isCompatible(BitVectorValue.class) && !aType.isCompatible(BitVectorValue.class)) {
            errorType = "fingerprint/bitvector";
        }
        if (eType.isCompatible(ByteVectorValue.class) && !aType.isCompatible(ByteVectorValue.class)) {
            errorType = "fingerprint/bytevector";
        }
        if (errorType != null) {
            // the original concatenation lacked the space between "but" and "is"
            throw new InvalidSettingsException("Column \"" + colName + "\" does exist in the data but is not of the expected " + errorType + " type");
        }
        result[i] = dataColIndex;
    }
    return result;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) DoubleValue(org.knime.core.data.DoubleValue) NominalValue(org.knime.core.data.NominalValue) DataType(org.knime.core.data.DataType) ByteVectorValue(org.knime.core.data.vector.bytevector.ByteVectorValue) BitVectorValue(org.knime.core.data.vector.bitvector.BitVectorValue)

Aggregations

DataType (org.knime.core.data.DataType)330 DataColumnSpec (org.knime.core.data.DataColumnSpec)142 DataTableSpec (org.knime.core.data.DataTableSpec)101 DataCell (org.knime.core.data.DataCell)96 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)95 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)71 DoubleValue (org.knime.core.data.DoubleValue)67 DataRow (org.knime.core.data.DataRow)61 ArrayList (java.util.ArrayList)55 SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString)34 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)32 DefaultRow (org.knime.core.data.def.DefaultRow)24 HashSet (java.util.HashSet)23 HashMap (java.util.HashMap)20 StringCell (org.knime.core.data.def.StringCell)20 NominalValue (org.knime.core.data.NominalValue)18 DoubleCell (org.knime.core.data.def.DoubleCell)18 IntCell (org.knime.core.data.def.IntCell)18 BitVectorValue (org.knime.core.data.vector.bitvector.BitVectorValue)18 ByteVectorValue (org.knime.core.data.vector.bytevector.ByteVectorValue)18