Example use of org.knime.core.data.DataColumnProperties in the knime-core project by knime.
From the class MissingValueHandling2NodeDialogPane, method createAsIncompatibleMarkedColumnSpec.
/**
 * Derives a column spec from {@code originalSpec} whose properties additionally carry the entries
 * returned by {@code createIncompatiblePropertiesMap()}.
 *
 * <p>If the original spec has properties, the marker entries are written over them via
 * {@code cloneAndOverwrite}; otherwise the marker entries become the only properties.
 *
 * @param originalSpec the column spec the marked spec is created from
 * @return the newly created column spec carrying the marker properties
 */
private static DataColumnSpec createAsIncompatibleMarkedColumnSpec(final DataColumnSpec originalSpec) {
    final DataColumnSpecCreator specCreator = new DataColumnSpecCreator(originalSpec);
    final DataColumnProperties existingProps = originalSpec.getProperties();
    final Map<String, String> markerEntries = createIncompatiblePropertiesMap();
    // Keep whatever properties are already present and only add/replace the marker entries.
    final DataColumnProperties markedProps = existingProps == null
        ? new DataColumnProperties(markerEntries)
        : existingProps.cloneAndOverwrite(markerEntries);
    specCreator.setProperties(markedProps);
    return specCreator.createSpec();
}
Example use of org.knime.core.data.DataColumnProperties in the knime-core project by knime.
From the class LogRegCoordinator, method init.
/**
 * Initialize instance and check if settings are consistent.
 *
 * <p>Determines the target column and the regressor columns from the settings and {@code inSpec},
 * collects bit vector, byte vector and numeric collection columns in {@code m_specialColumns}, and
 * creates the PMML output spec stored in {@code m_pmmlOutSpec}.
 *
 * @param inSpec the spec of the input table
 * @param exclude names of columns that must not be used as regressors
 * @throws InvalidSettingsException if no column is included, no nominal column is found during auto
 *             configuration, the target column is not part of the input or not nominal, an included
 *             column has an unsupported type, or the target domain lists fewer than two values
 */
private void init(final DataTableSpec inSpec, final Set<String> exclude) throws InvalidSettingsException {
    final List<String> inputCols = new ArrayList<String>();
    final FilterResult includedColumns = m_settings.getIncludedColumns().applyTo(inSpec);
    for (String column : includedColumns.getIncludes()) {
        inputCols.add(column);
    }
    inputCols.remove(m_settings.getTargetColumn());
    if (inputCols.isEmpty()) {
        throw new InvalidSettingsException("At least one column must be included.");
    }

    // Auto configuration when target is not set and nothing is excluded by the column filter.
    if (null == m_settings.getTargetColumn() && includedColumns.getExcludes().length == 0) {
        for (int i = 0; i < inSpec.getNumColumns(); i++) {
            final DataColumnSpec colSpec = inSpec.getColumnSpec(i);
            final String colName = colSpec.getName();
            // NOTE(review): this removes every input column from inputCols, so no regressor is left
            // after auto configuration - confirm that this is intended.
            inputCols.remove(colName);
            // The target is overwritten on each match, so the last nominal column wins.
            if (colSpec.getType().isCompatible(NominalValue.class)) {
                m_settings.setTargetColumn(colName);
            }
        }
        // when there is no column with nominal data
        if (null == m_settings.getTargetColumn()) {
            throw new InvalidSettingsException("No column in spec compatible to \"NominalValue\".");
        }
    }

    // remove all columns that should not be used
    inputCols.removeAll(exclude);

    // The target may still be null here (no target configured while some columns are excluded). It is
    // therefore compared null-safely below, so that the missing target is reported as an
    // InvalidSettingsException instead of a NullPointerException.
    final String targetColumn = m_settings.getTargetColumn();
    DataColumnSpec targetColSpec = null;
    final List<DataColumnSpec> regressorColSpecs = new ArrayList<DataColumnSpec>();
    m_specialColumns = new LinkedList<>();
    for (int i = 0; i < inSpec.getNumColumns(); i++) {
        final DataColumnSpec colSpec = inSpec.getColumnSpec(i);
        final String colName = colSpec.getName();
        final DataType type = colSpec.getType();
        if (colName.equals(targetColumn)) {
            if (type.isCompatible(NominalValue.class)) {
                targetColSpec = colSpec;
            } else {
                throw new InvalidSettingsException("Type of column \"" + colName + "\" is not nominal.");
            }
        } else if (inputCols.contains(colName)) {
            if (type.isCompatible(DoubleValue.class) || type.isCompatible(NominalValue.class)) {
                regressorColSpecs.add(colSpec);
            } else if (type.isCompatible(BitVectorValue.class) || type.isCompatible(ByteVectorValue.class)
                || (type.isCollectionType() && type.getCollectionElementType().isCompatible(DoubleValue.class))) {
                m_specialColumns.add(colSpec);
                // We change the table spec later to encode it as a string.
                regressorColSpecs.add(new DataColumnSpecCreator(colSpec.getName(), StringCell.TYPE).createSpec());
            } else {
                throw new InvalidSettingsException("Type of column \"" + colName
                    + "\" is not one of the allowed types, which are numeric or nominal.");
            }
        }
    }

    if (null == targetColSpec) {
        throw new InvalidSettingsException("The target is not in the input.");
    }

    // Check if target has at least two categories (only possible when the domain lists its values).
    final Set<DataCell> targetValues = targetColSpec.getDomain().getValues();
    if (targetValues != null && targetValues.size() < 2) {
        throw new InvalidSettingsException("The target column \"" + targetColSpec.getName()
            + "\" has one value, only. At least two target categories are expected.");
    }

    // Bit and byte vector columns are declared as string columns in the PMML spec; the original kind
    // is kept in the "realType" column property.
    final DataColumnSpec[] updatedSpecs = new DataColumnSpec[inSpec.getNumColumns()];
    for (int i = 0; i < updatedSpecs.length; i++) {
        final DataColumnSpec columnSpec = inSpec.getColumnSpec(i);
        final DataType type = columnSpec.getType();
        if (type.isCompatible(BitVectorValue.class) || type.isCompatible(ByteVectorValue.class)) {
            final DataColumnSpecCreator colSpecCreator =
                new DataColumnSpecCreator(columnSpec.getName(), StringCell.TYPE);
            colSpecCreator.setProperties(new DataColumnProperties(Collections.singletonMap("realType",
                type.isCompatible(BitVectorValue.class) ? "BitVector" : "ByteVector")));
            updatedSpecs[i] = colSpecCreator.createSpec();
        } else {
            updatedSpecs[i] = columnSpec;
        }
    }
    final DataTableSpec updated = new DataTableSpec(updatedSpecs);
    final PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(updated);
    creator.setTargetCols(Arrays.asList(targetColSpec));
    creator.setLearningCols(regressorColSpecs);
    // creator.addPreprocColNames(m_specialColumns.stream().flatMap(spec -> ));
    m_pmmlOutSpec = creator.createSpec();
}
Example use of org.knime.core.data.DataColumnProperties in the knime-core project by knime.
From the class SourceColumnsAsProperties, method toProperties.
/**
 * Creates {@link DataColumnProperties} that describe the columns selected by a column filter.
 *
 * <p>The filter is applied to {@code input}; the result is encoded by {@code indicesAsString} and
 * stored under the {@link #PROPKEY_SOURCE_COLUMN_INDICES} key. No other entry is added.
 *
 * @param selection The model for the selected columns.
 * @param input The input {@link DataTableSpec}.
 * @return The properties holding the encoded selection under {@link #PROPKEY_SOURCE_COLUMN_INDICES}.
 */
public static DataColumnProperties toProperties(final SettingsModelColumnFilter2 selection, final DataTableSpec input) {
    final FilterResult selectedColumns = selection.applyTo(input);
    final String encodedIndices = indicesAsString(selectedColumns, input);
    final Map<String, String> entries = new HashMap<String, String>();
    entries.put(PROPKEY_SOURCE_COLUMN_INDICES, encodedIndices);
    return new DataColumnProperties(entries);
}
Example use of org.knime.core.data.DataColumnProperties in the knime-core project by knime.
From the class PMMLGeneralRegressionTranslator, method exportTo.
/**
 * {@inheritDoc}
 *
 * <p>Adds a new GeneralRegressionModel to the PMML document and fills it from {@code m_content}:
 * local transformations for the vector columns, the mining schema, model type, function name and
 * the optional algorithm name, model name, target reference category and offset value, followed by
 * the parameter list, factor list, covariate list, PPMatrix, PCovMatrix (only if non-empty) and
 * ParamMatrix.
 *
 * @param pmmlDoc the PMML document the model is added to
 * @param spec the PMML port object spec used for column lookup and for writing the mining schema
 * @return the schema type of the general regression model
 * @throws UnsupportedOperationException if a vector column is neither a bit vector nor a byte vector
 *             (by type or by its "realType" property)
 */
@Override
public SchemaType exportTo(final PMMLDocument pmmlDoc, final PMMLPortObjectSpec spec) {
m_nameMapper = new DerivedFieldMapper(pmmlDoc);
GeneralRegressionModel reg = pmmlDoc.getPMML().addNewGeneralRegressionModel();
// NOTE(review): jsonBuilder is filled below, but its only consumer is the commented-out mining
// schema extension further down; within this method its content is currently never read.
final JsonObjectBuilder jsonBuilder = Json.createObjectBuilder();
// For every vector column with a recorded length, add one derived field per vector position.
if (!m_content.getVectorLengths().isEmpty()) {
LocalTransformations localTransformations = reg.addNewLocalTransformations();
for (final Entry<? extends String, ? extends Integer> entry : m_content.getVectorLengths().entrySet()) {
DataColumnSpec columnSpec = spec.getDataTableSpec().getColumnSpec(entry.getKey());
// Columns that are not part of the spec get no derived fields but are still added to jsonBuilder.
if (columnSpec != null) {
final DataType type = columnSpec.getType();
final DataColumnProperties props = columnSpec.getProperties();
// A column counts as bit/byte vector either by its type or, for string columns, by the
// value of its "realType" property.
final boolean bitVector = type.isCompatible(BitVectorValue.class) || (type.isCompatible(StringValue.class) && props.containsProperty("realType") && "BitVector".equals(props.getProperty("realType")));
final boolean byteVector = type.isCompatible(ByteVectorValue.class) || (type.isCompatible(StringValue.class) && props.containsProperty("realType") && "ByteVector".equals(props.getProperty("realType")));
// lengthAsString is the length argument of the substring call; width is the distance between
// the start positions of two consecutive byte vector entries.
final String lengthAsString;
final int width;
if (byteVector) {
lengthAsString = "3";
width = 4;
} else if (bitVector) {
lengthAsString = "1";
width = 1;
} else {
throw new UnsupportedOperationException("Not supported type: " + type + " for column: " + columnSpec);
}
for (int i = 0; i < entry.getValue().intValue(); ++i) {
// Derived field "<column>[i]": integer obtained via substring(<column>, from, length).
final DerivedField derivedField = localTransformations.addNewDerivedField();
derivedField.setOptype(OPTYPE.CONTINUOUS);
derivedField.setDataType(DATATYPE.INTEGER);
derivedField.setName(entry.getKey() + "[" + i + "]");
Apply apply = derivedField.addNewApply();
apply.setFunction("substring");
apply.addNewFieldRef().setField(entry.getKey());
Constant from = apply.addNewConstant();
from.setDataType(DATATYPE.INTEGER);
// Bit vectors: start position is (vector length - i), i.e. index i counts from the end of
// the string. Otherwise: start position is i * width + 1.
from.setStringValue(bitVector ? Long.toString(entry.getValue().longValue() - i) : Long.toString(i * width + 1L));
Constant length = apply.addNewConstant();
length.setDataType(DATATYPE.INTEGER);
length.setStringValue(lengthAsString);
}
}
jsonBuilder.add(entry.getKey(), entry.getValue().intValue());
}
}
// PMMLPortObjectSpecCreator newSpecCreator = new PMMLPortObjectSpecCreator(spec);
// newSpecCreator.addPreprocColNames(m_content.getVectorLengths().entrySet().stream()
// .flatMap(
// e -> IntStream.iterate(0, o -> o + 1).limit(e.getValue()).mapToObj(i -> e.getKey() + "[" + i + "]"))
// .collect(Collectors.toList()));
PMMLMiningSchemaTranslator.writeMiningSchema(spec, reg);
// if (!m_content.getVectorLengths().isEmpty()) {
// Extension miningExtension = reg.getMiningSchema().addNewExtension();
// miningExtension.setExtender(EXTENDER);
// miningExtension.setName(VECTOR_COLUMNS_WITH_LENGTH);
// miningExtension.setValue(jsonBuilder.build().toString());
// }
reg.setModelType(getPMMLRegModelType(m_content.getModelType()));
reg.setFunctionName(getPMMLMiningFunction(m_content.getFunctionName()));
// The following attributes are only written when they are present (and, for strings, non-empty).
String algorithmName = m_content.getAlgorithmName();
if (algorithmName != null && !algorithmName.isEmpty()) {
reg.setAlgorithmName(algorithmName);
}
String modelName = m_content.getModelName();
if (modelName != null && !modelName.isEmpty()) {
reg.setModelName(modelName);
}
String targetReferenceCategory = m_content.getTargetReferenceCategory();
if (targetReferenceCategory != null && !targetReferenceCategory.isEmpty()) {
reg.setTargetReferenceCategory(targetReferenceCategory);
}
if (m_content.getOffsetValue() != null) {
reg.setOffsetValue(m_content.getOffsetValue());
}
// add parameter list
ParameterList paramList = reg.addNewParameterList();
for (PMMLParameter p : m_content.getParameterList()) {
Parameter param = paramList.addNewParameter();
param.setName(p.getName());
String label = p.getLabel();
// Labels are optional; when present they are passed through the derived field name mapper.
if (label != null) {
param.setLabel(m_nameMapper.getDerivedFieldName(label));
}
}
// add factor list
FactorList factorList = reg.addNewFactorList();
for (PMMLPredictor p : m_content.getFactorList()) {
Predictor predictor = factorList.addNewPredictor();
predictor.setName(m_nameMapper.getDerivedFieldName(p.getName()));
}
// add covariate list
CovariateList covariateList = reg.addNewCovariateList();
for (PMMLPredictor p : m_content.getCovariateList()) {
Predictor predictor = covariateList.addNewPredictor();
predictor.setName(m_nameMapper.getDerivedFieldName(p.getName()));
}
// add PPMatrix
PPMatrix ppMatrix = reg.addNewPPMatrix();
for (PMMLPPCell p : m_content.getPPMatrix()) {
PPCell cell = ppMatrix.addNewPPCell();
cell.setValue(p.getValue());
cell.setPredictorName(m_nameMapper.getDerivedFieldName(p.getPredictorName()));
cell.setParameterName(p.getParameterName());
String targetCategory = p.getTargetCategory();
if (targetCategory != null && !targetCategory.isEmpty()) {
cell.setTargetCategory(targetCategory);
}
}
// add CovMatrix
// The PCovMatrix element is only created when there is at least one cell to write.
if (m_content.getPCovMatrix().length > 0) {
PCovMatrix pCovMatrix = reg.addNewPCovMatrix();
for (PMMLPCovCell p : m_content.getPCovMatrix()) {
PCovCell covCell = pCovMatrix.addNewPCovCell();
covCell.setPRow(p.getPRow());
covCell.setPCol(p.getPCol());
String tCol = p.getTCol();
String tRow = p.getTRow();
// tRow and tCol are written together as soon as at least one of them is set.
if (tRow != null || tCol != null) {
covCell.setTRow(tRow);
covCell.setTCol(tCol);
}
covCell.setValue(p.getValue());
String targetCategory = p.getTargetCategory();
if (targetCategory != null && !targetCategory.isEmpty()) {
covCell.setTargetCategory(targetCategory);
}
}
}
// add ParamMatrix
ParamMatrix paramMatrix = reg.addNewParamMatrix();
for (PMMLPCell p : m_content.getParamMatrix()) {
PCell pCell = paramMatrix.addNewPCell();
String targetCategory = p.getTargetCategory();
// Unlike the matrices above, an empty (non-null) target category is written here as well.
if (targetCategory != null) {
pCell.setTargetCategory(targetCategory);
}
pCell.setParameterName(p.getParameterName());
pCell.setBeta(p.getBeta());
Integer df = p.getDf();
// The degrees of freedom value is optional.
if (df != null) {
pCell.setDf(BigInteger.valueOf(df));
}
}
return GeneralRegressionModel.type;
}
Example use of org.knime.core.data.DataColumnProperties in the knime-core project by knime.
From the class ClusterMembershipFactory, method getColumnSpecs.
/**
 * {@inheritDoc}
 *
 * <p>Returns {@code m_nrClusters + 1} column specs: one double column per cluster, each with a
 * domain of [0.0, 1.0] and the double bar renderer as preferred renderer, followed by the string
 * column "Winner Cluster". If {@code m_noise} is set, the last of the double columns is named
 * {@code FuzzyClusterNodeModel.NOISESPEC_KEY} instead of carrying a cluster name.
 */
public DataColumnSpec[] getColumnSpecs() {
    final int clusterCount = m_nrClusters;
    final DataColumnSpec[] specs = new DataColumnSpec[clusterCount + 1];
    // Position of the last double column; it holds the noise membership when noise is enabled.
    final int noiseIndex = specs.length - 2;
    for (int idx = 0; idx < clusterCount; idx++) {
        final boolean isNoiseColumn = m_noise && idx == noiseIndex;
        final String columnName =
            isNoiseColumn ? FuzzyClusterNodeModel.NOISESPEC_KEY : FuzzyClusterNodeModel.CLUSTER_KEY + idx;
        final DataColumnSpecCreator membershipCreator = new DataColumnSpecCreator(columnName, DoubleCell.TYPE);
        membershipCreator.setProperties(new DataColumnProperties(Collections.singletonMap(
            DataValueRenderer.PROPERTY_PREFERRED_RENDERER, DoubleBarRenderer.DESCRIPTION)));
        membershipCreator.setDomain(
            new DataColumnDomainCreator(new DoubleCell(0.0), new DoubleCell(1.0)).createDomain());
        specs[idx] = membershipCreator.createSpec();
    }
    specs[specs.length - 1] = new DataColumnSpecCreator("Winner Cluster", StringCell.TYPE).createSpec();
    return specs;
}
Aggregations