use of org.knime.core.data.container.CellFactory in project knime-core by knime.
the class FuzzyClusterNodeModel method execute.
/**
 * Generate new clustering based on InputDataTable and specified number of
 * clusters. In the output table, you will find the datarow with
 * supplementary information about the membership to each cluster center.
 * OUTPORT = original datarows with cluster membership information
 *
 * The method filters the input table down to the included columns, runs the
 * fuzzy c-means iterations until the prototypes stop changing or the
 * iteration limit is reached, optionally computes quality measures, appends
 * the membership columns and finally builds the PMML cluster model.
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable indata = (BufferedDataTable) inData[0];
    // reset the state left over from a previous execution
    m_clusters = null;
    m_betweenClusterVariation = Double.NaN;
    m_withinClusterVariation = null;
    // choose the algorithm variant: with/without noise clustering, in-memory or not
    if (m_noise) {
        if (m_calculateDelta) {
            if (m_memory) {
                m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier, m_calculateDelta, m_lambda);
            } else {
                m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier, m_calculateDelta, m_lambda);
            }
        } else {
            if (m_memory) {
                m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier, m_calculateDelta, m_delta);
            } else {
                m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier, m_calculateDelta, m_delta);
            }
        }
    } else {
        if (m_memory) {
            m_fcmAlgo = new FCMAlgorithmMemory(m_nrClusters, m_fuzzifier);
        } else {
            m_fcmAlgo = new FCMAlgorithm(m_nrClusters, m_fuzzifier);
        }
    }
    int nrRows = indata.getRowCount();
    DataTableSpec spec = indata.getDataTableSpec();
    int nrCols = spec.getNumColumns();
    List<String> learningCols = new LinkedList<String>();
    // counter for included columns
    int z = 0;
    final int[] matchedColumns = new int[m_list.size()];
    for (int i = 0; i < nrCols; i++) {
        // if include does contain current column name
        String colname = spec.getColumnSpec(i).getName();
        if (m_list.contains(colname)) {
            matchedColumns[z] = i;
            z++;
            learningCols.add(colname);
        }
    }
    // Only the first z slots hold real indices. Passing the untrimmed array
    // would hand stale zeros to keepOnly (i.e. column 0) whenever an included
    // name is not present in the input spec.
    final int[] columns;
    if (z == matchedColumns.length) {
        columns = matchedColumns;
    } else {
        columns = new int[z];
        System.arraycopy(matchedColumns, 0, columns, 0, z);
    }
    ColumnRearranger colre = new ColumnRearranger(spec);
    colre.keepOnly(columns);
    BufferedDataTable filteredtable = exec.createColumnRearrangeTable(indata, colre, exec);
    // get dimension of feature space
    int dimension = filteredtable.getDataTableSpec().getNumColumns();
    Random random = new Random();
    if (m_useRandomSeed) {
        random.setSeed(m_randomSeed);
    }
    m_fcmAlgo.init(nrRows, dimension, filteredtable, random);
    // main loop - until clusters stop changing or maxNrIterations reached
    int currentIteration = 0;
    double totalchange = Double.MAX_VALUE;
    while ((totalchange > 1e-7) && (currentIteration < m_maxNrIterations)) {
        exec.checkCanceled();
        exec.setProgress((double) currentIteration / (double) m_maxNrIterations, "Iteration " + currentIteration + " Total change of prototypes: " + totalchange);
        totalchange = m_fcmAlgo.doOneIteration(exec);
        currentIteration++;
    }
    if (m_measures) {
        double[][] data = null;
        if (m_fcmAlgo instanceof FCMAlgorithmMemory) {
            // the in-memory variant already holds the data as a double matrix
            data = ((FCMAlgorithmMemory) m_fcmAlgo).getConvertedData();
        } else {
            // otherwise read the filtered table into a matrix; missing cells become 0
            data = new double[nrRows][m_fcmAlgo.getDimension()];
            int curRow = 0;
            for (DataRow dRow : filteredtable) {
                for (int j = 0; j < dRow.getNumCells(); j++) {
                    if (!(dRow.getCell(j).isMissing())) {
                        DoubleValue dv = (DoubleValue) dRow.getCell(j);
                        data[curRow][j] = dv.getDoubleValue();
                    } else {
                        data[curRow][j] = 0;
                    }
                }
                curRow++;
            }
        }
        m_fcmmeasures = new FCMQualityMeasures(m_fcmAlgo.getClusterCentres(), m_fcmAlgo.getweightMatrix(), data, m_fuzzifier);
    }
    // append the membership columns to the full (unfiltered) input table
    ColumnRearranger colRearranger = new ColumnRearranger(spec);
    CellFactory membershipFac = new ClusterMembershipFactory(m_fcmAlgo);
    colRearranger.append(membershipFac);
    BufferedDataTable result = exec.createColumnRearrangeTable(indata, colRearranger, exec);
    // don't write out the noise cluster!
    double[][] clustercentres = m_fcmAlgo.getClusterCentres();
    if (m_noise) {
        // copy every centre except the last entry of the array
        double[][] cleaned = new double[clustercentres.length - 1][];
        for (int i = 0; i < cleaned.length; i++) {
            cleaned[i] = new double[clustercentres[i].length];
            System.arraycopy(clustercentres[i], 0, cleaned[i], 0, clustercentres[i].length);
        }
        clustercentres = cleaned;
    }
    exec.setMessage("Creating PMML cluster model...");
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_enablePMMLInput ? (PMMLPortObject) inData[1] : null;
    PMMLPortObjectSpec inPMMLSpec = null;
    if (inPMMLPort != null) {
        inPMMLSpec = inPMMLPort.getSpec();
    }
    PMMLPortObjectSpec pmmlOutSpec = createPMMLPortObjectSpec(inPMMLSpec, spec, learningCols);
    PMMLPortObject outPMMLPort = new PMMLPortObject(pmmlOutSpec, inPMMLPort, spec);
    outPMMLPort.addModelTranslater(new PMMLClusterTranslator(ComparisonMeasure.squaredEuclidean, m_nrClusters, clustercentres, null, new LinkedHashSet<String>(pmmlOutSpec.getLearningFields())));
    return new PortObject[] { result, outPMMLPort };
}
use of org.knime.core.data.container.CellFactory in project knime-core by knime.
the class PCAApplyNodeModel method execute.
/**
 * Performs the PCA.
 *
 * Projects the rows of the data input onto the leading eigenvectors of the
 * model input and appends the resulting columns; the original input columns
 * are removed afterwards if the corresponding setting is enabled.
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final PCAModelPortObject model = (PCAModelPortObject) inData[MODEL_INPORT];
    final int dimensions = m_dimSelection.getNeededDimensions();
    if (dimensions == -1) {
        throw new IllegalArgumentException("Number of dimensions not correct configured");
    }
    final PortObject dataPort = inData[DATA_INPORT];
    if (m_failOnMissingValues.getBooleanValue()) {
        // scan the whole table up front so the node fails before producing any output
        for (final DataRow row : (DataTable) dataPort) {
            for (final int colIndex : m_inputColumnIndices) {
                if (row.getCell(colIndex).isMissing()) {
                    throw new IllegalArgumentException("data table contains missing values");
                }
            }
        }
    }
    final Matrix eigenvectors = EigenValue.getSortedEigenVectors(model.getEigenVectors(), model.getEigenvalues(), dimensions);
    final DataTableSpec inSpec = (DataTableSpec) dataPort.getSpec();
    final DataColumnSpec[] appendedSpecs = PCANodeModel.createAddTableSpec(inSpec, dimensions);
    final CellFactory projectionFactory = new CellFactory() {
        @Override
        public DataCell[] getCells(final DataRow row) {
            return PCANodeModel.convertInputRow(eigenvectors, row, model.getCenter(), m_inputColumnIndices, dimensions, m_failOnMissingValues.getBooleanValue());
        }

        @Override
        public DataColumnSpec[] getColumnSpecs() {
            return appendedSpecs;
        }

        @Override
        public void setProgress(final int curRowNr, final int rowCount, final RowKey lastKey, final ExecutionMonitor texec) {
            texec.setProgress((double) curRowNr / rowCount, "converting input row " + curRowNr + " of " + rowCount);
        }
    };
    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    rearranger.append(projectionFactory);
    if (m_removeOriginalCols.getBooleanValue()) {
        rearranger.remove(m_inputColumnNames);
    }
    final BufferedDataTable projected = exec.createColumnRearrangeTable((BufferedDataTable) dataPort, rearranger, exec);
    return new PortObject[] { projected };
}
use of org.knime.core.data.container.CellFactory in project knime-core by knime.
the class CellReplacerNodeModel method createColumnRearranger.
/**
 * Builds the rearranger that replaces (or appends next to) the target column
 * using a lookup dictionary read from a second table.
 *
 * The dictionary is loaded lazily on the first call to the cell factory and
 * kept in memory for all following rows.
 *
 * @param spec spec of the table whose target column is processed
 * @param dictSpec spec of the dictionary table
 * @param dictTable the dictionary table; only read when the first row is processed
 * @param dictionaryInitExec monitor used for progress and cancellation while the dictionary is read
 * @return a rearranger that appends the new column or replaces the target column, depending on the append setting
 * @throws InvalidSettingsException if the target column, a required dictionary column or the new column name is missing
 */
private ColumnRearranger createColumnRearranger(final DataTableSpec spec, final DataTableSpec dictSpec, final BufferedDataTable dictTable, final ExecutionMonitor dictionaryInitExec) throws InvalidSettingsException {
    String targetCol = m_targetColModel.getStringValue();
    if (targetCol == null || targetCol.length() == 0) {
        throw new InvalidSettingsException("No target column selected");
    }
    final int targetColIndex = spec.findColumnIndex(targetCol);
    if (targetColIndex < 0) {
        throw new InvalidSettingsException("No such column \"" + targetCol + "\"");
    }
    final DataColumnSpec targetColSpec = spec.getColumnSpec(targetColIndex);
    final int dictInputColIndex = dictSpec.findColumnIndex(m_dictInputColModel.getStringValue());
    final boolean dictInputIsCollection;
    if (m_dictInputColModel.useRowID()) {
        dictInputIsCollection = false;
    } else if (dictInputColIndex < 0) {
        throw new InvalidSettingsException("No such column \"" + m_dictInputColModel.getStringValue() + "\"");
    } else {
        DataColumnSpec inS = dictSpec.getColumnSpec(dictInputColIndex);
        dictInputIsCollection = inS.getType().isCollectionType();
    }
    final int dictOutputColIndex = dictSpec.findColumnIndex(m_dictOutputColModel.getStringValue());
    final DataType dictOutputColType;
    if (m_dictOutputColModel.useRowID()) {
        dictOutputColType = StringCell.TYPE;
    } else {
        if (dictOutputColIndex < 0) {
            throw new InvalidSettingsException("No such column \"" + m_dictOutputColModel.getStringValue() + "\"");
        }
        dictOutputColType = dictSpec.getColumnSpec(dictOutputColIndex).getType();
    }
    final NoMatchPolicy noMatchPolicy = getNoMatchPolicy();
    // when unmatched cells pass through unchanged, the output column must be able to hold both types
    DataType outputType;
    switch(noMatchPolicy) {
        case Input:
            outputType = DataType.getCommonSuperType(dictOutputColType, targetColSpec.getType());
            break;
        default:
            outputType = dictOutputColType;
    }
    String newColName;
    if (m_appendColumnModel.getBooleanValue()) {
        String newName = m_appendColumnNameModel.getStringValue();
        if (newName == null || newName.length() == 0) {
            throw new InvalidSettingsException("No new column name given");
        }
        newColName = DataTableSpec.getUniqueColumnName(spec, newName);
    } else {
        newColName = targetColSpec.getName();
    }
    DataColumnSpecCreator replaceSpecCreator = new DataColumnSpecCreator(newColName, outputType);
    CellFactory c = new SingleCellFactory(replaceSpecCreator.createSpec()) {
        /** Lookup from dictionary input cell to output cell; null until completely loaded. */
        private Map<DataCell, DataCell> m_dictionaryMap;

        @Override
        public DataCell getCell(final DataRow row) {
            try {
                ensureInitDictionaryMap();
            } catch (CanceledExecutionException e) {
                // cancellation done by the framework
                return DataType.getMissingCell();
            }
            DataCell cell = row.getCell(targetColIndex);
            DataCell output = m_dictionaryMap.get(cell);
            if (output == null) {
                switch(noMatchPolicy) {
                    case Input:
                        return cell;
                    default:
                        return DataType.getMissingCell();
                }
            }
            return output;
        }

        /**
         * Reads the dictionary table into memory on first use. The map is
         * published to the field only after the whole table has been read,
         * so a cancelled load never leaves a partial dictionary behind.
         */
        private void ensureInitDictionaryMap() throws CanceledExecutionException {
            if (m_dictionaryMap != null) {
                return;
            }
            final Map<DataCell, DataCell> loading = new HashMap<DataCell, DataCell>();
            int i = 0;
            double rowCount = dictTable.size();
            for (DataRow r : dictTable) {
                dictionaryInitExec.setProgress((i++) / rowCount, "Reading dictionary into memory, row " + i);
                dictionaryInitExec.checkCanceled();
                // a negative index means the row ID is used instead of a column
                DataCell output = dictOutputColIndex < 0 ? new StringCell(r.getKey().getString()) : r.getCell(dictOutputColIndex);
                DataCell input = dictInputColIndex < 0 ? new StringCell(r.getKey().getString()) : r.getCell(dictInputColIndex);
                if (dictInputIsCollection && !input.isMissing()) {
                    // every element of a collection cell is a search key of its own
                    CollectionDataValue v = (CollectionDataValue) input;
                    for (DataCell element : v) {
                        addSearchPair(loading, element, output);
                    }
                } else {
                    // plain cells and missing cells are stored as they are
                    addSearchPair(loading, input, output);
                }
            }
            m_dictionaryMap = loading;
        }

        /** Stores one search key; a later duplicate overwrites the earlier entry and raises a warning. */
        private void addSearchPair(final Map<DataCell, DataCell> target, final DataCell input, final DataCell output) {
            if (target.put(input, output) != null) {
                setWarningMessage("Duplicate search key \"" + input + "\"");
            }
        }
    };
    ColumnRearranger result = new ColumnRearranger(spec);
    if (m_appendColumnModel.getBooleanValue()) {
        result.append(c);
    } else {
        result.replace(c, targetColIndex);
    }
    return result;
}
use of org.knime.core.data.container.CellFactory in project knime-core by knime.
the class InteractiveHiLiteCollectorNodeModel method execute.
/**
 * Appends the collected annotations as string columns to the input table.
 * If no annotations are stored, the input is passed through unchanged.
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    m_data = inData[0];
    if (m_annotationMap.isEmpty()) {
        return new PortObject[] { m_data };
    }
    DataTableSpec inSpec = (DataTableSpec) m_data.getSpec();
    final DataColumnSpec[] cspecs = createSpecs(inSpec);
    ColumnRearranger cr = new ColumnRearranger(inSpec);
    cr.append(new CellFactory() {
        /**
         * {@inheritDoc}
         */
        @Override
        public DataCell[] getCells(final DataRow row) {
            if (m_annotationMap.isEmpty()) {
                return new DataCell[0];
            }
            DataCell[] cells = new DataCell[m_lastIndex + 1];
            // the annotations of a row do not depend on the column, so look them up once
            final Map<Integer, String> rowAnnotations = m_annotationMap.get(row.getKey());
            for (int i = 0; i < cells.length; i++) {
                String str = null;
                if (rowAnnotations != null) {
                    str = rowAnnotations.get(i);
                }
                // rows without annotations, or without one at this index, get a missing cell
                if (str == null) {
                    cells[i] = DataType.getMissingCell();
                } else {
                    cells[i] = new StringCell(str);
                }
            }
            return cells;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public DataColumnSpec[] getColumnSpecs() {
            return cspecs;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public void setProgress(final int curRowNr, final int rowCount, final RowKey lastKey, final ExecutionMonitor em) {
            em.setProgress((double) curRowNr / rowCount);
        }
    });
    return new BufferedDataTable[] { exec.createColumnRearrangeTable((BufferedDataTable) m_data, cr, exec) };
}
use of org.knime.core.data.container.CellFactory in project knime-core by knime.
the class XML2PMMLNodeModel method createColRearranger.
/**
 * Builds the rearranger that converts the selected XML column into a PMML
 * column, either replacing the source column or appending a new one.
 *
 * Cells that cannot be converted either become missing cells carrying the
 * failure text or abort with a {@link RuntimeException}, depending on the
 * fail-on-invalid setting.
 *
 * @param spec spec of the input table
 * @return the configured rearranger
 * @throws InvalidSettingsException if the selected column does not exist or is not string/xml-compatible
 */
private ColumnRearranger createColRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    if (m_xmlColumnName.getStringValue() == null) {
        guessDefaultXMLColumn(spec);
    }
    String xmlColName = m_xmlColumnName.getStringValue();
    String newColName = m_newColumnName.getStringValue();
    final int colIndex = spec.findColumnIndex(xmlColName);
    CheckUtils.checkSetting(colIndex >= 0, "Column: '%s' does not exist anymore.", xmlColName);
    final DataColumnSpec colSpec = spec.getColumnSpec(colIndex);
    // NOTE(review): the column is validated against StringValue, but each cell is cast to
    // XMLValue below - confirm that a plain string column cannot reach the factory.
    CheckUtils.checkSetting(colSpec.getType().isCompatible(StringValue.class), "Selected column '%s' is not string/xml-compatible", xmlColName);
    DataColumnSpecCreator colSpecCreator;
    if (newColName != null && !m_replaceColumn.getBooleanValue()) {
        String newName = DataTableSpec.getUniqueColumnName(spec, newColName);
        colSpecCreator = new DataColumnSpecCreator(newName, PMMLCell.TYPE);
    } else {
        // reuse the source column's spec, but drop everything tied to the old type
        colSpecCreator = new DataColumnSpecCreator(colSpec);
        colSpecCreator.setType(PMMLCell.TYPE);
        colSpecCreator.removeAllHandlers();
        colSpecCreator.setDomain(null);
    }
    DataColumnSpec outColumnSpec = colSpecCreator.createSpec();
    ColumnRearranger rearranger = new ColumnRearranger(spec);
    CellFactory fac = new SingleCellFactory(outColumnSpec) {
        @Override
        public DataCell getCell(final DataRow row) {
            DataCell cell = row.getCell(colIndex);
            if (cell.isMissing()) {
                return DataType.getMissingCell();
            }
            PMMLDocument pmmlDoc = null;
            String failure = null;
            XmlException parseError = null;
            try (LockedSupplier<Document> supplier = ((XMLValue<Document>) cell).getDocumentSupplier()) {
                XmlObject xmlDoc = XmlObject.Factory.parse(supplier.get().cloneNode(true));
                if (xmlDoc instanceof PMMLDocument) {
                    pmmlDoc = (PMMLDocument) xmlDoc;
                } else if (PMMLUtils.isOldKNIMEPMML(xmlDoc) || PMMLUtils.is4_1PMML(xmlDoc)) {
                    String updatedPMML = PMMLUtils.getUpdatedVersionAndNamespace(xmlDoc);
                    /* Parse the modified document and assign it to a
                     * PMMLDocument.*/
                    pmmlDoc = PMMLDocument.Factory.parse(updatedPMML);
                } else {
                    failure = "No valid PMML v 3.x/4.0/4.1 document";
                }
            } catch (XmlException e) {
                if (!m_failOnInvalid.getBooleanValue()) {
                    LOGGER.error("Invalid PMML in row " + row.getKey() + ": " + e.getMessage(), e);
                }
                parseError = e;
                // never leave 'failure' null here: a null message would skip the
                // failure handling below and run into the null document instead
                failure = e.getMessage() != null ? e.getMessage() : e.toString();
            }
            if (failure != null) {
                m_failCounter.incrementAndGet();
                if (m_failOnInvalid.getBooleanValue()) {
                    // keep the parse exception (if any) as cause for diagnosis
                    throw new RuntimeException("Invalid PMML in row " + row.getKey() + ": " + failure, parseError);
                }
                return new MissingCell(failure);
            }
            try {
                return PMMLCellFactory.create(pmmlDoc.toString());
            } catch (Exception e) {
                return new MissingCell(e.getMessage());
            }
        }
    };
    if (m_replaceColumn.getBooleanValue()) {
        rearranger.replace(fac, colIndex);
    } else {
        rearranger.append(fac);
    }
    return rearranger;
}
Aggregations