Use of org.knime.core.data.MissingCell in project knime-core by knime.
Class SVMPredictor, method getCells.
/**
 * Predicts the class value for a row and, if probabilities are requested, also emits one
 * probability cell per SVM.
 * <p>
 * Result layout when {@code m_appendProbabilities} is set: the probabilities come first and the
 * predicted class value occupies the last position. Otherwise the result holds only the class value.
 * If any of the input columns is missing in the row, only missing cells are returned.
 * <p>
 * {@inheritDoc}
 */
@Override
public DataCell[] getCells(final DataRow row) {
    final ArrayList<Double> features = new ArrayList<Double>();
    for (final int colIndex : m_colindices) {
        final DataCell cell = row.getCell(colIndex);
        if (cell.isMissing()) {
            // A single missing input makes the whole prediction missing.
            if (!m_appendProbabilities) {
                return new DataCell[] { DataType.getMissingCell() };
            }
            final DataCell[] allMissing = new DataCell[1 + m_svms.length];
            Arrays.fill(allMissing, new MissingCell("Missing value in input data."));
            return allMissing;
        }
        features.add(((DoubleValue) cell).getDoubleValue());
    }

    final String classvalue = doPredict(features);
    if (!m_appendProbabilities) {
        return new DataCell[] { new StringCell(classvalue) };
    }

    final DataCell[] ret = new DataCell[m_svms.length + 1];
    final double[] probabilities = computeProbabilities(features);
    assert ret.length == probabilities.length + 1 : ret.length + " vs. " + (probabilities.length + 1);
    // Fill every slot except the last one, which is reserved for the class value.
    for (int i = 0; i < ret.length - 1; i++) {
        ret[i] = new DoubleCell(probabilities[i]);
    }
    ret[probabilities.length] = new StringCell(classvalue);
    return ret;
}
Use of org.knime.core.data.MissingCell in project knime-core by knime.
Class XML2PMMLNodeModel, method createColRearranger.
/**
 * Creates the column rearranger that converts the configured XML column into a PMML column.
 * <p>
 * If no XML column name is configured yet, {@code guessDefaultXMLColumn} is invoked first. The PMML
 * column replaces the source column when {@code m_replaceColumn} is set and is appended otherwise.
 * <p>
 * Per row, the cell factory behaves as follows:
 * <ul>
 * <li>a missing input cell yields a missing output cell;</li>
 * <li>a document that already is a {@code PMMLDocument} is used as is;</li>
 * <li>an old KNIME PMML or PMML 4.1 document is rewritten via
 * {@code PMMLUtils.getUpdatedVersionAndNamespace} and parsed again;</li>
 * <li>anything else, or a parse error, counts as a failure: {@code m_failCounter} is incremented and,
 * depending on {@code m_failOnInvalid}, either a {@link RuntimeException} is thrown or a missing cell
 * carrying the failure message is returned.</li>
 * </ul>
 *
 * @param spec the spec of the input table
 * @return the rearranger producing the PMML column
 * @throws InvalidSettingsException if the configured column does not exist or is not compatible with
 *             {@code StringValue}
 */
private ColumnRearranger createColRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    if (m_xmlColumnName.getStringValue() == null) {
        guessDefaultXMLColumn(spec);
    }
    String xmlColName = m_xmlColumnName.getStringValue();
    String newColName = m_newColumnName.getStringValue();
    final int colIndex = spec.findColumnIndex(xmlColName);
    CheckUtils.checkSetting(colIndex >= 0, "Column: '%s' does not exist anymore.", xmlColName);
    final DataColumnSpec colSpec = spec.getColumnSpec(colIndex);
    // NOTE(review): this check only requires StringValue compatibility, while the cell factory below
    // casts the cell to XMLValue - confirm that only XML columns can be selected.
    CheckUtils.checkSetting(colSpec.getType().isCompatible(StringValue.class), "Selected column '%s' is not string/xml-compatible", xmlColName);
    DataColumnSpecCreator colSpecCreator;
    if (newColName != null && !m_replaceColumn.getBooleanValue()) {
        String newName = DataTableSpec.getUniqueColumnName(spec, newColName);
        colSpecCreator = new DataColumnSpecCreator(newName, PMMLCell.TYPE);
    } else {
        // Reuse the source column's spec but switch it to the PMML type; handlers and domain of the
        // source column are dropped.
        colSpecCreator = new DataColumnSpecCreator(colSpec);
        colSpecCreator.setType(PMMLCell.TYPE);
        colSpecCreator.removeAllHandlers();
        colSpecCreator.setDomain(null);
    }
    DataColumnSpec outColumnSpec = colSpecCreator.createSpec();
    ColumnRearranger rearranger = new ColumnRearranger(spec);
    CellFactory fac = new SingleCellFactory(outColumnSpec) {
        @Override
        public DataCell getCell(final DataRow row) {
            DataCell cell = row.getCell(colIndex);
            if (cell.isMissing()) {
                return DataType.getMissingCell();
            }
            PMMLDocument pmmlDoc = null;
            String failure = null;
            XmlException cause = null;
            try (LockedSupplier<Document> supplier = ((XMLValue<Document>) cell).getDocumentSupplier()) {
                XmlObject xmlDoc = XmlObject.Factory.parse(supplier.get().cloneNode(true));
                if (xmlDoc instanceof PMMLDocument) {
                    pmmlDoc = (PMMLDocument) xmlDoc;
                } else if (PMMLUtils.isOldKNIMEPMML(xmlDoc) || PMMLUtils.is4_1PMML(xmlDoc)) {
                    String updatedPMML = PMMLUtils.getUpdatedVersionAndNamespace(xmlDoc);
                    /* Parse the modified document and assign it to a
                     * PMMLDocument.*/
                    pmmlDoc = PMMLDocument.Factory.parse(updatedPMML);
                } else {
                    failure = "No valid PMML v 3.x/4.0/4.1 document";
                }
            } catch (XmlException e) {
                if (!m_failOnInvalid.getBooleanValue()) {
                    LOGGER.error("Invalid PMML in row " + row.getKey() + ": " + e.getMessage(), e);
                }
                cause = e;
                // getMessage() may be null; a null failure would be mistaken for success below and
                // the row would bypass both the fail counter and the fail-on-invalid setting.
                failure = e.getMessage() != null ? e.getMessage() : e.toString();
            }
            if (failure != null) {
                m_failCounter.incrementAndGet();
                if (m_failOnInvalid.getBooleanValue()) {
                    // Keep the parse exception (if any) as cause so the stack trace is not lost.
                    throw new RuntimeException("Invalid PMML in row " + row.getKey() + ": " + failure, cause);
                }
                return new MissingCell(failure);
            }
            try {
                return PMMLCellFactory.create(pmmlDoc.toString());
            } catch (Exception e) {
                // Best effort: a cell that cannot be created becomes a missing cell; this path does
                // not touch the fail counter.
                return new MissingCell(e.getMessage());
            }
        }
    };
    if (m_replaceColumn.getBooleanValue()) {
        rearranger.replace(fac, colIndex);
    } else {
        rearranger.append(fac);
    }
    return rearranger;
}
Use of org.knime.core.data.MissingCell in project knime-core by knime.
Class ListFiles, method addLocationToContainer.
/**
 * Appends one row describing the given location to the output container.
 * <p>
 * The first cell holds the local path for URLs using the {@code file} protocol (compared
 * case-insensitively) and a missing cell with an explanatory message for every other protocol. The
 * second cell always holds the string form of the URL. The running row id is advanced afterwards.
 *
 * @param url the location to add
 * @throws UnsupportedEncodingException declared by the signature; no statement in this body throws it
 *             directly
 * @throws URISyntaxException if the URL cannot be converted into a URI
 */
private void addLocationToContainer(final URL url) throws UnsupportedEncodingException, URISyntaxException {
    final DataCell locationCell;
    if (!"file".equalsIgnoreCase(url.getProtocol())) {
        locationCell = new MissingCell("URL is remote and does not have a local location");
    } else {
        locationCell = new StringCell(Paths.get(url.toURI()).toString());
    }
    final DataCell urlCell = new StringCell(url.toString());
    final DataCell[] cells = new DataCell[] { locationCell, urlCell };
    m_dc.addRowToTable(new DefaultRow(RowKey.createRowKey(m_currentRowID), cells));
    m_currentRowID++;
}
Use of org.knime.core.data.MissingCell in project knime-core by knime.
Class FixedWidthRowIterator, method next.
/**
 * Reads the next line from the tokenizer and turns it into a row with one cell per column of the
 * table spec. Columns for which the line has no more tokens are filled with missing cells. A token
 * that cannot be converted into a cell of the column's type aborts the iteration with the exception
 * created by {@code prepareForException}.
 * <p>
 * {@inheritDoc}
 */
@Override
public DataRow next() {
    final int rowLength = m_tableSpec.getNumColumns();
    if (!hasNext()) {
        throw new NoSuchElementException("The row iterator proceeded beyond the last line of '" + m_nodeSettings.getFileLocation().toString() + "'.");
    }

    // Either take the row header from the file or generate one from the running line counter.
    final String rowHeader;
    if (m_nodeSettings.getHasRowHeader()) {
        rowHeader = m_tokenizer.nextToken();
    } else {
        rowHeader = "Row" + m_lineNumber++;
    }
    final DataCell[] row = new DataCell[rowLength];

    for (int col = 0; col < rowLength; col++) {
        m_dataCellFactory.setMissingValuePattern(m_missingValuePatterns[col]);
        m_dataCellFactory.setFormatParameter(m_formatParameters[col]);
        final String token = m_tokenizer.nextToken();
        if (m_tokenizer.getReachedEndOfLine()) {
            // The line ended early but more columns are expected: pad with missing cells.
            row[col] = new MissingCell(null);
            continue;
        }

        final DataColumnSpec cSpec = m_tableSpec.getColumnSpec(col);
        final DataCell result = m_dataCellFactory.createDataCellOfType(cSpec.getType(), token);
        if (result == null) {
            // Cell creation failed. The first slot that is still empty is the column being read.
            int errCol = 0;
            while (errCol < row.length && row[errCol] != null) {
                errCol++;
            }
            final String errorMsg = m_dataCellFactory.getErrorMessage() + " In line " + m_tokenizer.getLineNumber() + " (" + rowHeader + ") at column #" + errCol + " ('" + m_tableSpec.getColumnSpec(errCol).getName() + "').";
            assert rowHeader != null;
            // Hand over to the helper that builds the exception for this failure.
            throw prepareForException(errorMsg, m_tokenizer.getLineNumber(), rowHeader, row);
        }
        row[col] = result;
    }

    // Report progress each time the number of bytes read passes another reporting step.
    final double readBytes = m_inputStream.getNumberOfBytesRead();
    if (m_exec != null && m_inputStream.getFileSize() > 0 && readBytes / PROGRESS_JUNK_SIZE > m_lastReport) {
        // assert readBytes <= m_frSettings.getDataFileSize();
        m_exec.setProgress(readBytes / m_inputStream.getFileSize());
        m_lastReport++;
    }
    return new DefaultRow(rowHeader, row);
}
Use of org.knime.core.data.MissingCell in project knime-core by knime.
Class MissingValueHandling3TableIterator, method handleMissing.
/*
 * Applies the per-column missing value strategy to a row: non-missing cells are copied
 * unchanged, missing cells are replaced according to the method configured for their column.
 * The result is a new row with the same key.
 */
private DataRow handleMissing(final DataRow row) {
    final DataCell[] cells = new DataCell[row.getNumCells()];
    for (int col = 0; col < cells.length; col++) {
        final MissingValueHandling2ColSetting colset = m_table.getColSetting(col);
        final DataCell original = row.getCell(col);
        if (!original.isMissing()) {
            cells[col] = original;
            continue;
        }

        DataCell replacement;
        switch (colset.getMethod()) {
            case MissingValueHandling2ColSetting.METHOD_NO_HANDLING:
                replacement = original;
                break;
            case MissingValueHandling2ColSetting.METHOD_FIX_VAL:
                replacement = m_table.getColSetting(col).getFixCell();
                assert (replacement != null);
                break;
            case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
                replacement = m_table.getMostFrequent(col);
                break;
            case MissingValueHandling2ColSetting.METHOD_MAX:
                replacement = m_table.getMax(col);
                break;
            case MissingValueHandling2ColSetting.METHOD_MIN:
                replacement = m_table.getMin(col);
                break;
            case MissingValueHandling2ColSetting.METHOD_MEAN: {
                // Unlike the other statistics, the mean is delivered as a plain double, so the
                // replacement cell has to be created here.
                final double mean = m_table.getMean(col);
                if (colset.getType() == MissingValueHandling2ColSetting.TYPE_DOUBLE) {
                    if (!Double.isNaN(mean) || m_table.getNumberNaNValues(col) != 0) {
                        replacement = new DoubleCell(mean);
                    } else {
                        replacement = new MissingCell("Calculated mean is not a number");
                    }
                } else {
                    assert colset.getType() == MissingValueHandling2ColSetting.TYPE_INT;
                    if (!Double.isNaN(mean)) {
                        replacement = new IntCell((int) Math.round(mean));
                    } else {
                        replacement = new MissingCell("Calculated mean is not a number");
                    }
                }
                break;
            }
            case MissingValueHandling2ColSetting.METHOD_IGNORE_ROWS:
                assert false : "That should have been filtered.";
                replacement = original;
                break;
            default:
                throw new RuntimeException("Invalid method!");
        }
        cells[col] = replacement;
    }
    return new DefaultRow(row.getKey(), cells);
}
Aggregations