Search in sources :

Example 6 with MissingCell

use of org.knime.core.data.MissingCell in project knime-core by knime.

the class SVMPredictor method getCells.

/**
 * {@inheritDoc}
 *
 * <p>Collects the numeric values of the configured input columns and returns the predicted class value, preceded by
 * one probability cell per SVM when probabilities are to be appended. If any input cell is missing, only missing
 * cells are returned.</p>
 */
@Override
public DataCell[] getCells(final DataRow row) {
    final ArrayList<Double> features = new ArrayList<Double>();
    for (final int colIndex : m_colindices) {
        final DataCell input = row.getCell(colIndex);
        if (!input.isMissing()) {
            features.add(((DoubleValue) input).getDoubleValue());
            continue;
        }
        // A single missing input makes the whole prediction undefined.
        if (!m_appendProbabilities) {
            return new DataCell[] { DataType.getMissingCell() };
        }
        final DataCell[] allMissing = new DataCell[1 + m_svms.length];
        Arrays.fill(allMissing, new MissingCell("Missing value in input data."));
        return allMissing;
    }

    final String predictedClass = doPredict(features);
    if (!m_appendProbabilities) {
        return new DataCell[] { new StringCell(predictedClass) };
    }

    // Layout: one probability per SVM, followed by the predicted class in the last slot.
    final DataCell[] result = new DataCell[m_svms.length + 1];
    final double[] probabilities = computeProbabilities(features);
    assert result.length == probabilities.length + 1 : result.length + " vs. " + (probabilities.length + 1);
    final int lastIndex = result.length - 1;
    for (int p = 0; p < lastIndex; p++) {
        result[p] = new DoubleCell(probabilities[p]);
    }
    result[probabilities.length] = new StringCell(predictedClass);
    return result;
}
Also used : MissingCell(org.knime.core.data.MissingCell) DoubleValue(org.knime.core.data.DoubleValue) StringCell(org.knime.core.data.def.StringCell) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell)

Example 7 with MissingCell

use of org.knime.core.data.MissingCell in project knime-core by knime.

the class XML2PMMLNodeModel method createColRearranger.

/**
 * Builds the column rearranger that converts the selected XML column into a PMML column.
 *
 * <p>If no XML column name is configured yet, a default is guessed from {@code spec}. The converted column is
 * either written in place of the source column or appended, depending on the "replace column" setting. Missing
 * input cells stay missing. A cell that cannot be interpreted as PMML increments the fail counter and then either
 * aborts with a {@link RuntimeException} or yields a missing cell carrying the failure text, depending on the
 * "fail on invalid" setting.</p>
 *
 * @param spec the spec of the input table
 * @return the rearranger producing the PMML column
 * @throws InvalidSettingsException if one of the setting checks on the selected column fails
 */
private ColumnRearranger createColRearranger(final DataTableSpec spec) throws InvalidSettingsException {
    if (m_xmlColumnName.getStringValue() == null) {
        guessDefaultXMLColumn(spec);
    }
    String xmlColName = m_xmlColumnName.getStringValue();
    String newColName = m_newColumnName.getStringValue();
    final int colIndex = spec.findColumnIndex(xmlColName);
    CheckUtils.checkSetting(colIndex >= 0, "Column: '%s' does not exist anymore.", xmlColName);
    final DataColumnSpec colSpec = spec.getColumnSpec(colIndex);
    CheckUtils.checkSetting(colSpec.getType().isCompatible(StringValue.class), "Selected column '%s' is not string/xml-compatible", xmlColName);
    DataColumnSpecCreator colSpecCreator;
    if (newColName != null && !m_replaceColumn.getBooleanValue()) {
        String newName = DataTableSpec.getUniqueColumnName(spec, newColName);
        colSpecCreator = new DataColumnSpecCreator(newName, PMMLCell.TYPE);
    } else {
        // Reuse the source column's spec but strip everything tied to its old type.
        colSpecCreator = new DataColumnSpecCreator(colSpec);
        colSpecCreator.setType(PMMLCell.TYPE);
        colSpecCreator.removeAllHandlers();
        colSpecCreator.setDomain(null);
    }
    DataColumnSpec outColumnSpec = colSpecCreator.createSpec();
    ColumnRearranger rearranger = new ColumnRearranger(spec);
    CellFactory fac = new SingleCellFactory(outColumnSpec) {

        @Override
        public DataCell getCell(final DataRow row) {
            DataCell cell = row.getCell(colIndex);
            if (cell.isMissing()) {
                return DataType.getMissingCell();
            }
            PMMLDocument pmmlDoc = null;
            String failure = null;
            // Kept so that a fail-on-invalid abort still carries the original parser stack trace.
            XmlException parseError = null;
            try (LockedSupplier<Document> supplier = ((XMLValue<Document>) cell).getDocumentSupplier()) {
                XmlObject xmlDoc = XmlObject.Factory.parse(supplier.get().cloneNode(true));
                if (xmlDoc instanceof PMMLDocument) {
                    pmmlDoc = (PMMLDocument) xmlDoc;
                } else if (PMMLUtils.isOldKNIMEPMML(xmlDoc) || PMMLUtils.is4_1PMML(xmlDoc)) {
                    String updatedPMML = PMMLUtils.getUpdatedVersionAndNamespace(xmlDoc);
                    /* Parse the modified document and assign it to a
                     * PMMLDocument.*/
                    pmmlDoc = PMMLDocument.Factory.parse(updatedPMML);
                } else {
                    failure = "No valid PMML v 3.x/4.0/4.1 document";
                }
            } catch (XmlException e) {
                if (!m_failOnInvalid.getBooleanValue()) {
                    LOGGER.error("Invalid PMML in row " + row.getKey() + ": " + e.getMessage(), e);
                }
                parseError = e;
                // Never leave the failure text null here: a null would be mistaken for success below
                // and end in a NullPointerException on the unset document.
                failure = e.getMessage() != null ? e.getMessage() : e.toString();
            }
            if (failure != null) {
                m_failCounter.incrementAndGet();
                if (m_failOnInvalid.getBooleanValue()) {
                    // A null cause is permitted and simply means "no underlying exception".
                    throw new RuntimeException("Invalid PMML in row " + row.getKey() + ": " + failure, parseError);
                }
                return new MissingCell(failure);
            }
            try {
                return PMMLCellFactory.create(pmmlDoc.toString());
            } catch (Exception e) {
                // NOTE(review): this path neither counts the failure nor honors the fail-on-invalid
                // setting; kept as in the original best-effort behavior - confirm this is intended.
                return new MissingCell(e.getMessage());
            }
        }
    };
    if (m_replaceColumn.getBooleanValue()) {
        rearranger.replace(fac, colIndex);
    } else {
        rearranger.append(fac);
    }
    return rearranger;
}
Also used : DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) PMMLDocument(org.dmg.pmml.PMMLDocument) Document(org.w3c.dom.Document) DataRow(org.knime.core.data.DataRow) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) XmlException(org.apache.xmlbeans.XmlException) DataColumnSpec(org.knime.core.data.DataColumnSpec) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) MissingCell(org.knime.core.data.MissingCell) XmlException(org.apache.xmlbeans.XmlException) DataCell(org.knime.core.data.DataCell) XmlObject(org.apache.xmlbeans.XmlObject) PMMLDocument(org.dmg.pmml.PMMLDocument) XMLValue(org.knime.core.data.xml.XMLValue) StringValue(org.knime.core.data.StringValue) PMMLCellFactory(org.knime.core.data.xml.PMMLCellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory) CellFactory(org.knime.core.data.container.CellFactory) SingleCellFactory(org.knime.core.data.container.SingleCellFactory)

Example 8 with MissingCell

use of org.knime.core.data.MissingCell in project knime-core by knime.

the class ListFiles method addLocationToContainer.

/**
 * Appends one row for the given URL to the output container.
 *
 * <p>The first cell holds the local path when the URL uses the {@code file} protocol and is a missing cell
 * otherwise; the second cell holds the URL in string form. The running row ID is advanced afterwards.</p>
 *
 * @param url the location to add
 * @throws UnsupportedEncodingException declared by the signature; not raised by any statement visible here
 * @throws URISyntaxException if the URL cannot be converted to a URI
 */
private void addLocationToContainer(final URL url) throws UnsupportedEncodingException, URISyntaxException {
    final boolean isLocalFile = "file".equalsIgnoreCase(url.getProtocol());
    final DataCell locationCell;
    if (isLocalFile) {
        locationCell = new StringCell(Paths.get(url.toURI()).toString());
    } else {
        locationCell = new MissingCell("URL is remote and does not have a local location");
    }
    final DataCell urlCell = new StringCell(url.toString());
    final DataCell[] cells = { locationCell, urlCell };
    m_dc.addRowToTable(new DefaultRow(RowKey.createRowKey(m_currentRowID), cells));
    m_currentRowID++;
}
Also used : StringCell(org.knime.core.data.def.StringCell) MissingCell(org.knime.core.data.MissingCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 9 with MissingCell

use of org.knime.core.data.MissingCell in project knime-core by knime.

the class FixedWidthRowIterator method next.

/**
 * {@inheritDoc}
 *
 * <p>Reads one token per column from the tokenizer and converts it to a cell of the column's type. Columns for
 * which the line has already ended are filled with missing cells. A token that cannot be converted aborts the
 * iteration with an exception describing the line and column.</p>
 */
@Override
public DataRow next() {
    final int numColumns = m_tableSpec.getNumColumns();
    if (!hasNext()) {
        throw new NoSuchElementException("The row iterator proceeded beyond the last line of '" + m_nodeSettings.getFileLocation().toString() + "'.");
    }

    // Either take the row header from the input or synthesize one from the running line counter.
    final String rowId;
    if (m_nodeSettings.getHasRowHeader()) {
        rowId = m_tokenizer.nextToken();
    } else {
        rowId = "Row" + m_lineNumber++;
    }
    final DataCell[] cells = new DataCell[numColumns];

    for (int col = 0; col < numColumns; col++) {
        m_dataCellFactory.setMissingValuePattern(m_missingValuePatterns[col]);
        m_dataCellFactory.setFormatParameter(m_formatParameters[col]);
        final String token = m_tokenizer.nextToken();
        if (m_tokenizer.getReachedEndOfLine()) {
            // The line is exhausted but columns remain: pad with missing cells.
            cells[col] = new MissingCell(null);
            continue;
        }
        final DataColumnSpec columnSpec = m_tableSpec.getColumnSpec(col);
        final DataCell parsed = m_dataCellFactory.createDataCellOfType(columnSpec.getType(), token);
        if (parsed != null) {
            cells[col] = parsed;
            continue;
        }
        // Cell creation failed: the first unfilled slot is the column we were trying to read.
        int failedCol = 0;
        for (; failedCol < cells.length; failedCol++) {
            if (cells[failedCol] == null) {
                break;
            }
        }
        final String message = m_dataCellFactory.getErrorMessage() + " In line " + m_tokenizer.getLineNumber() + " (" + rowId + ") at column #" + failedCol + " ('" + m_tableSpec.getColumnSpec(failedCol).getName() + "').";
        assert rowId != null;
        throw prepareForException(message, m_tokenizer.getLineNumber(), rowId, cells);
    }

    // Report progress once per additional chunk of bytes read, when the file size is known.
    final double bytesRead = m_inputStream.getNumberOfBytesRead();
    final boolean progressDue = m_exec != null && m_inputStream.getFileSize() > 0 && bytesRead / PROGRESS_JUNK_SIZE > m_lastReport;
    if (progressDue) {
        m_exec.setProgress(bytesRead / m_inputStream.getFileSize());
        m_lastReport++;
    }
    return new DefaultRow(rowId, cells);
}
Also used : DataColumnSpec(org.knime.core.data.DataColumnSpec) MissingCell(org.knime.core.data.MissingCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) NoSuchElementException(java.util.NoSuchElementException)

Example 10 with MissingCell

use of org.knime.core.data.MissingCell in project knime-core by knime.

the class MissingValueHandling3TableIterator method handleMissing.

/*
 * Applies the per-column missing value strategy to a row: non-missing cells are passed through unchanged,
 * missing cells are replaced according to the method configured for their column.
 */
private DataRow handleMissing(final DataRow row) {
    final int numCells = row.getNumCells();
    final DataCell[] handled = new DataCell[numCells];
    for (int col = 0; col < numCells; col++) {
        final MissingValueHandling2ColSetting setting = m_table.getColSetting(col);
        final DataCell original = row.getCell(col);
        if (!original.isMissing()) {
            handled[col] = original;
            continue;
        }
        final DataCell replacement;
        switch (setting.getMethod()) {
            case MissingValueHandling2ColSetting.METHOD_NO_HANDLING:
                replacement = original;
                break;
            case MissingValueHandling2ColSetting.METHOD_FIX_VAL:
                replacement = m_table.getColSetting(col).getFixCell();
                assert (replacement != null);
                break;
            case MissingValueHandling2ColSetting.METHOD_MOST_FREQUENT:
                replacement = m_table.getMostFrequent(col);
                break;
            case MissingValueHandling2ColSetting.METHOD_MAX:
                replacement = m_table.getMax(col);
                break;
            case MissingValueHandling2ColSetting.METHOD_MIN:
                replacement = m_table.getMin(col);
                break;
            case MissingValueHandling2ColSetting.METHOD_MEAN: {
                // Unlike the other methods, the mean is a plain double and must be wrapped in a new cell.
                final double mean = m_table.getMean(col);
                if (setting.getType() == MissingValueHandling2ColSetting.TYPE_DOUBLE) {
                    final boolean meanUnavailable = Double.isNaN(mean) && m_table.getNumberNaNValues(col) == 0;
                    replacement = meanUnavailable ? new MissingCell("Calculated mean is not a number") : new DoubleCell(mean);
                } else {
                    assert setting.getType() == MissingValueHandling2ColSetting.TYPE_INT;
                    replacement = Double.isNaN(mean) ? new MissingCell("Calculated mean is not a number") : new IntCell((int) Math.round(mean));
                }
                break;
            }
            case MissingValueHandling2ColSetting.METHOD_IGNORE_ROWS:
                assert false : "That should have been filtered.";
                replacement = original;
                break;
            default:
                throw new RuntimeException("Invalid method!");
        }
        handled[col] = replacement;
    }
    return new DefaultRow(row.getKey(), handled);
}
Also used : MissingCell(org.knime.core.data.MissingCell) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) IntCell(org.knime.core.data.def.IntCell)

Aggregations

MissingCell (org.knime.core.data.MissingCell): 18, DataCell (org.knime.core.data.DataCell): 13, DataColumnSpec (org.knime.core.data.DataColumnSpec): 6, DoubleCell (org.knime.core.data.def.DoubleCell): 6, DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator): 5, DefaultRow (org.knime.core.data.def.DefaultRow): 5, StringCell (org.knime.core.data.def.StringCell): 5, ArrayList (java.util.ArrayList): 4, DataRow (org.knime.core.data.DataRow): 4, RowKey (org.knime.core.data.RowKey): 4, IntCell (org.knime.core.data.def.IntCell): 4, LinkedHashMap (java.util.LinkedHashMap): 3, DataType (org.knime.core.data.DataType): 3, DoubleValue (org.knime.core.data.DoubleValue): 3, InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 3, IOException (java.io.IOException): 2, Test (org.junit.Test): 2, BinaryObjectDataCell (org.knime.core.data.blob.BinaryObjectDataCell): 2, ListCell (org.knime.core.data.collection.ListCell): 2, ColumnRearranger (org.knime.core.data.container.ColumnRearranger): 2