Example 6 with DefaultDataArray

Use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

The class PolyRegLearnerNodeModel, method execute().

/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getDataTableSpec();
    final int colCount = inSpec.getNumColumns();
    String[] selectedCols = computeSelectedColumns(inSpec);
    Set<String> hash = new HashSet<String>(Arrays.asList(selectedCols));
    m_colSelected = new boolean[colCount];
    for (int i = 0; i < colCount; i++) {
        m_colSelected[i] = hash.contains(inTable.getDataTableSpec().getColumnSpec(i).getName());
    }
    final int rowCount = inTable.getRowCount();
    String[] temp = new String[m_columnNames.length + 1];
    System.arraycopy(m_columnNames, 0, temp, 0, m_columnNames.length);
    temp[temp.length - 1] = m_settings.getTargetColumn();
    FilterColumnTable filteredTable = new FilterColumnTable(inTable, temp);
    final DataArray rowContainer = new DefaultDataArray(filteredTable, 1, m_settings.getMaxRowsForView());
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inData[1] : null;
    PortObjectSpec[] outputSpec = configure((inPMMLPort == null) ? new PortObjectSpec[] { inData[0].getSpec(), null } : new PortObjectSpec[] { inData[0].getSpec(), inPMMLPort.getSpec() });
    Learner learner = new Learner((PMMLPortObjectSpec) outputSpec[0], 0d, m_settings.getMissingValueHandling() == MissingValueHandling.fail, m_settings.getDegree());
    try {
        PolyRegContent polyRegContent = learner.perform(inTable, exec);
        m_betas = fillBeta(polyRegContent);
        m_meanValues = polyRegContent.getMeans();
        ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
        crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
        PortObject[] bdt = new PortObject[] {
                createPMMLModel(inPMMLPort, inSpec),
                exec.createColumnRearrangeTable(inTable, crea, exec.createSilentSubExecutionContext(0.2)),
                polyRegContent.createTablePortObject(exec.createSubExecutionContext(0.2)) };
        m_squaredError /= rowCount;
        if (polyRegContent.getWarningMessage() != null) {
            setWarningMessage(polyRegContent.getWarningMessage());
        }
        double[] stdErrors = PolyRegViewData.mapToArray(polyRegContent.getStandardErrors(),
                m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptStdErr());
        double[] tValues = PolyRegViewData.mapToArray(polyRegContent.getTValues(),
                m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptTValue());
        double[] pValues = PolyRegViewData.mapToArray(polyRegContent.getPValues(),
                m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptPValue());
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, stdErrors, tValues, pValues,
                m_squaredError, polyRegContent.getAdjustedRSquared(), m_columnNames,
                m_settings.getDegree(), m_settings.getTargetColumn(), rowContainer);
        return bdt;
    } catch (ModelSpecificationException e) {
        final String origWarning = getWarningMessage();
        // prepend any existing warning to the exception message
        final String warning = ((origWarning != null && !origWarning.isEmpty()) ? (origWarning + "\n") : "") + e.getMessage();
        setWarningMessage(warning);
        final ExecutionContext subExec = exec.createSubExecutionContext(.1);
        final BufferedDataContainer empty = subExec.createDataContainer(STATS_SPEC);
        int rowIdx = 1;
        for (final String column : m_columnNames) {
            for (int d = 1; d <= m_settings.getDegree(); ++d) {
                empty.addRowToTable(new DefaultRow("Row" + rowIdx++, new StringCell(column), new IntCell(d), new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(), DataType.getMissingCell()));
            }
        }
        empty.addRowToTable(new DefaultRow("Row" + rowIdx, new StringCell("Intercept"), new IntCell(0), new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(), DataType.getMissingCell()));
        double[] nans = new double[m_columnNames.length * m_settings.getDegree() + 1];
        Arrays.fill(nans, Double.NaN);
        m_betas = new double[nans.length];
        // Mean only for the linear tags
        m_meanValues = new double[nans.length / m_settings.getDegree()];
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, nans, nans, nans,
                m_squaredError, Double.NaN, m_columnNames, m_settings.getDegree(),
                m_settings.getTargetColumn(), rowContainer);
        empty.close();
        ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
        crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
        BufferedDataTable rearrangerTable = exec.createColumnRearrangeTable(inTable, crea, exec.createSubProgress(0.6));
        PMMLPortObject model = createPMMLModel(inPMMLPort, inTable.getDataTableSpec());
        PortObject[] bdt = new PortObject[] { model, rearrangerTable, empty.getTable() };
        return bdt;
    }
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) DoubleCell(org.knime.core.data.def.DoubleCell) FilterColumnTable(org.knime.base.data.filter.column.FilterColumnTable) DataArray(org.knime.base.node.util.DataArray) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ModelSpecificationException(org.apache.commons.math3.stat.regression.ModelSpecificationException) IntCell(org.knime.core.data.def.IntCell) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) HashSet(java.util.HashSet) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) StringCell(org.knime.core.data.def.StringCell) DefaultRow(org.knime.core.data.def.DefaultRow)
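
In this example the DefaultDataArray only feeds the node view: a column-filtered slice of the input table is copied into memory so the view can render it later, independent of the learning pass. A minimal sketch of that pattern; the method name cacheViewRows and its parameters are illustrative, not actual PolyRegLearnerNodeModel members:

import org.knime.base.data.filter.column.FilterColumnTable;
import org.knime.base.node.util.DataArray;
import org.knime.base.node.util.DefaultDataArray;
import org.knime.core.node.BufferedDataTable;

// Keep only the learning columns plus the target column, then cache at most
// maxRowsForView rows (starting at row 1) for display in the node view.
static DataArray cacheViewRows(final BufferedDataTable table, final String[] learnColumns,
        final String targetColumn, final int maxRowsForView) {
    final String[] columns = new String[learnColumns.length + 1];
    System.arraycopy(learnColumns, 0, columns, 0, learnColumns.length);
    columns[columns.length - 1] = targetColumn;
    return new DefaultDataArray(new FilterColumnTable(table, columns), 1, maxRowsForView);
}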

Example 7 with DefaultDataArray

Use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

The class HierarchicalClusterNodeModel, method execute().

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] data, final ExecutionContext exec) throws Exception {
    // determine the indices of the selected columns
    List<String> includedCols = m_selectedColumns.getIncludeList();
    int[] selectedColIndices = new int[includedCols.size()];
    for (int count = 0; count < selectedColIndices.length; count++) {
        selectedColIndices[count] = data[0].getDataTableSpec().findColumnIndex(includedCols.get(count));
    }
    BufferedDataTable inputData = data[0];
    if (inputData.size() > 65500) {
        throw new RuntimeException("At most 65,500 patterns can be clustered");
    }
    DataTable outputData = null;
    if (DistanceFunction.Names.Manhattan.toString().equals(m_distFunctionName.getStringValue())) {
        m_distFunction = ManhattanDist.MANHATTEN_DISTANCE;
    } else {
        m_distFunction = EuclideanDist.EUCLIDEAN_DISTANCE;
    }
    // generate initial clustering
    // which means that every data point is one cluster
    List<ClusterNode> clusters = initClusters(inputData, exec);
    // store the distance per each fusion step
    DataContainer fusionCont = exec.createDataContainer(createFusionSpec());
    int iterationStep = 0;
    final HalfFloatMatrix cache;
    if (m_cacheDistances.getBooleanValue()) {
        cache = new HalfFloatMatrix((int) inputData.size(), false);
        cache.fill(Float.NaN);
    } else {
        cache = null;
    }
    double max = inputData.size();
    // the number of clusters at the beginning is equal to the number
    // of data rows (each row is a cluster)
    int numberDataRows = clusters.size();
    while (clusters.size() > 1) {
        // check whether the requested number of output clusters has been reached
        if (m_numClustersForOutput.getIntValue() == clusters.size()) {
            outputData = createResultTable(inputData, clusters, exec);
        }
        exec.setProgress((numberDataRows - clusters.size()) / (double) numberDataRows, clusters.size() + " clusters left to merge.");
        iterationStep++;
        exec.setProgress(iterationStep / max, "Iteration " + iterationStep + ", " + clusters.size() + " clusters remaining");
        // calculate distance between all clusters
        float currentSmallestDist = Float.MAX_VALUE;
        ClusterNode currentClosestCluster1 = null;
        ClusterNode currentClosestCluster2 = null;
        // subprogress for loop
        double availableProgress = (1.0 / numberDataRows);
        ExecutionContext subexec = exec.createSubExecutionContext(availableProgress);
        for (int i = 0; i < clusters.size(); i++) {
            exec.checkCanceled();
            ClusterNode node1 = clusters.get(i);
            for (int j = i + 1; j < clusters.size(); j++) {
                final float dist;
                ClusterNode node2 = clusters.get(j);
                // single, complete, and average linkage are supported
                if (m_linkageType.getStringValue().equals(Linkage.SINGLE.name())) {
                    dist = calculateSingleLinkageDist(node1, node2, cache, selectedColIndices);
                } else if (m_linkageType.getStringValue().equals(Linkage.AVERAGE.name())) {
                    dist = calculateAverageLinkageDist(node1, node2, cache, selectedColIndices);
                } else {
                    dist = calculateCompleteLinkageDist(node1, node2, cache, selectedColIndices);
                }
                if (dist < currentSmallestDist) {
                    currentClosestCluster1 = node1;
                    currentClosestCluster2 = node2;
                    currentSmallestDist = dist;
                }
            }
        }
        subexec.setProgress(1.0);
        // make one cluster of the two closest
        ClusterNode newNode = new ClusterNode(currentClosestCluster1, currentClosestCluster2, currentSmallestDist);
        clusters.remove(currentClosestCluster1);
        clusters.remove(currentClosestCluster2);
        clusters.add(newNode);
        // store the distance per each fusion step
        fusionCont.addRowToTable(new DefaultRow(
                // row key
                Integer.toString(clusters.size()),
                // x-axis of the scatter plotter: remaining clusters
                new IntCell(clusters.size()),
                // y-axis of the scatter plotter: fusion distance of this merge
                new DoubleCell(newNode.getDist())));
    // // print number clusters and their data points
    // LOGGER.debug("Iteration " + iterationStep + ":");
    // LOGGER.debug(" Number Clusters: " + clusters.size());
    // printClustersDataRows(clusters);
    }
    if (clusters.size() > 0) {
        m_rootNode = clusters.get(0);
    }
    fusionCont.close();
    // if the output table was not created during the loop (e.g. no input data), create it now
    if (outputData == null) {
        outputData = createResultTable(inputData, clusters, exec);
    }
    m_dataArray = new DefaultDataArray(inputData, 1, (int) inputData.size());
    m_fusionTable = new DefaultDataArray(fusionCont.getTable(), 1, iterationStep);
    return new BufferedDataTable[] { exec.createBufferedDataTable(outputData, exec) };
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) SettingsModelString(org.knime.core.node.defaultnodesettings.SettingsModelString) IntCell(org.knime.core.data.def.IntCell) DataContainer(org.knime.core.data.container.DataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) BufferedDataTable(org.knime.core.node.BufferedDataTable) HalfFloatMatrix(org.knime.base.util.HalfFloatMatrix) DefaultRow(org.knime.core.data.def.DefaultRow)
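
Once execution has finished, the cached fusion levels can be read back through the DataArray like any other DataTable. A small sketch of how view-side code might iterate the fusion table built above; printFusionLevels is an illustrative helper, not part of HierarchicalClusterNodeModel:

import org.knime.base.node.util.DataArray;
import org.knime.core.data.DataRow;
import org.knime.core.data.DoubleValue;
import org.knime.core.data.IntValue;

// Each cached row holds the number of remaining clusters (column 0) and the
// fusion distance of that merge step (column 1). DataArray extends DataTable,
// so the rows can simply be iterated.
static void printFusionLevels(final DataArray fusionTable) {
    for (DataRow row : fusionTable) {
        int clustersLeft = ((IntValue) row.getCell(0)).getIntValue();
        double distance = ((DoubleValue) row.getCell(1)).getDoubleValue();
        System.out.println(clustersLeft + " clusters left, fusion distance " + distance);
    }
}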

Example 8 with DefaultDataArray

Use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

The class ConditionalBoxPlotNodeModel, method execute().

/**
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    m_statistics = new LinkedHashMap<DataColumnSpec, double[]>();
    m_mildOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    m_extremeOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    double nrRows = inData[0].size();
    int rowCount = 0;
    int numericIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.numericColumn());
    int nominalIndex = inData[0].getDataTableSpec().findColumnIndex(m_settings.nominalColumn());
    Map<String, Map<Double, Set<RowKey>>> data = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
    // default (empty) outlier maps, in case a class contains only missing values
    for (DataCell d : inData[0].getDataTableSpec().getColumnSpec(nominalIndex).getDomain().getValues()) {
        String name = ((StringValue) d).getStringValue();
        m_mildOutliers.put(name, new HashMap<Double, Set<RowKey>>());
        m_extremeOutliers.put(name, new HashMap<Double, Set<RowKey>>());
    }
    for (DataRow r : inData[0]) {
        exec.checkCanceled();
        exec.setProgress(rowCount++ / nrRows, "Separating...");
        if (!m_settings.showMissingValues()) {
            if (r.getCell(nominalIndex).isMissing()) {
                // skip rows whose nominal value is missing
                continue;
            }
        }
        String nominal = replaceSpaces(r.getCell(nominalIndex).toString());
        if (r.getCell(numericIndex).isMissing()) {
            // ignore missing cells in numeric column
            continue;
        }
        DoubleValue numeric = (DoubleValue) r.getCell(numericIndex);
        Map<Double, Set<RowKey>> map = data.get(nominal);
        if (map == null) {
            map = new LinkedHashMap<Double, Set<RowKey>>();
        }
        Set<RowKey> set = map.get(numeric.getDoubleValue());
        if (set == null) {
            set = new HashSet<RowKey>();
        }
        set.add(r.getKey());
        map.put(numeric.getDoubleValue(), set);
        data.put(nominal, map);
    }
    List<String> keys = new ArrayList<String>(data.keySet());
    boolean ignoreMissingValues = false;
    if (m_settings.showMissingValues() && !keys.contains(DataType.getMissingCell().toString())) {
        // missing values were requested to be shown, but none were found,
        // so the corresponding column is added manually further below
        setWarningMessage("No missing values found.");
        ignoreMissingValues = true;
    }
    Collections.sort(keys);
    DataColumnSpec[] colSpecs = createColumnSpec(inData[0].getDataTableSpec().getColumnSpec(nominalIndex), ignoreMissingValues);
    if (keys.size() == 0) {
        setWarningMessage("All classes are empty.");
    }
    int dataSetNr = 0;
    // for (String d : keys) {
    for (DataColumnSpec dcs : colSpecs) {
        String d = dcs.getName();
        if (data.get(d) == null || keys.size() == 0) {
            dataSetNr++;
            continue;
        }
        exec.checkCanceled();
        exec.setProgress(dataSetNr / (double) keys.size(), "Creating statistics");
        Map<Double, Set<RowKey>> extremeOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        Map<Double, Set<RowKey>> mildOutliers = new LinkedHashMap<Double, Set<RowKey>>();
        double[] stats = calculateStatistic(data.get(d), mildOutliers, extremeOutliers);
        double minimum = stats[BoxPlotNodeModel.MIN];
        double maximum = stats[BoxPlotNodeModel.MAX];
        DataColumnSpecCreator creator = new DataColumnSpecCreator(colSpecs[dataSetNr]);
        creator.setDomain(new DataColumnDomainCreator(new DoubleCell(minimum), new DoubleCell(maximum)).createDomain());
        colSpecs[dataSetNr] = creator.createSpec();
        m_statistics.put(colSpecs[dataSetNr], stats);
        m_mildOutliers.put(d, mildOutliers);
        m_extremeOutliers.put(d, extremeOutliers);
        dataSetNr++;
    }
    DataTableSpec dts = new DataTableSpec("MyTempTable", colSpecs);
    DataContainer cont = new DataContainer(dts);
    cont.close();
    m_dataArray = new DefaultDataArray(cont.getTable(), 1, 2);
    cont.dispose();
    if (ignoreMissingValues) {
        DataColumnSpec[] temp = new DataColumnSpec[colSpecs.length + 1];
        DataColumnSpec missing = new DataColumnSpecCreator(DataType.getMissingCell().toString(), DataType.getMissingCell().getType()).createSpec();
        int i = 0;
        while (missing.getName().compareTo(colSpecs[i].getName()) > 0) {
            temp[i] = colSpecs[i];
            i++;
        }
        temp[i++] = missing;
        while (i < temp.length) {
            temp[i] = colSpecs[i - 1];
            i++;
        }
        colSpecs = temp;
    }
    /* Save inSpec of the numeric column to provide the view a way to
         * consider the input domain for normalization. */
    m_numColSpec = inData[0].getDataTableSpec().getColumnSpec(numericIndex);
    return new BufferedDataTable[] { createOutputTable(inData[0].getDataTableSpec(), colSpecs, exec).getTable() };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) HashSet(java.util.HashSet) Set(java.util.Set) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ArrayList(java.util.ArrayList) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataContainer(org.knime.core.data.container.DataContainer) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) StringValue(org.knime.core.data.StringValue) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DoubleValue(org.knime.core.data.DoubleValue) DataCell(org.knime.core.data.DataCell) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
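
The DefaultDataArray here wraps an intentionally empty container: the view only needs the column specs with their adjusted domains, while the statistics and outliers live in the maps filled above. A condensed sketch of that trick; specOnlyArray is an illustrative name:

import org.knime.base.node.util.DataArray;
import org.knime.base.node.util.DefaultDataArray;
import org.knime.core.data.DataColumnSpec;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.container.DataContainer;

// Build an empty, already-closed container so the view receives a DataArray
// that carries the column specs but no rows.
static DataArray specOnlyArray(final DataColumnSpec[] colSpecs) {
    DataContainer cont = new DataContainer(new DataTableSpec("MyTempTable", colSpecs));
    cont.close();                   // no rows are added on purpose
    DataArray array = new DefaultDataArray(cont.getTable(), 1, 2);
    cont.dispose();                 // release the container's resources
    return array;
}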

Example 9 with DefaultDataArray

Use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

The class BoxPlotNodeModel, method loadInternals().

/**
 * {@inheritDoc}
 */
@Override
protected void loadInternals(final File nodeInternDir, final ExecutionMonitor exec) throws IOException, CanceledExecutionException {
    try {
        File f = new File(nodeInternDir, FILE_NAME);
        FileInputStream fis = new FileInputStream(f);
        NodeSettingsRO settings = NodeSettings.loadFromXML(fis);
        m_statistics = new LinkedHashMap<DataColumnSpec, double[]>();
        m_mildOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
        m_extremeOutliers = new LinkedHashMap<String, Map<Double, Set<RowKey>>>();
        int nrOfCols = settings.getInt(CFG_NR_COLS);
        for (int i = 0; i < nrOfCols; i++) {
            NodeSettings subSetting = (NodeSettings) settings.getConfig(CFG_COL + i);
            DataColumnSpec spec = DataColumnSpec.load(subSetting);
            double[] stats = settings.getDoubleArray(CFG_STATS + spec.getName());
            m_statistics.put(spec, stats);
            loadOutliers(settings, spec);
        }
        File data = new File(nodeInternDir, ARRAY_FILE);
        ContainerTable table = DataContainer.readFromZip(data);
        m_array = new DefaultDataArray(table, 1, 2, exec);
    } catch (Exception e) {
        LOGGER.warn(e);
        throw new IOException(e.getMessage());
    }
}
Also used : RowKey(org.knime.core.data.RowKey) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) ContainerTable(org.knime.core.data.container.ContainerTable) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) IOException(java.io.IOException) NodeSettings(org.knime.core.node.NodeSettings) DataColumnSpec(org.knime.core.data.DataColumnSpec) NodeSettingsRO(org.knime.core.node.NodeSettingsRO) File(java.io.File) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
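
For completeness, the save side of this pattern usually writes the cached rows to a zip file in the node's internals directory, which loadInternals above then restores via DataContainer.readFromZip. A sketch assuming the static DataContainer.writeToZip(DataTable, File, ExecutionMonitor) helper used elsewhere in knime-core; saveViewCache and fileName are illustrative:

import java.io.File;
import java.io.IOException;
import org.knime.base.node.util.DataArray;
import org.knime.core.data.container.DataContainer;
import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionMonitor;

// Write the cached rows to <nodeInternDir>/<fileName> so a later loadInternals
// call can read them back with DataContainer.readFromZip(...).
static void saveViewCache(final DataArray array, final File nodeInternDir,
        final String fileName, final ExecutionMonitor exec)
        throws IOException, CanceledExecutionException {
    DataContainer.writeToZip(array, new File(nodeInternDir, fileName), exec);
}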

Example 10 with DefaultDataArray

Use of org.knime.base.node.util.DefaultDataArray in project knime-core by knime.

The class Rule2DNodeModel, method execute().

/**
 * Reads in the data and the rules and simply stores them locally.
 *
 * @see org.knime.core.node.NodeModel#execute(BufferedDataTable[],
 *      ExecutionContext)
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    exec.setProgress(0.2, "Process input data...");
    assert inData != null;
    assert inData.length == 2;
    m_data = new DefaultDataArray(inData[DATA_INPORT], m_dataStartRow, m_dataMaxRow);
    m_fuzzyRules = new DefaultDataArray(inData[RULES_INPORT], m_ruleStartRow, m_ruleMaxRow);
    LOGGER.debug("model rules: " + m_fuzzyRules);
    exec.setProgress(0.8, "Process input data...");
    return new BufferedDataTable[] {};
}
Also used : DefaultDataArray(org.knime.base.node.util.DefaultDataArray) BufferedDataTable(org.knime.core.node.BufferedDataTable)
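
This is the only example that does not start caching at row 1: the second constructor argument is the 1-based index of the first row to copy and the third bounds how many rows are copied, so m_dataStartRow and m_dataMaxRow let the user narrow the window shown in the view. A tiny sketch with assumed, hard-coded settings:

import org.knime.base.node.util.DataArray;
import org.knime.base.node.util.DefaultDataArray;
import org.knime.core.node.BufferedDataTable;

// Cache a window of rows for the view: start at the 1-based row 10 and keep at
// most 500 rows; the array simply ends earlier if the table has fewer rows.
static DataArray cacheRuleWindow(final BufferedDataTable rules) {
    final int startRow = 10;   // 1-based index of the first cached row
    final int maxRows = 500;   // upper bound on the number of cached rows
    return new DefaultDataArray(rules, startRow, maxRows);
}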

Aggregations

DefaultDataArray (org.knime.base.node.util.DefaultDataArray): 32 usages
BufferedDataTable (org.knime.core.node.BufferedDataTable): 16 usages
File (java.io.File): 14 usages
ContainerTable (org.knime.core.data.container.ContainerTable): 13 usages
DataTableSpec (org.knime.core.data.DataTableSpec): 12 usages
FileInputStream (java.io.FileInputStream): 10 usages
IOException (java.io.IOException): 9 usages
InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 9 usages
DataRow (org.knime.core.data.DataRow): 8 usages
DataColumnSpec (org.knime.core.data.DataColumnSpec): 7 usages
HashSet (java.util.HashSet): 6 usages
DataArray (org.knime.base.node.util.DataArray): 6 usages
DataTable (org.knime.core.data.DataTable): 6 usages
Map (java.util.Map): 5 usages
RowKey (org.knime.core.data.RowKey): 5 usages
NodeSettingsRO (org.knime.core.node.NodeSettingsRO): 5 usages
SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString): 5 usages
PortObject (org.knime.core.node.port.PortObject): 5 usages
BufferedInputStream (java.io.BufferedInputStream): 4 usages
ArrayList (java.util.ArrayList): 4 usages