Search in sources:

Example 1 with FilterColumnTable

use of org.knime.base.data.filter.column.FilterColumnTable in project knime-core by knime.

the class PolyRegLearnerNodeModel method execute.

/**
 * Runs the polynomial regression learner on the table at input port 0 and returns three port objects:
 * the PMML model, the input table extended by the columns of {@code getCellFactory(...)}, and a
 * statistics table.
 * <p>
 * When the learner throws a {@code ModelSpecificationException}, the exception message is appended to
 * the current node warning and placeholder results are returned instead: a statistics table with a
 * coefficient of 0.0 and missing statistics for every column/degree pair plus the intercept, and view
 * data whose standard errors, t-values and p-values are all NaN.
 *
 * {@inheritDoc}
 *
 * @param inData the data table at index 0 and, if the PMML input is enabled, a PMML port object at index 1
 * @param exec execution context used to create tables and sub-progress
 * @return the PMML model, the rearranged data table and the statistics table, in this order
 * @throws Exception if configuring, learning or table creation fails
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getDataTableSpec();
    final int colCount = inSpec.getNumColumns();
    String[] selectedCols = computeSelectedColumns(inSpec);
    // flag, per input column index, whether the column is among the selected ones
    Set<String> hash = new HashSet<String>(Arrays.asList(selectedCols));
    m_colSelected = new boolean[colCount];
    for (int i = 0; i < colCount; i++) {
        m_colSelected[i] = hash.contains(inTable.getDataTableSpec().getColumnSpec(i).getName());
    }
    final int rowCount = inTable.getRowCount();
    // rows kept for the view: the learning columns followed by the target column
    String[] temp = new String[m_columnNames.length + 1];
    System.arraycopy(m_columnNames, 0, temp, 0, m_columnNames.length);
    temp[temp.length - 1] = m_settings.getTargetColumn();
    FilterColumnTable filteredTable = new FilterColumnTable(inTable, temp);
    final DataArray rowContainer = new DefaultDataArray(filteredTable, 1, m_settings.getMaxRowsForView());
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inData[1] : null;
    PortObjectSpec[] outputSpec = configure((inPMMLPort == null) ? new PortObjectSpec[] { inData[0].getSpec(), null } : new PortObjectSpec[] { inData[0].getSpec(), inPMMLPort.getSpec() });
    Learner learner = new Learner((PMMLPortObjectSpec) outputSpec[0], 0d, m_settings.getMissingValueHandling() == MissingValueHandling.fail, m_settings.getDegree());
    try {
        PolyRegContent polyRegContent = learner.perform(inTable, exec);
        m_betas = fillBeta(polyRegContent);
        m_meanValues = polyRegContent.getMeans();
        ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
        crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
        PortObject[] bdt = new PortObject[] { createPMMLModel(inPMMLPort, inSpec), exec.createColumnRearrangeTable(inTable, crea, exec.createSilentSubExecutionContext(.2)), polyRegContent.createTablePortObject(exec.createSubExecutionContext(0.2)) };
        m_squaredError /= rowCount;
        if (polyRegContent.getWarningMessage() != null) {
            setWarningMessage(polyRegContent.getWarningMessage());
        }
        double[] stdErrors = PolyRegViewData.mapToArray(polyRegContent.getStandardErrors(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptStdErr());
        double[] tValues = PolyRegViewData.mapToArray(polyRegContent.getTValues(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptTValue());
        double[] pValues = PolyRegViewData.mapToArray(polyRegContent.getPValues(), m_columnNames, m_settings.getDegree(), polyRegContent.getInterceptPValue());
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, stdErrors, tValues, pValues, m_squaredError, polyRegContent.getAdjustedRSquared(), m_columnNames, m_settings.getDegree(), m_settings.getTargetColumn(), rowContainer);
        return bdt;
    } catch (ModelSpecificationException e) {
        final String origWarning = getWarningMessage();
        // Build the prefix separately: in "cond ? a : b + c" the "+ c" belongs to the else-branch only,
        // which used to drop the exception message whenever a warning was already present.
        final String prefix = (origWarning != null && !origWarning.isEmpty()) ? (origWarning + "\n") : "";
        final String warning = prefix + e.getMessage();
        setWarningMessage(warning);
        final ExecutionContext subExec = exec.createSubExecutionContext(.1);
        final BufferedDataContainer empty = subExec.createDataContainer(STATS_SPEC);
        int rowIdx = 1;
        // one placeholder row per (column, degree) pair: coefficient 0.0, remaining statistics missing
        for (final String column : m_columnNames) {
            for (int d = 1; d <= m_settings.getDegree(); ++d) {
                empty.addRowToTable(new DefaultRow("Row" + rowIdx++, new StringCell(column), new IntCell(d), new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(), DataType.getMissingCell()));
            }
        }
        empty.addRowToTable(new DefaultRow("Row" + rowIdx, new StringCell("Intercept"), new IntCell(0), new DoubleCell(0.0d), DataType.getMissingCell(), DataType.getMissingCell(), DataType.getMissingCell()));
        // one entry per (column, degree) pair plus one for the intercept
        double[] nans = new double[m_columnNames.length * m_settings.getDegree() + 1];
        Arrays.fill(nans, Double.NaN);
        m_betas = new double[nans.length];
        // Mean only for the linear tags
        m_meanValues = new double[nans.length / m_settings.getDegree()];
        m_viewData = new PolyRegViewData(m_meanValues, m_betas, nans, nans, nans, m_squaredError, Double.NaN, m_columnNames, m_settings.getDegree(), m_settings.getTargetColumn(), rowContainer);
        empty.close();
        ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
        crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
        BufferedDataTable rearrangerTable = exec.createColumnRearrangeTable(inTable, crea, exec.createSubProgress(0.6));
        PMMLPortObject model = createPMMLModel(inPMMLPort, inTable.getDataTableSpec());
        PortObject[] bdt = new PortObject[] { model, rearrangerTable, empty.getTable() };
        return bdt;
    }
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) DoubleCell(org.knime.core.data.def.DoubleCell) FilterColumnTable(org.knime.base.data.filter.column.FilterColumnTable) DataArray(org.knime.base.node.util.DataArray) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ModelSpecificationException(org.apache.commons.math3.stat.regression.ModelSpecificationException) IntCell(org.knime.core.data.def.IntCell) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) BufferedDataTable(org.knime.core.node.BufferedDataTable) PMMLPortObjectSpec(org.knime.core.node.port.pmml.PMMLPortObjectSpec) PortObjectSpec(org.knime.core.node.port.PortObjectSpec) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) HashSet(java.util.HashSet) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) ExecutionContext(org.knime.core.node.ExecutionContext) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) StringCell(org.knime.core.data.def.StringCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 2 with FilterColumnTable

use of org.knime.base.data.filter.column.FilterColumnTable in project knime-core by knime.

the class JoinerNodeModel method execute.

/**
 * Joins the rows of the two input tables by row key and returns the joined rows as a single output
 * table. The left table is consumed in chunks (see {@code readLeftChunk}); for every chunk the right
 * table is scanned and rows with matching keys are written as {@code JoinedRow}s.
 *
 * {@inheritDoc}
 *
 * @param inData the left table at index 0 and the right table at index 1
 * @param exec execution context used to create the output container and to check for cancellation
 * @return a one-element array holding the joined table
 * @throws Exception if the execution is canceled or a helper fails
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    // output container whose spec is derived from both input specs, the join method and the suffix
    BufferedDataContainer dc = exec.createDataContainer(JoinedTable.createSpec(inData[0].getDataTableSpec(), inData[1].getDataTableSpec(), m_method, m_suffix));
    DataTable leftTable = inData[0];
    DataTable rightTable = inData[1];
    // for the filter method, drop those right-table columns whose names also occur in the left
    // table, so that only the remaining ("surviving") right columns are used for the join
    if (JoinedTable.METHOD_FILTER.equals(m_method)) {
        DataTableSpec leftTableSpec = leftTable.getDataTableSpec();
        DataTableSpec rightTableSpec = rightTable.getDataTableSpec();
        LinkedHashSet<String> leftHash = new LinkedHashSet<String>();
        for (DataColumnSpec c : leftTableSpec) {
            leftHash.add(c.getName());
        }
        LinkedHashSet<String> rightHash = new LinkedHashSet<String>();
        for (DataColumnSpec c : rightTableSpec) {
            rightHash.add(c.getName());
        }
        rightHash.removeAll(leftHash);
        String[] survivors = rightHash.toArray(new String[rightHash.size()]);
        // only wrap the right table if at least one column was actually removed
        if (survivors.length < rightTableSpec.getNumColumns()) {
            rightTable = new FilterColumnTable(rightTable, survivors);
        }
    }
    // bit i is set once the right row with iterator index i has been written in a joined row below
    final BitSet rightRows = new BitSet(inData[1].getRowCount());
    // left rows of the current chunk, keyed by row key; values are soft references, so an entry's
    // Helper may have been cleared by the garbage collector (handled in the loop below)
    // presumably filled by readLeftChunk - TODO confirm against that method
    final LinkedHashMap<RowKey, SoftReference<Helper>> map = new LinkedHashMap<RowKey, SoftReference<Helper>>(1024);
    // reset the per-execution state kept in fields
    m_leftRows = 0;
    m_outputRows = 0;
    m_leftIt = null;
    m_rightIt = null;
    m_firstMapHelper = null;
    m_exec = exec;
    // m_max is the smaller row count when missing rows are ignored, the larger one otherwise
    if (m_ignoreMissingRows) {
        m_max = Math.min(inData[0].getRowCount(), inData[1].getRowCount());
    } else {
        m_max = Math.max(inData[0].getRowCount(), inData[1].getRowCount());
    }
    while (true) {
        // stop once no further left chunk could be read; unless missing rows are ignored, the
        // remaining right rows are processed first
        if (!readLeftChunk(leftTable, map)) {
            if (!m_ignoreMissingRows) {
                processRemainingRightRows(dc, leftTable, rightTable, rightRows);
            }
            break;
        }
        // (re)start the right iterator if there is none, it is exhausted, or the first right row
        // not yet marked in rightRows lies at or before the iterator's current index
        if ((m_rightIt == null) || (!m_rightIt.hasNext()) || (rightRows.nextClearBit(0) <= m_rightIt.getIndex())) {
            m_rightIt = new CounterRowIterator(rightTable.iterator());
        }
        // scan the right table for keys of the current left chunk until the chunk map is empty
        while (m_rightIt.hasNext() && (map.size() > 0)) {
            m_exec.checkCanceled();
            DataRow rightRow = m_rightIt.next();
            SoftReference<Helper> sr = map.get(rightRow.getKey());
            if (sr != null) {
                Helper h = sr.get();
                if (h == null) {
                    // the soft reference was cleared; discard the stale map entry
                    map.remove(rightRow.getKey());
                } else {
                    // remember the matching right row and its index on the helper
                    h.m_rightRow = rightRow;
                    h.m_rightIndex = m_rightIt.getIndex();
                    // write the joined row right away only if this helper's left index equals
                    // the current m_leftRows counter
                    if (h.m_leftIndex == m_leftRows) {
                        // m_firstMapHelper = h;
                        assert h.m_predecessor == null || !map.containsKey(h.m_predecessor.m_leftRow.getKey());
                        h.m_predecessor = null;
                        DataRow joinedRow = new JoinedRow(h.m_leftRow, h.m_rightRow);
                        dc.addRowToTable(joinedRow);
                        map.remove(rightRow.getKey());
                        rightRows.set(m_rightIt.getIndex());
                        m_leftRows++;
                        m_outputRows++;
                        printProgress(rightRow.getKey());
                    }
                }
            }
        }
        // handle the left rows of this chunk that are still in the map after the scan
        processRemainingLeftRowsInMap(dc, rightTable, map, rightRows);
        if (!m_ignoreMissingRows) {
            // every right row has been marked: process the rest of the left table
            if (rightRows.cardinality() == inData[1].getRowCount()) {
                processRemainingLeftRowsInTable(dc, leftTable, rightTable);
            }
        } else {
            // count the left rows still in the map, discard them, and stop as soon as every
            // right row has been marked
            m_leftRows += map.size();
            map.clear();
            if (rightRows.cardinality() == inData[1].getRowCount()) {
                break;
            }
        }
    }
    // release the references held in fields during execution
    m_leftIt = null;
    m_rightIt = null;
    m_exec = null;
    m_firstMapHelper = null;
    dc.close();
    return new BufferedDataTable[] { dc.getTable() };
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) RowKey(org.knime.core.data.RowKey) FilterColumnTable(org.knime.base.data.filter.column.FilterColumnTable) BitSet(java.util.BitSet) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) SoftReference(java.lang.ref.SoftReference) BufferedDataTable(org.knime.core.node.BufferedDataTable) JoinedRow(org.knime.core.data.def.JoinedRow)

Example 3 with FilterColumnTable

use of org.knime.base.data.filter.column.FilterColumnTable in project knime-core by knime.

the class DefaultVisualizationNodeModel method execute.

/**
 * Stores the table at inport 0 as a {@link org.knime.base.node.util.DataArray} holding at most the
 * number of rows configured via the {@link DefaultVisualizationNodeDialog}. The columns returned by
 * {@code getExcludedColumns()} are filtered out beforehand; a warning is set if the table has more
 * rows than are kept.
 *
 * {@inheritDoc}
 *
 * @return an empty array, this method does not produce output tables
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable source = inData[0];
    // determine the excluded columns first; "false" suppresses the warning
    findCompatibleColumns(source.getDataTableSpec(), false);
    final DataTable withoutExcluded = new FilterColumnTable(source, false, getExcludedColumns());
    final int rowLimit = m_maxRows.getIntValue();
    m_input = new DefaultDataArray(withoutExcluded, 1, rowLimit, exec);
    // tell the user when the table had to be truncated
    if (source.size() > rowLimit) {
        setWarningMessage("Only the first " + rowLimit + " rows are displayed.");
    }
    return new BufferedDataTable[0];
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) FilterColumnTable(org.knime.base.data.filter.column.FilterColumnTable) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 4 with FilterColumnTable

use of org.knime.base.data.filter.column.FilterColumnTable in project knime-core by knime.

the class PolyRegLearnerNodeModel method execute.

/**
 * Fits a polynomial regression on the table at input port 0 by solving the normal equations
 * {@code beta = (X'X)^-1 * (X'Y)}. Each row of {@code X} holds a leading 1 followed by the powers
 * 1..degree of every selected column; {@code Y} holds the target column.
 * <p>
 * Besides the coefficients, the method stores the selected column names, the mean values of the
 * selected columns over the rows kept for the view, the view data and the row container in fields.
 *
 * {@inheritDoc}
 *
 * @param inData the data table at index 0 and a PMML port object at index 1
 * @param exec execution context used for progress, cancellation and table creation
 * @return the input table extended by the columns of {@code getCellFactory(...)}, followed by the PMML model
 * @throws IllegalArgumentException if a selected column or the target column contains a missing value
 * @throws ArithmeticException if {@code MathUtils.inverse} fails on {@code X'X}; the original
 *             exception is attached as the cause
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    BufferedDataTable inTable = (BufferedDataTable) inData[0];
    DataTableSpec inSpec = inTable.getDataTableSpec();
    final int colCount = inSpec.getNumColumns();
    String[] selectedCols = computeSelectedColumns(inSpec);
    // flag, per input column index, whether the column is among the selected ones
    Set<String> hash = new HashSet<String>(Arrays.asList(selectedCols));
    m_colSelected = new boolean[colCount];
    for (int i = 0; i < colCount; i++) {
        m_colSelected[i] = hash.contains(inTable.getDataTableSpec().getColumnSpec(i).getName());
    }
    final int rowCount = inTable.getRowCount();
    final int independentVariables = selectedCols.length;
    final int degree = m_settings.getDegree();
    final int dependentIndex = inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn());
    // X: one row per data row, column 0 is the constant term, then "degree" powers per selected column
    double[][] xMat = new double[rowCount][1 + independentVariables * degree];
    double[][] yMat = new double[rowCount][1];
    int rowIndex = 0;
    for (DataRow row : inTable) {
        exec.checkCanceled();
        exec.setProgress(0.2 * rowIndex / rowCount);
        xMat[rowIndex][0] = 1;
        int colIndex = 1;
        for (int i = 0; i < row.getNumCells(); i++) {
            if ((m_colSelected[i] || (i == dependentIndex)) && row.getCell(i).isMissing()) {
                throw new IllegalArgumentException("Missing values are not supported by this node.");
            }
            if (m_colSelected[i]) {
                // write val, val^2, ..., val^degree into consecutive columns
                double val = ((DoubleValue) row.getCell(i)).getDoubleValue();
                double poly = val;
                xMat[rowIndex][colIndex] = poly;
                colIndex++;
                for (int d = 2; d <= degree; d++) {
                    poly *= val;
                    xMat[rowIndex][colIndex] = poly;
                    colIndex++;
                }
            } else if (i == dependentIndex) {
                double val = ((DoubleValue) row.getCell(i)).getDoubleValue();
                yMat[rowIndex][0] = val;
            }
        }
        rowIndex++;
    }
    // compute X'
    double[][] xTransMat = MathUtils.transpose(xMat);
    exec.setProgress(0.24);
    exec.checkCanceled();
    // compute X'X
    double[][] xxMat = MathUtils.multiply(xTransMat, xMat);
    exec.setProgress(0.28);
    exec.checkCanceled();
    // compute X'Y
    double[][] xyMat = MathUtils.multiply(xTransMat, yMat);
    exec.setProgress(0.32);
    exec.checkCanceled();
    // compute (X'X)^-1
    double[][] xxInverse;
    try {
        xxInverse = MathUtils.inverse(xxMat);
        exec.setProgress(0.36);
        exec.checkCanceled();
    } catch (ArithmeticException ex) {
        // ArithmeticException has no (message, cause) constructor, so attach the cause explicitly
        // instead of discarding the original failure
        final ArithmeticException notIndependent = new ArithmeticException("The attributes of the data samples" + " are not mutually independent.");
        notIndependent.initCause(ex);
        throw notIndependent;
    }
    // compute (X'X)^-1 * (X'Y)
    final double[][] betas = MathUtils.multiply(xxInverse, xyMat);
    exec.setProgress(0.4);
    // flatten the single-column result into the coefficient vector
    m_betas = new double[independentVariables * degree + 1];
    for (int i = 0; i < betas.length; i++) {
        m_betas[i] = betas[i][0];
    }
    m_columnNames = selectedCols;
    // rows kept for the view: the selected columns followed by the target column
    String[] temp = new String[m_columnNames.length + 1];
    System.arraycopy(m_columnNames, 0, temp, 0, m_columnNames.length);
    temp[temp.length - 1] = m_settings.getTargetColumn();
    FilterColumnTable filteredTable = new FilterColumnTable(inTable, temp);
    DataArray rowContainer = new DefaultDataArray(filteredTable, 1, m_settings.getMaxRowsForView());
    // mean of every selected column over the rows in the row container; the target column is skipped
    int ignore = rowContainer.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn());
    m_meanValues = new double[independentVariables];
    for (DataRow row : rowContainer) {
        int k = 0;
        for (int i = 0; i < row.getNumCells(); i++) {
            if (i != ignore) {
                m_meanValues[k++] += ((DoubleValue) row.getCell(i)).getDoubleValue();
            }
        }
    }
    for (int i = 0; i < m_meanValues.length; i++) {
        m_meanValues[i] /= rowContainer.size();
    }
    ColumnRearranger crea = new ColumnRearranger(inTable.getDataTableSpec());
    crea.append(getCellFactory(inTable.getDataTableSpec().findColumnIndex(m_settings.getTargetColumn())));
    // handle the optional PMML input
    PMMLPortObject inPMMLPort = (PMMLPortObject) inData[1];
    PortObject[] bdt = new PortObject[] { exec.createColumnRearrangeTable(inTable, crea, exec.createSubProgress(0.6)), createPMMLModel(inPMMLPort, inTable.getDataTableSpec()) };
    m_squaredError /= rowCount;
    m_viewData = new PolyRegViewData(m_meanValues, m_betas, m_squaredError, m_columnNames, m_settings.getDegree(), m_settings.getTargetColumn());
    m_rowContainer = rowContainer;
    return bdt;
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) FilterColumnTable(org.knime.base.data.filter.column.FilterColumnTable) DataRow(org.knime.core.data.DataRow) DataArray(org.knime.base.node.util.DataArray) DefaultDataArray(org.knime.base.node.util.DefaultDataArray) ColumnRearranger(org.knime.core.data.container.ColumnRearranger) DoubleValue(org.knime.core.data.DoubleValue) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) BufferedDataTable(org.knime.core.node.BufferedDataTable) PortObject(org.knime.core.node.port.PortObject) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) HashSet(java.util.HashSet)

Example 5 with FilterColumnTable

use of org.knime.base.data.filter.column.FilterColumnTable in project knime-core by knime.

the class PMCCNodeModel method execute.

/**
 * Computes a pair-wise correlation measure for the included columns of the table at input port 0.
 * For a column pair that has a contingency table (both columns kept their collected possible
 * values) the result of {@code computeCramersV} is stored; otherwise, for a pair of
 * {@code DoubleValue}-compatible columns, the sum of the products of the normalized values is
 * accumulated and scaled by {@code 1 / (rowCount - 1)}. All other pairs stay {@code NaN}.
 *
 * {@inheritDoc}
 *
 * @param inData the data table at index 0
 * @param exec execution context used for progress, cancellation and table creation
 * @return the correlation matrix table followed by the correlation model
 * @throws Exception if the execution is canceled or a helper fails
 */
@Override
protected PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable in = (BufferedDataTable) inData[0];
    // row count kept as double so that the divisions below (progress fraction, normalizer)
    // are floating point operations
    final double rC = in.getRowCount();
    int[] includes = getIncludes(in.getDataTableSpec());
    String[] includeNames = m_columnIncludesList.getIncludeList().toArray(new String[0]);
    // fractions of the overall progress assigned to the three phases
    double progNormalize = 0.3;
    double progDetermine = 0.65;
    double progFinish = 1.0 - progNormalize - progDetermine;
    exec.setMessage("Normalizing data");
    final ExecutionMonitor normProg = exec.createSubProgress(progNormalize);
    FilterColumnTable filterTable = new FilterColumnTable(in, includes);
    final int l = includes.length;
    // number of unordered column pairs (i < j)
    int nomCount = (l - 1) * l / 2;
    // one entry per column pair, initialized to NaN
    final HalfDoubleMatrix nominatorMatrix = new HalfDoubleMatrix(includes.length, /*withDiagonal*/
    false);
    nominatorMatrix.fill(Double.NaN);
    // per column: the values seen in the data mapped to an index, or null if the column is not
    // NominalValue-compatible (or, later, has too many distinct values)
    @SuppressWarnings("unchecked") final LinkedHashMap<DataCell, Integer>[] possibleValues = new LinkedHashMap[l];
    DataTableSpec filterTableSpec = filterTable.getDataTableSpec();
    for (int i = 0; i < l; i++) {
        DataColumnSpec cs = filterTableSpec.getColumnSpec(i);
        if (cs.getType().isCompatible(NominalValue.class)) {
            possibleValues[i] = new LinkedHashMap<DataCell, Integer>();
        }
    }
    final int possValueUpperBound = m_maxPossValueCountModel.getIntValue();
    // determines possible values. We can't use those from the domain
    // as the domain can also contain values not present in the data
    // but in the contingency table we need rows/columns to have at least
    // one cell with a value >= 1
    StatisticsTable statTable = new StatisticsTable(filterTable) {

        // that is sort of the constructor in this derived class
        {
            calculateAllMoments(in.getRowCount(), normProg);
        }

        @Override
        protected void calculateMomentInSubClass(final DataRow row) {
            for (int i = 0; i < l; i++) {
                if (possibleValues[i] != null) {
                    DataCell c = row.getCell(i);
                    // note: also take missing value as possible value
                    possibleValues[i].put(c, null);
                    // too many distinct values: stop treating this column as categorical
                    if (possibleValues[i].size() > possValueUpperBound) {
                        possibleValues[i] = null;
                    }
                }
            }
        }
    };
    // assign consecutive indices to the collected values, in the maps' iteration order
    for (LinkedHashMap<DataCell, Integer> map : possibleValues) {
        if (map != null) {
            int index = 0;
            for (Map.Entry<DataCell, Integer> entry : map.entrySet()) {
                entry.setValue(index++);
            }
        }
    }
    // stores all pair-wise contingency tables,
    // contingencyTables[i] == null <--> either column of the corresponding
    // pair is non-categorical.
    // What is a contingency table?
    // http://en.wikipedia.org/wiki/Contingency_table
    int[][][] contingencyTables = new int[nomCount][][];
    // column which only contain one value - no correlation available
    LinkedHashSet<String> constantColumns = new LinkedHashSet<String>();
    // running index of the pair (i, j) in contingencyTables
    int valIndex = 0;
    for (int i = 0; i < l; i++) {
        for (int j = i + 1; j < l; j++) {
            if (possibleValues[i] != null && possibleValues[j] != null) {
                int iSize = possibleValues[i].size();
                int jSize = possibleValues[j].size();
                contingencyTables[valIndex] = new int[iSize][jSize];
            }
            DataColumnSpec colSpecI = filterTableSpec.getColumnSpec(i);
            DataColumnSpec colSpecJ = filterTableSpec.getColumnSpec(j);
            DataType ti = colSpecI.getType();
            DataType tj = colSpecJ.getType();
            if (ti.isCompatible(DoubleValue.class) && tj.isCompatible(DoubleValue.class)) {
                // one of the two columns contains only one value
                if (statTable.getVariance(i) < PMCCPortObjectAndSpec.ROUND_ERROR_OK) {
                    constantColumns.add(colSpecI.getName());
                    nominatorMatrix.set(i, j, Double.NaN);
                } else if (statTable.getVariance(j) < PMCCPortObjectAndSpec.ROUND_ERROR_OK) {
                    constantColumns.add(colSpecJ.getName());
                    nominatorMatrix.set(i, j, Double.NaN);
                } else {
                    // start value for the sum of products accumulated below
                    nominatorMatrix.set(i, j, 0.0);
                }
            }
            valIndex++;
        }
    }
    // report the numeric columns flagged as constant above; their pairs were left at NaN
    if (!constantColumns.isEmpty()) {
        String[] constantColumnNames = constantColumns.toArray(new String[constantColumns.size()]);
        NodeLogger.getLogger(getClass()).info("The following numeric " + "columns contain only one distinct value or have " + "otherwise a low standard deviation: " + Arrays.toString(constantColumnNames));
        // the warning lists at most maxLength entries; if there are more, the last one is "..."
        int maxLength = 4;
        if (constantColumns.size() > maxLength) {
            constantColumnNames = Arrays.copyOf(constantColumnNames, maxLength);
            constantColumnNames[maxLength - 1] = "...";
        }
        setWarningMessage("Some columns contain only one distinct value: " + Arrays.toString(constantColumnNames));
    }
    // z-score normalize the included columns, unless the table has no rows
    DataTable att;
    if (statTable.getNrRows() > 0) {
        att = new Normalizer(statTable, includeNames).doZScoreNorm(// no iteration needed
        exec.createSubProgress(0.0));
    } else {
        att = statTable;
    }
    normProg.setProgress(1.0);
    exec.setMessage("Calculating correlation measure");
    ExecutionMonitor detProg = exec.createSubProgress(progDetermine);
    int rowIndex = 0;
    // per-row buffers: numeric value (NaN if not available) and categorical cell (null if not
    // categorical) of every column
    double[] buf = new double[l];
    DataCell[] catBuf = new DataCell[l];
    boolean containsMissing = false;
    for (DataRow r : att) {
        detProg.checkCanceled();
        for (int i = 0; i < l; i++) {
            catBuf[i] = null;
            buf[i] = Double.NaN;
            DataCell c = r.getCell(i);
            // missing value is also a possible value here
            if (possibleValues[i] != null) {
                catBuf[i] = c;
            } else if (c.isMissing()) {
                containsMissing = true;
            } else if (filterTableSpec.getColumnSpec(i).getType().isCompatible(DoubleValue.class)) {
                buf[i] = ((DoubleValue) c).getDoubleValue();
            }
        }
        valIndex = 0;
        for (int i = 0; i < l; i++) {
            for (int j = i + 1; j < l; j++) {
                double b1 = buf[i];
                double b2 = buf[j];
                if (!Double.isNaN(b1) && !Double.isNaN(b2)) {
                    // numeric pair: add the product of the two values of this row
                    double old = nominatorMatrix.get(i, j);
                    nominatorMatrix.set(i, j, old + b1 * b2);
                } else if (catBuf[i] != null && catBuf[j] != null) {
                    // categorical pair: count the value combination in the contingency table
                    int iIndex = possibleValues[i].get(catBuf[i]);
                    assert iIndex >= 0 : "Value unknown in value list " + "of column " + includeNames[i] + ": " + catBuf[i];
                    int jIndex = possibleValues[j].get(catBuf[j]);
                    assert jIndex >= 0 : "Value unknown in value list " + "of column " + includeNames[j] + ": " + catBuf[j];
                    contingencyTables[valIndex][iIndex][jIndex]++;
                }
                valIndex++;
            }
        }
        rowIndex++;
        detProg.setProgress(rowIndex / rC, "Processing row " + rowIndex + " (\"" + r.getKey() + "\")");
    }
    if (containsMissing) {
        setWarningMessage("Some row(s) contained missing values.");
    }
    detProg.setProgress(1.0);
    // scaling factor applied to the accumulated sums of products
    double normalizer = 1.0 / (rC - 1.0);
    valIndex = 0;
    for (int i = 0; i < l; i++) {
        for (int j = i + 1; j < l; j++) {
            if (contingencyTables[valIndex] != null) {
                nominatorMatrix.set(i, j, computeCramersV(contingencyTables[valIndex]));
            } else if (!Double.isNaN(nominatorMatrix.get(i, j))) {
                double old = nominatorMatrix.get(i, j);
                nominatorMatrix.set(i, j, old * normalizer);
            }
            // else pair of columns is double - string (for instance)
            valIndex++;
        }
    }
    // NOTE(review): this updates normProg (already set to 1.0 above) with progDetermine rather
    // than detProg - confirm this is intended
    normProg.setProgress(progDetermine);
    PMCCPortObjectAndSpec pmccModel = new PMCCPortObjectAndSpec(includeNames, nominatorMatrix);
    ExecutionContext subExec = exec.createSubExecutionContext(progFinish);
    BufferedDataTable out = pmccModel.createCorrelationMatrix(subExec);
    // keep a reference to the output table in a field
    m_correlationTable = out;
    return new PortObject[] { out, pmccModel };
}
Also used : LinkedHashSet(java.util.LinkedHashSet) DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataTableSpec(org.knime.core.data.DataTableSpec) FilterColumnTable(org.knime.base.data.filter.column.FilterColumnTable) StatisticsTable(org.knime.base.data.statistics.StatisticsTable) SettingsModelFilterString(org.knime.core.node.defaultnodesettings.SettingsModelFilterString) DataRow(org.knime.core.data.DataRow) LinkedHashMap(java.util.LinkedHashMap) DataColumnSpec(org.knime.core.data.DataColumnSpec) BufferedDataTable(org.knime.core.node.BufferedDataTable) DataType(org.knime.core.data.DataType) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) PortObject(org.knime.core.node.port.PortObject) Normalizer(org.knime.base.data.normalize.Normalizer) ExecutionContext(org.knime.core.node.ExecutionContext) DoubleValue(org.knime.core.data.DoubleValue) HalfDoubleMatrix(org.knime.base.util.HalfDoubleMatrix) DataCell(org.knime.core.data.DataCell) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)

Aggregations

FilterColumnTable (org.knime.base.data.filter.column.FilterColumnTable): 5, BufferedDataTable (org.knime.core.node.BufferedDataTable): 5, DataTableSpec (org.knime.core.data.DataTableSpec): 4, DefaultDataArray (org.knime.base.node.util.DefaultDataArray): 3, DataRow (org.knime.core.data.DataRow): 3, DataTable (org.knime.core.data.DataTable): 3, PortObject (org.knime.core.node.port.PortObject): 3, HashSet (java.util.HashSet): 2, LinkedHashMap (java.util.LinkedHashMap): 2, LinkedHashSet (java.util.LinkedHashSet): 2, DataArray (org.knime.base.node.util.DataArray): 2, DataColumnSpec (org.knime.core.data.DataColumnSpec): 2, DoubleValue (org.knime.core.data.DoubleValue): 2, ColumnRearranger (org.knime.core.data.container.ColumnRearranger): 2, BufferedDataContainer (org.knime.core.node.BufferedDataContainer): 2, ExecutionContext (org.knime.core.node.ExecutionContext): 2, PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject): 2, SoftReference (java.lang.ref.SoftReference): 1, BitSet (java.util.BitSet): 1, Map (java.util.Map): 1