Search in sources :

Example 31 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class ARFFTableTest method testCreateDataTableSpecFromARFFfileIRIS.

/**
 * Tests the creation of a table spec from the IRIS data in an ARFF file.
 *
 * <p>The content of {@code ARFF_IRISFULL} is written to a temporary file, whose URL is handed to
 * {@code ARFFTable.createDataTableSpecFromARFFfile}. The resulting spec is then checked for its
 * column count, column names, column types and the domain values of each column.
 *
 * @throws IOException if the temporary file cannot be created or written.
 * @throws InvalidSettingsException if it is thrown while the table spec is created.
 */
public void testCreateDataTableSpecFromARFFfileIRIS() throws IOException, InvalidSettingsException {
    File tempFile = File.createTempFile("ARFFReaderUnitTest", "mini");
    tempFile.deleteOnExit();
    // try-with-resources closes (and thereby flushes) the writer even if write() fails
    try (Writer out = new BufferedWriter(new FileWriter(tempFile))) {
        out.write(ARFF_IRISFULL);
    }

    final DataTableSpec tSpec;
    try {
        tSpec = ARFFTable.createDataTableSpecFromARFFfile(tempFile.toURI().toURL(), null);
    } catch (CanceledExecutionException cee) {
        // Not expected here; fail loudly instead of letting the test pass without any assertion.
        throw new AssertionError("Creation of the table spec was canceled unexpectedly", cee);
    }

    // Header the assertions below are written against (as quoted in the original test):
    // + "% The lovely Iris data set - as we all know it\n"
    // + "\n"
    // + "@RELATION iris\n"
    // + "\n"
    // + "@ATTRIBUTE sepallength REAL\n"
    // + "@ATTRIBUTE sepalwidth REAL\n"
    // + "@ATTRIBUTE petallength REAL\n"
    // + "@ATTRIBUTE petalwidth REAL\n"
    // + "@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}\n"
    // + "\n"

    // assertEquals takes (expected, actual); keeping that order makes failure messages correct
    assertEquals(5, tSpec.getNumColumns());

    // column names
    assertEquals("sepallength", tSpec.getColumnSpec(0).getName());
    assertEquals("sepalwidth", tSpec.getColumnSpec(1).getName());
    assertEquals("petallength", tSpec.getColumnSpec(2).getName());
    assertEquals("petalwidth", tSpec.getColumnSpec(3).getName());
    assertEquals("class", tSpec.getColumnSpec(4).getName());

    // column types: four REAL attributes followed by the nominal class attribute
    assertEquals(DoubleCell.TYPE, tSpec.getColumnSpec(0).getType());
    assertEquals(DoubleCell.TYPE, tSpec.getColumnSpec(1).getType());
    assertEquals(DoubleCell.TYPE, tSpec.getColumnSpec(2).getType());
    assertEquals(DoubleCell.TYPE, tSpec.getColumnSpec(3).getType());
    assertEquals(StringCell.TYPE, tSpec.getColumnSpec(4).getType());

    // the numeric columns must not carry a set of possible values
    assertNull(tSpec.getColumnSpec(0).getDomain().getValues());
    assertNull(tSpec.getColumnSpec(1).getDomain().getValues());
    assertNull(tSpec.getColumnSpec(2).getDomain().getValues());
    assertNull(tSpec.getColumnSpec(3).getDomain().getValues());

    // the class column must list exactly the three nominal values
    Set<DataCell> vals = tSpec.getColumnSpec(4).getDomain().getValues();
    assertEquals(3, vals.size());
    assertTrue(vals.contains(new StringCell("Iris-setosa")));
    assertTrue(vals.contains(new StringCell("Iris-versicolor")));
    assertTrue(vals.contains(new StringCell("Iris-virginica")));
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) StringCell(org.knime.core.data.def.StringCell) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) FileWriter(java.io.FileWriter) DataCell(org.knime.core.data.DataCell) File(java.io.File) BufferedWriter(java.io.BufferedWriter) FileWriter(java.io.FileWriter) Writer(java.io.Writer) BufferedWriter(java.io.BufferedWriter)

Example 32 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class SorterNodeModelTest method testExecuteBufferedDataTableArrayExecutionContext.

/**
 * Test method for {@link org.knime.base.node.preproc.sorter.SorterNodeModel#execute(org.knime.core.node.BufferedDataTable[], org.knime.core.node.ExecutionContext)}.
 *
 * <p>Three scenarios are executed one after the other on the same node model and settings object:
 * a table with a single row, a unit matrix table of dimension 50 and a unit matrix table of
 * dimension 100. The model is reset after each scenario.
 *
 * @throws Exception if loading the settings or executing the node model fails
 * @throws CanceledExecutionException if the execution is canceled
 */
@Test
public final void testExecuteBufferedDataTableArrayExecutionContext() throws CanceledExecutionException, Exception {
    // try to sort a table with 1 entry
    String[] columnNames = { "col1", "col2", "col3", "col4" };
    DataType[] columnTypes = { DoubleCell.TYPE, StringCell.TYPE, IntCell.TYPE, DoubleCell.TYPE };
    DataCell[] myRow = { new DoubleCell(2.4325), new StringCell("Test"), new IntCell(7), new DoubleCell(32432.324) };
    DataRow[] rows = { new DefaultRow(Integer.toString(1), myRow) };
    DataTable[] inputTable = { new DefaultTable(rows, columnNames, columnTypes) };
    // set settings
    String[] includeCols = { "col1", "col2", "col3", "col4" };
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    boolean[] sortorder = { true, true, true, true };
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    m_snm.loadValidatedSettingsFrom(m_settings);
    DataTable[] resultTable = m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable, EXEC_CONTEXT), EXEC_CONTEXT);
    // test output: the single input row must come back unchanged and be the only row
    RowIterator rowIt = resultTable[0].iterator();
    Assert.assertTrue(rowIt.hasNext());
    Assert.assertEquals(rows[0], rowIt.next());
    Assert.assertFalse(rowIt.hasNext());
    m_snm.reset();

    // *********************************************//
    // try to sort a large array of DataRows
    // In this case we generate a unit matrix
    // *********************************************//
    // start with a little one
    sortUnitMatrixAndCheckReverseOrder(50);

    // *********************************************//
    // try to sort a very large array of DataRows
    // In this case we generate a unit matrix
    // *********************************************//
    // dimension 300 => 15,8 secs.
    // dimension 500 => 49,7 secs.
    sortUnitMatrixAndCheckReverseOrder(100);
}

/**
 * Sorts a unit matrix table of the given dimension on all of its columns and checks that the row
 * keys of the result, parsed as integers, count down from {@code dimension - 1}. The node model
 * is reset afterwards.
 *
 * @param dimension the argument passed to {@code generateUnitMatrixTable}; also the number of
 *            include columns ({@code "col0"} to {@code "col" + (dimension - 1)}) written to the settings
 * @throws Exception if loading the settings or executing the node model fails
 */
private void sortUnitMatrixAndCheckReverseOrder(final int dimension) throws Exception {
    // set settings: include every column, each sort-order flag set to true
    String[] includeCols = new String[dimension];
    boolean[] sortorder = new boolean[dimension];
    for (int i = 0; i < dimension; i++) {
        includeCols[i] = "col" + i;
        sortorder[i] = true;
    }
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    DataTable[] inputTable = { generateUnitMatrixTable(dimension) };
    m_snm.loadValidatedSettingsFrom(m_settings);
    DataTable[] resultTable = m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable, EXEC_CONTEXT), EXEC_CONTEXT);
    // test output (should have sorted all rows in reverse order)
    RowIterator rowIt = resultTable[0].iterator();
    Assert.assertTrue(rowIt.hasNext());
    int k = dimension - 1;
    while (rowIt.hasNext()) {
        RowKey rk = rowIt.next().getKey();
        int ic = Integer.parseInt(rk.getString());
        Assert.assertEquals(k, ic);
        k--;
    }
    Assert.assertFalse(rowIt.hasNext());
    m_snm.reset();
}
Also used : DataTable(org.knime.core.data.DataTable) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultTable(org.knime.core.data.def.DefaultTable) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) StringCell(org.knime.core.data.def.StringCell) RowIterator(org.knime.core.data.RowIterator) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) Test(org.junit.Test)

Example 33 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class LogisticRegressionContent method createCoeffStatisticsTablePortObject.

/**
 * Creates a BufferedDataTable with the coefficient statistics.
 *
 * <p>For every logit one row per parameter is written, followed by one row labelled
 * {@code "Constant"} for the intercept. Each row holds the logit, the parameter name, the
 * coefficient and three further cells (standard error, z-score, p-value). When {@code m_covMat}
 * is {@code null}, those three cells are {@code NOT_INVERTIBLE_MISSING} instead. Row keys are
 * {@code "Row1"}, {@code "Row2"}, ... in the order the rows are added.
 *
 * @param exec The execution context
 * @return a port object
 */
public BufferedDataTable createCoeffStatisticsTablePortObject(final ExecutionContext exec) {
    final DataTableSpec outSpec = LogRegCoordinator.createCoeffStatisticsTableSpec();
    final BufferedDataContainer container = exec.createDataContainer(outSpec);
    final List<DataCell> logits = this.getLogits();
    final List<String> parameters = this.getParameters();
    int rowCount = 0;
    for (final DataCell logit : logits) {
        final Map<String, Double> coefficients = this.getCoefficients(logit);
        final Map<String, Double> stdErrs;
        final Map<String, Double> zScores;
        final Map<String, Double> pValues;
        if (m_covMat != null) {
            stdErrs = this.getStandardErrors(logit);
            zScores = this.getZScores(logit);
            pValues = this.getPValues(logit);
        } else {
            // placeholders only; they are never read while m_covMat is null
            final HashMap<String, Double> placeholder = new HashMap<>();
            stdErrs = placeholder;
            zScores = placeholder;
            pValues = placeholder;
        }

        // one row per parameter
        for (final String parameter : parameters) {
            final boolean hasStatistics = m_covMat != null;
            final List<DataCell> rowCells = new ArrayList<>();
            rowCells.add(new StringCell(logit.toString()));
            rowCells.add(new StringCell(parameter));
            rowCells.add(new DoubleCell(coefficients.get(parameter)));
            rowCells.add(hasStatistics ? new DoubleCell(stdErrs.get(parameter)) : NOT_INVERTIBLE_MISSING);
            rowCells.add(hasStatistics ? new DoubleCell(zScores.get(parameter)) : NOT_INVERTIBLE_MISSING);
            rowCells.add(hasStatistics ? new DoubleCell(pValues.get(parameter)) : NOT_INVERTIBLE_MISSING);
            rowCount++;
            container.addRowToTable(new DefaultRow("Row" + rowCount, rowCells));
        }

        // trailing row for the intercept of this logit
        final boolean hasInterceptStatistics = m_covMat != null;
        final List<DataCell> interceptCells = new ArrayList<>();
        interceptCells.add(new StringCell(logit.toString()));
        interceptCells.add(new StringCell("Constant"));
        interceptCells.add(new DoubleCell(this.getIntercept(logit)));
        interceptCells.add(hasInterceptStatistics ? new DoubleCell(this.getInterceptStdErr(logit)) : NOT_INVERTIBLE_MISSING);
        interceptCells.add(hasInterceptStatistics ? new DoubleCell(this.getInterceptZScore(logit)) : NOT_INVERTIBLE_MISSING);
        interceptCells.add(hasInterceptStatistics ? new DoubleCell(this.getInterceptPValue(logit)) : NOT_INVERTIBLE_MISSING);
        rowCount++;
        container.addRowToTable(new DefaultRow("Row" + rowCount, interceptCells));
    }
    container.close();
    return container.getTable();
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 34 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class SampleDataNodeModel method run.

/**
 * Generates the sample data and pushes it to the two outputs.
 *
 * <p>First the cluster centers are computed and pushed, one row per cluster, to
 * {@code clusterOutput}. Then the data rows are generated: an equal number of rows per cluster,
 * followed by the noise rows, all pushed to {@code dataOutput}. Both outputs are closed by this
 * method. All random values come from a single {@link Random} seeded with {@code m_randomSeed},
 * so the order of the draws below determines the generated data.
 *
 * @param spec spec of the data output; its column count minus one is used as the number of
 *            dimensions (presumably the last column holds the cluster label - TODO confirm
 *            against {@code createRow})
 * @param dataOutput receives the generated data rows
 * @param clusterSpec not read by this method
 * @param clusterOutput receives one row per cluster center
 * @param exec used for progress reporting and cancellation checks
 * @throws Exception declared by the signature; NOTE(review): presumably raised by the outputs or
 *             by {@code exec.checkCanceled()} - confirm
 */
private void run(final DataTableSpec spec, final RowOutput dataOutput, final DataTableSpec clusterSpec, final RowOutput clusterOutput, final ExecutionContext exec) throws Exception {
    Random rand = new Random(m_randomSeed);
    NodeLogger.getLogger(getClass()).info("Using '" + m_randomSeed + "' as seed for random data generation.");
    int dimensions = spec.getNumColumns() - 1;
    SizeSequence uniSizes = new SizeSequence(m_uniSize);
    SizeSequence clusters = new SizeSequence(m_clusterCount);
    // start position of the last entry plus its size, i.e. the end of the whole sequence
    int l = m_clusterCount.length - 1;
    final int overallClusterCount = clusters.getPosition(l) + clusters.getSize(l);
    // clamp the noise fraction to [0, 1]
    final double noiseFrac = Math.min(Math.max(0.0, m_noiseFrac), 1.0);
    /*
     * The cluster centers. If a cluster doesn't restrict a dimension, the
     * value is NaN. At least one row is allocated so that the data generation
     * below also works without any cluster; that row is then all NaN.
     */
    double[][] optimalClusters = new double[Math.max(overallClusterCount, 1)][dimensions];
    if (overallClusterCount == 0) {
        Arrays.fill(optimalClusters[0], Double.NaN);
    }
    for (int c = 0; c < overallClusterCount; c++) {
        // entry of m_clusterCount that cluster c falls into, and the dimension range
        // [startPos, endPos) that the same entry of m_uniSize covers
        int uniToClusterIn = clusters.getIndex(c);
        int startPos = uniSizes.getPosition(uniToClusterIn);
        int endPos = startPos + uniSizes.getSize(uniToClusterIn);
        // assert (universeSize == uniSizes.getSize(uniToClusterIn));
        for (int d = 0; d < dimensions; d++) {
            if (d < startPos || d >= endPos) {
                // dimension outside of that range: not restricted by this cluster
                optimalClusters[c][d] = Double.NaN;
            } else {
                // draw the center from the value range of dimension d, shrunk on both
                // sides by m_dev times the range
                double min = m_minValues[d];
                double max = m_maxValues[d];
                double range = max - min;
                double min2 = min + m_dev * range;
                double max2 = max - m_dev * range;
                double range2 = max2 - min2;
                double center = min2 + rand.nextDouble() * range2;
                optimalClusters[c][d] = center;
            }
        }
    }
    DataRow[] centerRows = new DataRow[overallClusterCount];
    // one label per cluster, plus a trailing "Noise" label if noise is generated
    int colNameLength = overallClusterCount + (noiseFrac > 0.0 ? 1 : 0);
    StringCell[] colNames = new StringCell[colNameLength];
    for (int i = 0; i < overallClusterCount; i++) {
        double[] cs = optimalClusters[i];
        DataCell[] cells = new DataCell[dimensions];
        for (int c = 0; c < dimensions; c++) {
            // NaN (unrestricted dimension) is written as a missing cell
            if (Double.isNaN(cs[c])) {
                cells[c] = DataType.getMissingCell();
            } else {
                cells[c] = new DoubleCell(cs[c]);
            }
        }
        // the label doubles as the row key of the center row
        colNames[i] = new StringCell("Cluster_" + i);
        centerRows[i] = new DefaultRow(colNames[i].toString(), cells);
    }
    if (noiseFrac > 0.0) {
        colNames[overallClusterCount] = new StringCell("Noise");
    }
    for (DataRow r : centerRows) {
        clusterOutput.push(r);
    }
    clusterOutput.close();
    /* first output (data) comes here */
    // assign attributes to patterns
    int noise = (int) (m_patCount * noiseFrac);
    // integer division: every cluster gets the same number of rows
    int patternsPerCluster = (m_patCount - noise) / optimalClusters.length;
    int patternCount = patternsPerCluster * optimalClusters.length;
    // with noise enabled the division remainder becomes additional noise rows; without
    // noise it is dropped, so fewer than m_patCount rows may be written
    noise = noiseFrac > 0.0 ? m_patCount - patternCount : 0;
    int pattern = 0;
    // kept as double so that the progress fractions below use floating point division
    double totalCount = m_patCount;
    for (int c = 0; c < optimalClusters.length; c++) {
        // all clusters
        double[] centers = optimalClusters[c];
        // patterns in cluster
        for (int p = 0; p < patternsPerCluster; p++) {
            // NOTE(review): fill is defined elsewhere; presumably it draws a value around each
            // non-NaN center - confirm
            double[] d = fill(rand, centers);
            // without any cluster there is no label to assign
            DataCell cl = (overallClusterCount > 0 ? colNames[c] : DataType.getMissingCell());
            DataRow r = createRow(RowKey.createRowKey(pattern), d, cl);
            dataOutput.push(r);
            // the lambda needs an effectively final copy of the running counter
            final int patternTempFinal = pattern;
            exec.setProgress(pattern / totalCount, () -> ("Added row " + patternTempFinal));
            exec.checkCanceled();
            pattern++;
        }
    }
    assert (pattern == patternCount);
    // noise rows are filled from an all-NaN center, i.e. one that restricts no dimension
    double[] noiseCenter = new double[dimensions];
    Arrays.fill(noiseCenter, Double.NaN);
    // draw noise patterns
    for (int i = 0; i < noise; i++) {
        // row numbering continues after the cluster rows
        int index = i + pattern;
        double[] d = fill(rand, noiseCenter);
        // noise > 0 implies noiseFrac > 0, so the last label is the "Noise" label
        DataCell cl = colNames[colNames.length - 1];
        DataRow r = createRow(RowKey.createRowKey(index), d, cl);
        dataOutput.push(r);
        exec.setProgress(index / totalCount, () -> ("Added row " + index));
        exec.checkCanceled();
    }
    dataOutput.close();
}
Also used : DoubleCell(org.knime.core.data.def.DoubleCell) DataRow(org.knime.core.data.DataRow) SizeSequence(javax.swing.SizeSequence) Random(java.util.Random) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)

Example 35 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class ParallelCoordinatesPlotter method calculateLines.

/**
 * Calculates the lines, containing the mapped data points.
 *
 * <p>One line is built per row of the data array, with one point per axis in {@code m_axes}.
 * A row is skipped entirely if it has a missing value on any axis while
 * {@code m_skipMissingValues} is set; otherwise a missing value gets the y coordinate
 * {@code MISSING}. If {@code m_hide} is set, only hilited rows produce a line.
 *
 * @return the calculated lines; an empty list if there is no data provider, no data array or
 *         no axes
 */
private synchronized List<LineInfo> calculateLines() {
    if (getDataProvider() == null || getDataProvider().getDataArray(getDataArrayIdx()) == null || m_axes == null) {
        return new ArrayList<>();
    }
    final DataArray data = getDataProvider().getDataArray(getDataArrayIdx());
    final List<LineInfo> result = new ArrayList<>(data.size());
    rows: for (final DataRow current : data) {
        final List<Point> linePoints = new ArrayList<>();
        final List<DataCell> cellValues = new ArrayList<>();
        for (final ParallelAxis axis : m_axes) {
            final int columnIndex = data.getDataTableSpec().findColumnIndex(axis.getName());
            final DataCell cell = current.getCell(columnIndex);
            if (cell.isMissing() && m_skipMissingValues) {
                // drop the whole row, not just this point
                continue rows;
            }
            cellValues.add(cell);
            // x position: the axis name mapped onto the width of the drawing pane
            final int x = (int) getXAxis().getCoordinate().calculateMappedValue(new StringCell(axis.getName()), getDrawingPaneDimension().width);
            // y position: measured from the pane height, or MISSING for a missing value
            final int y;
            if (cell.isMissing()) {
                y = MISSING;
            } else {
                y = getDrawingPaneDimension().height - ParallelCoordinateDrawingPane.BOTTOM_SPACE - (int) axis.getMappedValue(cell);
            }
            linePoints.add(new Point(x, y));
        }
        final boolean hilited = delegateIsHiLit(current.getKey());
        if (m_hide && !hilited) {
            // in hide mode only hilited rows are drawn
            continue;
        }
        final LineInfo info = new LineInfo(linePoints, cellValues, m_selected.contains(current.getKey()), hilited, data.getDataTableSpec().getRowColor(current), data.getDataTableSpec().getRowSizeFactor(current), current.getKey());
        info.setShape(data.getDataTableSpec().getRowShape(current));
        result.add(info);
    }
    return result;
}
Also used : StringCell(org.knime.core.data.def.StringCell) ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) Point(java.awt.Point) DataRow(org.knime.core.data.DataRow) DataArray(org.knime.base.node.util.DataArray)

Aggregations

StringCell (org.knime.core.data.def.StringCell)176 DataCell (org.knime.core.data.DataCell)130 DoubleCell (org.knime.core.data.def.DoubleCell)67 DefaultRow (org.knime.core.data.def.DefaultRow)65 IntCell (org.knime.core.data.def.IntCell)55 DataRow (org.knime.core.data.DataRow)52 DataTableSpec (org.knime.core.data.DataTableSpec)49 ArrayList (java.util.ArrayList)41 DataColumnSpec (org.knime.core.data.DataColumnSpec)37 RowKey (org.knime.core.data.RowKey)36 DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator)26 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)26 DataType (org.knime.core.data.DataType)22 LinkedHashSet (java.util.LinkedHashSet)21 BufferedDataTable (org.knime.core.node.BufferedDataTable)20 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)19 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)16 LinkedHashMap (java.util.LinkedHashMap)15 Test (org.junit.Test)15 HashMap (java.util.HashMap)11