Search in sources :

Example 31 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class ARFFTableTest method testCreateDataTableSpecFromARFFfileIRIS.

/**
 * Tests the creation of a table spec from the IRIS data in an ARFF file.
 * @throws IOException if it wants to.
 * @throws InvalidSettingsException if it feels like.
 */
public void testCreateDataTableSpecFromARFFfileIRIS() throws IOException, InvalidSettingsException {
    // Purpose: verify column count, column names, column types and the nominal
    // domain of the spec that ARFFTable derives from an ARFF file.
    // NOTE(review): this excerpt looks truncated by extraction -- the statements
    // that write the ARFF text to `out`, the body of the catch clause and the
    // closing braces are not visible here.
    File tempFile = File.createTempFile("ARFFReaderUnitTest", "mini");
    Writer out = new BufferedWriter(new FileWriter(tempFile));
    try {
        // Build the spec from the temp file's URL; the second argument is passed as null.
        DataTableSpec tSpec = ARFFTable.createDataTableSpecFromARFFfile(tempFile.toURI().toURL(), null);
        // Presumably the ARFF header the file is expected to contain (kept for reference):
        // + "% The lovely Iris data set - as we all know it\n"
        // + "\n"
        // + "@RELATION iris\n"
        // + "\n"
        // + "@ATTRIBUTE sepallength REAL\n"
        // + "@ATTRIBUTE sepalwidth REAL\n"
        // + "@ATTRIBUTE petallength REAL\n"
        // + "@ATTRIBUTE petalwidth REAL\n"
        // + "@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}\n"
        // + "\n"
        // Five attributes are expected, in declaration order.
        assertEquals(tSpec.getNumColumns(), 5);
        assertEquals(tSpec.getColumnSpec(0).getName().toString(), "sepallength");
        assertEquals(tSpec.getColumnSpec(1).getName().toString(), "sepalwidth");
        assertEquals(tSpec.getColumnSpec(2).getName().toString(), "petallength");
        assertEquals(tSpec.getColumnSpec(3).getName().toString(), "petalwidth");
        assertEquals(tSpec.getColumnSpec(4).getName().toString(), "class");
        // The first four columns must be double-typed, the last one string-typed.
        assertEquals(tSpec.getColumnSpec(0).getType(), DoubleCell.TYPE);
        assertEquals(tSpec.getColumnSpec(1).getType(), DoubleCell.TYPE);
        assertEquals(tSpec.getColumnSpec(2).getType(), DoubleCell.TYPE);
        assertEquals(tSpec.getColumnSpec(3).getType(), DoubleCell.TYPE);
        assertEquals(tSpec.getColumnSpec(4).getType(), StringCell.TYPE);
        // The domain of the "class" column must hold exactly the three species names.
        assertEquals(tSpec.getColumnSpec(4).getDomain().getValues().size(), 3);
        Set<DataCell> vals = tSpec.getColumnSpec(4).getDomain().getValues();
        assertTrue(vals.contains(new StringCell("Iris-setosa")));
        assertTrue(vals.contains(new StringCell("Iris-versicolor")));
        assertTrue(vals.contains(new StringCell("Iris-virginica")));
    } catch (CanceledExecutionException cee) {
    // no chance to end up here.
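Also used : DataTableSpec(org.knime.core.data.DataTableSpec) StringCell(org.knime.core.data.def.StringCell) CanceledExecutionException(org.knime.core.node.CanceledExecutionException) FileWriter(java.io.FileWriter) DataCell(org.knime.core.data.DataCell) File(java.io.File) BufferedWriter(java.io.BufferedWriter) FileWriter(java.io.FileWriter) Writer(java.io.Writer) BufferedWriter(java.io.BufferedWriter)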

Example 32 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class SorterNodeModelTest method testExecuteBufferedDataTableArrayExecutionContext.

/**
 * Test method for {@link org.knime.base.node.preproc.sorter.SorterNodeModel#execute(org.knime.core.node.BufferedDataTable[], org.knime.core.node.ExecutionContext)}.
 * @throws Exception
 * @throws CanceledExecutionException
 */
public final void testExecuteBufferedDataTableArrayExecutionContext() throws CanceledExecutionException, Exception {
    // Purpose: run the sorter node model (m_snm) on three inputs -- a one-row
    // table, a 50-dimensional and a 100-dimensional unit-matrix table -- and
    // check the order of the returned row keys.
    // NOTE(review): this excerpt looks truncated by extraction -- loop closers,
    // the right-hand side of `RowKey rk =` and the closing braces are missing.
    // try to sort a table with 1 entry
    String[] columnNames = { "col1", "col2", "col3", "col4" };
    DataType[] columnTypes = { DoubleCell.TYPE, StringCell.TYPE, IntCell.TYPE, DoubleCell.TYPE };
    DataRow[] rows = new DataRow[1];
    DataCell[] myRow = new DataCell[4];
    myRow[0] = new DoubleCell(2.4325);
    myRow[1] = new StringCell("Test");
    myRow[2] = new IntCell(7);
    myRow[3] = new DoubleCell(32432.324);
    rows[0] = new DefaultRow(Integer.toString(1), myRow);
    DataTable[] inputTable = { new DefaultTable(rows, columnNames, columnTypes) };
    // Initial value only; resultTable is overwritten by the execute() call below.
    DataTable[] resultTable = { new DefaultTable(rows, columnNames, columnTypes) };
    // set settings
    // Include all four columns and pass `true` as the sort-order flag for each of them.
    String[] includeCols = { "col1", "col2", "col3", "col4" };
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    boolean[] sortorder = { true, true, true, true };
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    resultTable = m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable, EXEC_CONTEXT), EXEC_CONTEXT);
    // test output
    // NOTE(review): no assertion on this iterator is visible in the excerpt.
    RowIterator rowIt = resultTable[0].iterator();
    // *********************************************//
    // try to sort a large array of DataRows
    // In this case we generate a unit matrix
    // *********************************************//
    // start with a little one
    int dimension = 50;
    // *********************************************//
    // set settings
    // One include entry "col<i>" and one `true` sort-order flag per dimension.
    includeCols = new String[dimension];
    for (int i = 0; i < dimension; i++) {
        includeCols[i] = "col" + i;
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    sortorder = new boolean[dimension];
    for (int i = 0; i < dimension; i++) {
        sortorder[i] = true;
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    DataTable[] inputTable2 = { generateUnitMatrixTable(dimension) };
    resultTable = m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable2, EXEC_CONTEXT), EXEC_CONTEXT);
    // test output (should have sorted all rows in reverse order)
    rowIt = resultTable[0].iterator();
    // k is the row key expected next, starting at the highest index.
    int k = dimension - 1;
    while (rowIt.hasNext()) {
        // NOTE(review): right-hand side lost in extraction; presumably the key of
        // the next row from rowIt -- confirm against the original source.
        RowKey rk =;
        int ic = Integer.parseInt(rk.getString());
        Assert.assertEquals(k, ic);
        // NOTE(review): no update of k is visible here; presumably it is
        // decremented per row in the original -- confirm.
    // *********************************************//
    // try to sort a very large array of DataRows
    // In this case we generate a unit matrix
    // *********************************************//
    // dimension 300 => 15,8 secs.
    // dimension 500 => 49,7 secs.
    dimension = 100;
    // *********************************************//
    // set settings
    // Same setup as above, now for the larger dimension.
    includeCols = new String[dimension];
    for (int i = 0; i < dimension; i++) {
        includeCols[i] = "col" + i;
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    sortorder = new boolean[dimension];
    for (int i = 0; i < dimension; i++) {
        sortorder[i] = true;
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    DataTable[] inputTable3 = { generateUnitMatrixTable(dimension) };
    resultTable = m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable3, EXEC_CONTEXT), EXEC_CONTEXT);
    // test output (should have sorted all rows in reverse order)
    rowIt = resultTable[0].iterator();
    k = dimension - 1;
    while (rowIt.hasNext()) {
        // NOTE(review): right-hand side lost in extraction (see the first loop above in this method).
        RowKey rk =;
        int ic = Integer.parseInt(rk.getString());
        Assert.assertEquals(k, ic);
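Also used : DataTable(org.knime.core.data.DataTable) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultTable(org.knime.core.data.def.DefaultTable) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) StringCell(org.knime.core.data.def.StringCell) RowIterator(org.knime.core.data.RowIterator) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) Test(org.junit.Test)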

Example 33 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class LogisticRegressionContent method createCoeffStatisticsTablePortObject.

/**
 * Creates a BufferedDataTable with the coefficient statistics.
 * @param exec The execution context
 * @return a port object (the filled table)
 */
public BufferedDataTable createCoeffStatisticsTablePortObject(final ExecutionContext exec) {
    // Purpose: fill a table (spec from LogRegCoordinator) with one row per
    // (logit, parameter) pair plus one "Constant" row per logit, then return it.
    // NOTE(review): this excerpt looks truncated by extraction -- the bodies of the
    // `else` branches, any update of the row counter `c` and the closing braces
    // are not visible here.
    DataTableSpec tableOutSpec = LogRegCoordinator.createCoeffStatisticsTableSpec();
    BufferedDataContainer dc = exec.createDataContainer(tableOutSpec);
    List<DataCell> logits = this.getLogits();
    List<String> parameters = this.getParameters();
    // Counter used to build the row IDs "Row<c>".
    int c = 0;
    for (DataCell logit : logits) {
        Map<String, Double> coefficients = this.getCoefficients(logit);
        Map<String, Double> stdErrs;
        Map<String, Double> zScores;
        Map<String, Double> pValues;
        // Without a covariance matrix the three statistics maps stay empty
        // (they all share one empty map and are not read further below in that case).
        if (m_covMat == null) {
            HashMap<String, Double> emptyMap = new HashMap<>();
            stdErrs = emptyMap;
            zScores = emptyMap;
            pValues = emptyMap;
        } else {
            stdErrs = this.getStandardErrors(logit);
            zScores = this.getZScores(logit);
            pValues = this.getPValues(logit);
        // One row per parameter: logit name, parameter name, coefficient, and --
        // only when a covariance matrix exists -- std. error, z-score and p-value.
        for (String parameter : parameters) {
            List<DataCell> cells = new ArrayList<>();
            cells.add(new StringCell(logit.toString()));
            cells.add(new StringCell(parameter));
            cells.add(new DoubleCell(coefficients.get(parameter)));
            if (m_covMat != null) {
                cells.add(new DoubleCell(stdErrs.get(parameter)));
                cells.add(new DoubleCell(zScores.get(parameter)));
                cells.add(new DoubleCell(pValues.get(parameter)));
            } else {
            // NOTE(review): the else body is not visible; presumably it pads the
            // row with placeholder cells -- confirm against the original source.
            dc.addRowToTable(new DefaultRow("Row" + c, cells));
        // Intercept row for this logit, labelled "Constant".
        List<DataCell> cells = new ArrayList<>();
        cells.add(new StringCell(logit.toString()));
        cells.add(new StringCell("Constant"));
        cells.add(new DoubleCell(this.getIntercept(logit)));
        if (m_covMat != null) {
            cells.add(new DoubleCell(this.getInterceptStdErr(logit)));
            cells.add(new DoubleCell(this.getInterceptZScore(logit)));
            cells.add(new DoubleCell(this.getInterceptPValue(logit)));
        } else {
        dc.addRowToTable(new DefaultRow("Row" + c, cells));
    // NOTE(review): no dc.close() is visible before getTable() in this excerpt.
    return dc.getTable();
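Also used : DataTableSpec(org.knime.core.data.DataTableSpec) BufferedDataContainer(org.knime.core.node.BufferedDataContainer) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DoubleCell(org.knime.core.data.def.DoubleCell) ArrayList(java.util.ArrayList) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)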

Example 34 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class SampleDataNodeModel method run.

private void run(final DataTableSpec spec, final RowOutput dataOutput, final DataTableSpec clusterSpec, final RowOutput clusterOutput, final ExecutionContext exec) throws Exception {
    // Purpose: draw random cluster centers, build one center row per cluster, then
    // generate the data rows (patterns per cluster, followed by noise rows) while
    // reporting progress on `exec`.
    // NOTE(review): this excerpt looks truncated by extraction -- the statements that
    // push rows to dataOutput/clusterOutput, the increment of `pattern` and the
    // closing braces are not visible here.
    // Seeded generator, so the same seed is used for every draw below.
    Random rand = new Random(m_randomSeed);
    NodeLogger.getLogger(getClass()).info("Using '" + m_randomSeed + "' as seed for random data generation.");
    // All columns but one are treated as numeric dimensions.
    int dimensions = spec.getNumColumns() - 1;
    SizeSequence uniSizes = new SizeSequence(m_uniSize);
    SizeSequence clusters = new SizeSequence(m_clusterCount);
    int l = m_clusterCount.length - 1;
    // Start position of the last entry plus its size = sum of all cluster counts.
    final int overallClusterCount = clusters.getPosition(l) + clusters.getSize(l);
    // Clamp the noise fraction to [0, 1].
    final double noiseFrac = Math.min(Math.max(0.0, m_noiseFrac), 1.0);
    // (The next two lines are the remains of a block comment whose delimiters were lost in extraction.)
         * the cluster centers. If a cluster doesn't restrict a dimension, the
         * value is NaN
    // At least one row is allocated so the generation loop below also runs without clusters.
    double[][] optimalClusters = new double[Math.max(overallClusterCount, 1)][dimensions];
    if (overallClusterCount == 0) {
        Arrays.fill(optimalClusters[0], Double.NaN);
    for (int c = 0; c < overallClusterCount; c++) {
        // Index of the universe this cluster falls into, and the dimension range [startPos, endPos) of that universe.
        int uniToClusterIn = clusters.getIndex(c);
        int startPos = uniSizes.getPosition(uniToClusterIn);
        int endPos = startPos + uniSizes.getSize(uniToClusterIn);
        // assert (universeSize == uniSizes.getSize(uniToClusterIn));
        for (int d = 0; d < dimensions; d++) {
            if (d < startPos || d >= endPos) {
                // Dimension outside the cluster's universe: unrestricted.
                optimalClusters[c][d] = Double.NaN;
            } else {
                // Shrink [min, max] by m_dev * range on both sides and draw the
                // center uniformly from the shrunken interval.
                double min = m_minValues[d];
                double max = m_maxValues[d];
                double range = max - min;
                double min2 = min + m_dev * range;
                double max2 = max - m_dev * range;
                double range2 = max2 - min2;
                double center = min2 + rand.nextDouble() * range2;
                optimalClusters[c][d] = center;
    DataRow[] centerRows = new DataRow[overallClusterCount];
    // One label per cluster, plus one extra slot for "Noise" when noise is enabled.
    int colNameLength = overallClusterCount + (noiseFrac > 0.0 ? 1 : 0);
    StringCell[] colNames = new StringCell[colNameLength];
    for (int i = 0; i < overallClusterCount; i++) {
        double[] cs = optimalClusters[i];
        DataCell[] cells = new DataCell[dimensions];
        for (int c = 0; c < dimensions; c++) {
            // NaN centers become missing cells in the center row.
            if (Double.isNaN(cs[c])) {
                cells[c] = DataType.getMissingCell();
            } else {
                cells[c] = new DoubleCell(cs[c]);
        colNames[i] = new StringCell("Cluster_" + i);
        centerRows[i] = new DefaultRow(colNames[i].toString(), cells);
    if (noiseFrac > 0.0) {
        colNames[overallClusterCount] = new StringCell("Noise");
    // NOTE(review): loop body not visible; presumably each center row is pushed to clusterOutput -- confirm.
    for (DataRow r : centerRows) {
    /* first output (data) comes here */
    // assign attributes to patterns
    int noise = (int) (m_patCount * noiseFrac);
    // Integer division: rows are spread evenly over the clusters; with noise
    // enabled, the remainder is added to the noise rows two lines below.
    int patternsPerCluster = (m_patCount - noise) / optimalClusters.length;
    int patternCount = patternsPerCluster * optimalClusters.length;
    noise = noiseFrac > 0.0 ? m_patCount - patternCount : 0;
    int pattern = 0;
    // Kept as double so the progress quotients below are floating-point divisions.
    double totalCount = m_patCount;
    for (int c = 0; c < optimalClusters.length; c++) {
        // all clusters
        double[] centers = optimalClusters[c];
        // patterns in cluster
        for (int p = 0; p < patternsPerCluster; p++) {
            double[] d = fill(rand, centers);
            // Class label: the cluster's name, or a missing cell when there are no clusters at all.
            DataCell cl = (overallClusterCount > 0 ? colNames[c] : DataType.getMissingCell());
            DataRow r = createRow(RowKey.createRowKey(pattern), d, cl);
            // Effectively-final copy so the lambda can capture the current value.
            final int patternTempFinal = pattern;
            exec.setProgress(pattern / totalCount, () -> ("Added row " + patternTempFinal));
    assert (pattern == patternCount);
    // Noise rows use an all-NaN center.
    double[] noiseCenter = new double[dimensions];
    Arrays.fill(noiseCenter, Double.NaN);
    // draw noise patterns
    for (int i = 0; i < noise; i++) {
        // Row numbering continues after the cluster patterns.
        int index = i + pattern;
        double[] d = fill(rand, noiseCenter);
        // The last label slot holds "Noise" whenever noiseFrac > 0.
        DataCell cl = colNames[colNames.length - 1];
        DataRow r = createRow(RowKey.createRowKey(index), d, cl);
        exec.setProgress(index / totalCount, () -> ("Added row " + index));
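Also used : DoubleCell(org.knime.core.data.def.DoubleCell) DataRow(org.knime.core.data.DataRow) SizeSequence(javax.swing.SizeSequence) Random(java.util.Random) StringCell(org.knime.core.data.def.StringCell) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow)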

Example 35 with StringCell

use of org.knime.core.data.def.StringCell in project knime-core by knime.

the class ParallelCoordinatesPlotter method calculateLines.

/**
 * Calculates the lines containing the mapped data points.
 */
private synchronized List<LineInfo> calculateLines() {
    // Purpose: build one LineInfo per data row by mapping every axis value of the
    // row to a point on the drawing pane.
    // NOTE(review): this excerpt looks truncated by extraction -- the statements that
    // add to `points`, `domainValues` and `lines`, and the closing braces, are
    // not visible here.
    // Nothing to draw without a data provider, a data array or axes: return an empty list.
    if (getDataProvider() == null || getDataProvider().getDataArray(getDataArrayIdx()) == null || m_axes == null) {
        return new ArrayList<LineInfo>();
    DataArray array = getDataProvider().getDataArray(getDataArrayIdx());
    // LOGGER.debug("calculate points: " + m_axes);
    List<LineInfo> lines = new ArrayList<LineInfo>(array.size());
    // Labelled loop so a missing value can skip the whole row from inside the axis loop.
    row: for (DataRow row : array) {
        List<Point> points = new ArrayList<Point>();
        List<DataCell> domainValues = new ArrayList<DataCell>();
        for (ParallelAxis axis : m_axes) {
            // Look up the axis' column by name in the array's table spec.
            int colIdx = array.getDataTableSpec().findColumnIndex(axis.getName());
            DataCell value = row.getCell(colIdx);
            if (value.isMissing() && m_skipMissingValues) {
                continue row;
            // x: position of the axis, obtained by mapping the axis name on the x coordinate.
            int x = (int) getXAxis().getCoordinate().calculateMappedValue(new StringCell(axis.getName()), getDrawingPaneDimension().width);
            // y stays at the MISSING marker unless the cell holds a value.
            int y = MISSING;
            if (!value.isMissing()) {
                // Pane height minus bottom space minus the mapped value
                // (presumably flips to screen coordinates, y growing downwards -- confirm).
                y = getDrawingPaneDimension().height - ParallelCoordinateDrawingPane.BOTTOM_SPACE - (int) axis.getMappedValue(value);
            Point p = new Point(x, y);
        boolean isHilite = delegateIsHiLit(row.getKey());
        // Logically equivalent to (!m_hide || isHilite): when hiding is on, only hilited rows get a line.
        if (!m_hide || (m_hide && isHilite)) {
            LineInfo line = new LineInfo(points, domainValues, m_selected.contains(row.getKey()), isHilite, array.getDataTableSpec().getRowColor(row), array.getDataTableSpec().getRowSizeFactor(row), row.getKey());
    return lines;
Also used : StringCell(org.knime.core.data.def.StringCell) ArrayList(java.util.ArrayList) DataCell(org.knime.core.data.DataCell) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) Point(java.awt.Point) DataRow(org.knime.core.data.DataRow) DataArray(org.knime.base.node.util.DataArray)


StringCell ( DataCell ( DoubleCell ( DefaultRow ( IntCell ( DataRow ( DataTableSpec ( ArrayList (java.util.ArrayList)41 DataColumnSpec ( RowKey ( DataColumnSpecCreator ( BufferedDataContainer (org.knime.core.node.BufferedDataContainer)26 DataType ( LinkedHashSet (java.util.LinkedHashSet)21 BufferedDataTable (org.knime.core.node.BufferedDataTable)20 ColumnRearranger ( InvalidSettingsException (org.knime.core.node.InvalidSettingsException)16 LinkedHashMap (java.util.LinkedHashMap)15 Test (org.junit.Test)15 HashMap (java.util.HashMap)11