Search in sources :

Example 16 with DataTable

use of org.knime.core.data.DataTable in project knime-core by knime.

the class JoinedTableTest method testGetRowIterator.

/**
 * Test for RowIterator. That is the one that is most likely to fail ...
 *
 * <p>Builds a left table from the first three entries of {@code COLS} and a right table from the
 * next three, joins them with {@link JoinedTable} and compares the count returned by
 * {@code checkForEquality} with the expected value for these scenarios:
 * <ul>
 * <li>both tables contain the same row keys in the same order,</li>
 * <li>the right table is shuffled,</li>
 * <li>the right table is shuffled and truncated to 80% of its rows,</li>
 * <li>the left table is truncated to 80% of its rows,</li>
 * <li>the tables share no row keys at all,</li>
 * <li>the left table is empty,</li>
 * <li>the right table is empty.</li>
 * </ul>
 */
public final void testGetRowIterator() {
    DataColumnSpec[] leftCols = new DataColumnSpec[3];
    DataColumnSpec[] rightCols = new DataColumnSpec[3];
    System.arraycopy(COLS, 0, leftCols, 0, 3);
    System.arraycopy(COLS, 3, rightCols, 0, 3);
    final int allLength = 100;
    DataRow[] leftRows = new DataRow[allLength];
    DataRow[] rightRows = new DataRow[allLength];
    // lookup of the right rows by key, handed to checkForEquality as reference
    Hashtable<RowKey, DataRow> rightHash = new Hashtable<RowKey, DataRow>();
    for (int i = 0; i < allLength; i++) {
        String id = "Id_" + i;
        leftRows[i] = getRandomRow(id);
        rightRows[i] = getRandomRow(id);
        rightHash.put(rightRows[i].getKey(), rightRows[i]);
    }
    final DataTable leftTable = new DefaultTable(leftRows, new DataTableSpec(leftCols));
    final DataTable rightTable = new DefaultTable(rightRows, new DataTableSpec(rightCols));
    JoinedTable t = new JoinedTable(leftTable, rightTable);
    // everything comes in order, shouldn't make a problem.
    int count = checkForEquality(t, leftRows, rightHash);
    assertEquals(allLength, count);
    // shuffle the right table
    DataRow[] shuffledRightRows = new DataRow[allLength];
    System.arraycopy(rightRows, 0, shuffledRightRows, 0, allLength);
    // Arrays.asList is a view backed by the array, so shuffling the list
    // reorders shuffledRightRows in place; no copy back is needed.
    Collections.shuffle(Arrays.asList(shuffledRightRows), RAND);
    DataTable shuffleRightTable = new DefaultTable(shuffledRightRows, new DataTableSpec(rightCols));
    t = new JoinedTable(leftTable, shuffleRightTable);
    count = checkForEquality(t, leftRows, rightHash);
    assertEquals(allLength, count);
    // wow, it survived that.
    // let's delete some of the rows in the right table.
    // supposedly, the table will fill it with missing values ...
    final int newLength = (int) (0.8 * allLength);
    DataRow[] shuffledAndTruncRightRows = new DataRow[newLength];
    System.arraycopy(shuffledRightRows, 0, shuffledAndTruncRightRows, 0, newLength);
    // reference lookup without the rows that were cut off
    Hashtable<RowKey, DataRow> newHash = new Hashtable<RowKey, DataRow>(rightHash);
    for (int i = newLength; i < allLength; i++) {
        RowKey removeMe = shuffledRightRows[i].getKey();
        newHash.remove(removeMe);
    }
    DataTable shuffleAndTruncRightTable = new DefaultTable(shuffledAndTruncRightRows, new DataTableSpec(rightCols));
    t = new JoinedTable(leftTable, shuffleAndTruncRightTable);
    count = checkForEquality(t, leftRows, newHash);
    assertEquals(allLength, count);
    // now shorten the left table
    DataRow[] truncLeftRows = new DataRow[newLength];
    System.arraycopy(leftRows, 0, truncLeftRows, 0, newLength);
    DataTable truncLeftTable = new DefaultTable(truncLeftRows, new DataTableSpec(leftCols));
    t = new JoinedTable(truncLeftTable, rightTable);
    count = checkForEquality(t, truncLeftRows, rightHash);
    assertEquals(allLength, count);
    // tables share no rows at all: first half of the left rows, second half of the right rows
    final int halfLength = allLength / 2;
    DataRow[] halfLeftRows = new DataRow[halfLength];
    DataRow[] halfRightRows = new DataRow[halfLength];
    System.arraycopy(leftRows, 0, halfLeftRows, 0, halfLength);
    System.arraycopy(rightRows, halfLength, halfRightRows, 0, halfLength);
    Hashtable<RowKey, DataRow> halfRightHash = new Hashtable<RowKey, DataRow>();
    for (int i = 0; i < halfLength; i++) {
        DataRow current = halfRightRows[i];
        halfRightHash.put(current.getKey(), current);
    }
    DataTable halfLeftTable = new DefaultTable(halfLeftRows, new DataTableSpec(leftCols));
    DataTable halfRightTable = new DefaultTable(halfRightRows, new DataTableSpec(rightCols));
    t = new JoinedTable(halfLeftTable, halfRightTable);
    count = checkForEquality(t, halfLeftRows, halfRightHash);
    assertEquals(2 * halfLength, count);
    // left table is empty
    DataTable emptyLeftTable = new DefaultTable(new DataRow[0], new DataTableSpec(leftCols));
    t = new JoinedTable(emptyLeftTable, halfRightTable);
    count = checkForEquality(t, new DataRow[0], halfRightHash);
    assertEquals(halfLength, count);
    // right table is empty
    DataTable emptyRightTable = new DefaultTable(new DataRow[0], new DataTableSpec(rightCols));
    t = new JoinedTable(halfLeftTable, emptyRightTable);
    count = checkForEquality(t, halfLeftRows, new Hashtable<RowKey, DataRow>());
    assertEquals(halfLength, count);
}
Also used : DataTable(org.knime.core.data.DataTable) DataTableSpec(org.knime.core.data.DataTableSpec) RowKey(org.knime.core.data.RowKey) Hashtable(java.util.Hashtable) DefaultTable(org.knime.core.data.def.DefaultTable) DataRow(org.knime.core.data.DataRow) DataColumnSpec(org.knime.core.data.DataColumnSpec)

Example 17 with DataTable

use of org.knime.core.data.DataTable in project knime-core by knime.

the class JoinedTableTest method testGetDataTableSpec.

/**
 * Test for getDataTableSpec().
 *
 * <p>Joins two empty tables whose specs are the first three and the next three entries of
 * {@code COLS} and checks that the joined table has no rows and that its spec lists the
 * entries of {@code COLS} in order.
 */
public final void testGetDataTableSpec() {
    DataColumnSpec[] leftCols = new DataColumnSpec[3];
    DataColumnSpec[] rightCols = new DataColumnSpec[3];
    System.arraycopy(COLS, 0, leftCols, 0, 3);
    System.arraycopy(COLS, 3, rightCols, 0, 3);
    DataTable leftTable = new DefaultTable(new DataRow[0], new DataTableSpec(leftCols));
    DataTable rightTable = new DefaultTable(new DataRow[0], new DataTableSpec(rightCols));
    JoinedTable t = new JoinedTable(leftTable, rightTable);
    DataTableSpec s = t.getDataTableSpec();
    // Use a test-framework assertion rather than the `assert` keyword: the keyword
    // is silently skipped unless the JVM is started with assertions enabled (-ea).
    assertEquals(false, t.iterator().hasNext());
    assertEquals(COLS.length, s.getNumColumns());
    for (int i = 0; i < COLS.length; i++) {
        assertEquals(COLS[i], s.getColumnSpec(i));
    }
}
Also used : DataTable(org.knime.core.data.DataTable) DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) DefaultTable(org.knime.core.data.def.DefaultTable)

Example 18 with DataTable

use of org.knime.core.data.DataTable in project knime-core by knime.

the class JoinerJoinAnyTest method testRunner.

/**
 * Joins two small, fixed integer tables with the given settings and compares the
 * joiner output row by row with the expected values.
 *
 * @param settings the joiner settings to run with
 * @param reference the expected output rows, in output order
 * @param numBitsInitial value handed to {@code Joiner#setNumBitsInitial(int)}
 * @param numBitsMaximal value handed to {@code Joiner#setNumBitsMaximal(int)}
 * @throws CanceledExecutionException propagated from the execution context or the joiner
 * @throws InvalidSettingsException propagated from the joiner
 */
private final void testRunner(final Joiner2Settings settings, final Integer[][] reference, final int numBitsInitial, final int numBitsMaximal) throws CanceledExecutionException, InvalidSettingsException {
    // Fixture: nine rows on the left, seven on the right, two integer columns each
    final Integer[][] leftValues = {
        { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 }, { 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }
    };
    final Integer[][] rightValues = {
        { 0, 1 }, { 1, 0 }, { 1, 1 }, { 1, 2 }, { 2, 2 }, { 3, 3 }, { 10, 10 }
    };
    final DataTable left = new IntegerTable(new String[] { "L1", "L2" }, leftValues);
    final DataTable right = new IntegerTable(new String[] { "R1", "R2" }, rightValues);
    final BufferedDataTable bufferedLeft = m_exec.createBufferedDataTable(left, m_exec);
    final BufferedDataTable bufferedRight = m_exec.createBufferedDataTable(right, m_exec);

    // Configure the joiner with the bit settings requested by the caller and run it
    final Joiner joiner = new Joiner(left.getDataTableSpec(), right.getDataTableSpec(), settings);
    joiner.setNumBitsInitial(numBitsInitial);
    joiner.setNumBitsMaximal(numBitsMaximal);
    final BufferedDataTable joined = joiner.computeJoinTable(bufferedLeft, bufferedRight, m_exec);
    final Integer[][] actual = toIntegerArray(joined);

    // Same number of rows, then the same content in every row
    Assert.assertEquals(reference.length, actual.length);
    int rowIndex = 0;
    for (final Integer[] expectedRow : reference) {
        Assert.assertArrayEquals(expectedRow, actual[rowIndex]);
        rowIndex++;
    }
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable)

Example 19 with DataTable

use of org.knime.core.data.DataTable in project knime-core by knime.

the class SorterNodeModelTest method testExecuteBufferedDataTableArrayExecutionContext.

/**
 * Test method for {@link org.knime.base.node.preproc.sorter.SorterNodeModel#execute(org.knime.core.node.BufferedDataTable[], org.knime.core.node.ExecutionContext)}.
 *
 * <p>First sorts a table with a single row and expects that row back unchanged, then sorts
 * generated unit matrix tables of dimension 50 and 100 and expects the rows in reverse order.
 *
 * @throws CanceledExecutionException propagated from the sorter execution
 * @throws Exception any other failure raised while configuring or executing the sorter
 */
@Test
public final void testExecuteBufferedDataTableArrayExecutionContext() throws CanceledExecutionException, Exception {
    // try to sort a table with 1 entry
    String[] columnNames = { "col1", "col2", "col3", "col4" };
    DataType[] columnTypes = { DoubleCell.TYPE, StringCell.TYPE, IntCell.TYPE, DoubleCell.TYPE };
    DataRow[] rows = new DataRow[1];
    DataCell[] myRow = new DataCell[4];
    myRow[0] = new DoubleCell(2.4325);
    myRow[1] = new StringCell("Test");
    myRow[2] = new IntCell(7);
    myRow[3] = new DoubleCell(32432.324);
    rows[0] = new DefaultRow(Integer.toString(1), myRow);
    DataTable[] inputTable = { new DefaultTable(rows, columnNames, columnTypes) };
    // set settings
    String[] includeCols = { "col1", "col2", "col3", "col4" };
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    boolean[] sortorder = { true, true, true, true };
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    m_snm.loadValidatedSettingsFrom(m_settings);
    DataTable[] resultTable = m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable, EXEC_CONTEXT), EXEC_CONTEXT);
    // test output
    RowIterator rowIt = resultTable[0].iterator();
    Assert.assertTrue(rowIt.hasNext());
    Assert.assertEquals(rows[0], rowIt.next());
    Assert.assertFalse(rowIt.hasNext());
    m_snm.reset();
    // *********************************************//
    // try to sort a large array of DataRows
    // In this case we generate a unit matrix
    // *********************************************//
    // start with a little one
    sortUnitMatrixAndCheckReverseOrder(50);
    // *********************************************//
    // try to sort a very large array of DataRows
    // In this case we generate a unit matrix
    // *********************************************//
    // dimension 300 => 15,8 secs.
    // dimension 500 => 49,7 secs.
    sortUnitMatrixAndCheckReverseOrder(100);
}

/**
 * Sorts a generated unit matrix table of the given dimension on all of its columns and checks
 * that the row keys of the result count down from {@code dimension - 1}; the sorter model is
 * reset afterwards.
 *
 * @param dimension number of columns to include in the sort and size passed to
 *            {@code generateUnitMatrixTable}
 * @throws Exception any failure raised while configuring or executing the sorter
 */
private void sortUnitMatrixAndCheckReverseOrder(final int dimension) throws Exception {
    // set settings: include every column "col0" .. "col<dimension-1>", all flags set to true
    String[] includeCols = new String[dimension];
    for (int i = 0; i < dimension; i++) {
        includeCols[i] = "col" + i;
    }
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    boolean[] sortorder = new boolean[dimension];
    for (int i = 0; i < dimension; i++) {
        sortorder[i] = true;
    }
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    DataTable[] inputTable = { generateUnitMatrixTable(dimension) };
    m_snm.loadValidatedSettingsFrom(m_settings);
    DataTable[] resultTable = m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable, EXEC_CONTEXT), EXEC_CONTEXT);
    // test output (should have sorted all rows in reverse order)
    RowIterator rowIt = resultTable[0].iterator();
    Assert.assertTrue(rowIt.hasNext());
    int k = dimension - 1;
    while (rowIt.hasNext()) {
        RowKey rk = rowIt.next().getKey();
        int ic = Integer.parseInt(rk.getString());
        Assert.assertEquals(k, ic);
        k--;
    }
    Assert.assertFalse(rowIt.hasNext());
    m_snm.reset();
}
Also used : DataTable(org.knime.core.data.DataTable) RowKey(org.knime.core.data.RowKey) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultTable(org.knime.core.data.def.DefaultTable) DataRow(org.knime.core.data.DataRow) IntCell(org.knime.core.data.def.IntCell) StringCell(org.knime.core.data.def.StringCell) RowIterator(org.knime.core.data.RowIterator) DataType(org.knime.core.data.DataType) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) Test(org.junit.Test)

Example 20 with DataTable

use of org.knime.core.data.DataTable in project knime-core by knime.

the class TableSorterTest method runMemoryTest.

/**
 * Sorts a generated test table twice with the same sorter, once with only the open-container
 * limit set and once with an additional row limit and a memory service attached while the
 * memory usage threshold is lowered, and checks that the second result is in ascending order
 * and identical to the first one.
 *
 * @param numRows number of rows of the generated input table
 * @param maxNumRowsPerContainer value handed to {@code BufferedDataTableSorter#setMaxRows}
 * @param maxOpenContainers value handed to {@code BufferedDataTableSorter#setMaxOpenContainers}
 * @throws CanceledExecutionException propagated from table creation or sorting
 */
private void runMemoryTest(final int numRows, final int maxNumRowsPerContainer, final int maxOpenContainers) throws CanceledExecutionException {
    // Create data with fields that consume a lot memory
    DataTable inputTable = new TestData(numRows, 1);
    BufferedDataTable bdt = m_exec.createBufferedDataTable(inputTable, m_exec);
    BufferedDataTableSorter sorter = new BufferedDataTableSorter(bdt, Arrays.asList("Index"), new boolean[] { true });
    sorter.setMaxOpenContainers(maxOpenContainers);
    BufferedDataTable defaultResult = sorter.sort(m_exec);
    sorter.setMaxRows(maxNumRowsPerContainer);
    // 10MB free memory: set the threshold to the current usage plus 10 MB, capped at 100%
    long currentlyUsed = MemoryAlertSystem.getUsedMemory();
    double fraction = Math.min(1, (currentlyUsed + (10 << 20)) / (double) MemoryAlertSystem.getMaximumMemory());
    MemoryAlertSystem.getInstance().setFractionUsageThreshold(fraction);
    try {
        sorter.setMemService(MemoryAlertSystem.getInstance());
        // run again with changed settings
        BufferedDataTable result = sorter.sort(m_exec);
        // Check if column is sorted in ascending order
        // NOTE(review): assumes cell 0 is the "Index" sort column - confirm against TestData
        int prevValue = Integer.MIN_VALUE;
        for (DataRow row : result) {
            int thisValue = ((IntValue) row.getCell(0)).getIntValue();
            Assert.assertTrue(thisValue >= prevValue);
            // remember the value so the next row is compared with its predecessor
            prevValue = thisValue;
        }
        // Check if it has the same results as defaultResult
        Assert.assertEquals(defaultResult.getRowCount(), result.getRowCount());
        RowIterator defaultIter = defaultResult.iterator();
        RowIterator iter = result.iterator();
        while (defaultIter.hasNext()) {
            DataRow defaultRow = defaultIter.next();
            DataRow row = iter.next();
            Assert.assertEquals(defaultRow.getKey().getString(), row.getKey().getString());
            Iterator<DataCell> defaultCellIter = defaultRow.iterator();
            Iterator<DataCell> cellIter = row.iterator();
            while (defaultCellIter.hasNext()) {
                Assert.assertEquals(defaultCellIter.next(), cellIter.next());
            }
        }
    } finally {
        // always restore the default threshold so other tests are not affected
        MemoryAlertSystem.getInstance().setFractionUsageThreshold(MemoryAlertSystem.DEFAULT_USAGE_THRESHOLD);
    }
}
Also used : DataTable(org.knime.core.data.DataTable) BufferedDataTable(org.knime.core.node.BufferedDataTable) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DataRow(org.knime.core.data.DataRow) IntValue(org.knime.core.data.IntValue)

Aggregations

DataTable (org.knime.core.data.DataTable)64 BufferedDataTable (org.knime.core.node.BufferedDataTable)33 DataRow (org.knime.core.data.DataRow)20 DataTableSpec (org.knime.core.data.DataTableSpec)19 RowKey (org.knime.core.data.RowKey)14 DataCell (org.knime.core.data.DataCell)12 DataColumnSpec (org.knime.core.data.DataColumnSpec)12 ExecutionMonitor (org.knime.core.node.ExecutionMonitor)11 DefaultTable (org.knime.core.data.def.DefaultTable)10 DefaultRow (org.knime.core.data.def.DefaultRow)8 InvalidSettingsException (org.knime.core.node.InvalidSettingsException)7 DefaultDataArray (org.knime.base.node.util.DefaultDataArray)6 DataType (org.knime.core.data.DataType)6 PortObject (org.knime.core.node.port.PortObject)6 RowIterator (org.knime.core.data.RowIterator)5 ColumnRearranger (org.knime.core.data.container.ColumnRearranger)5 ContainerTable (org.knime.core.data.container.ContainerTable)5 DataContainer (org.knime.core.data.container.DataContainer)5 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)5 SettingsModelFilterString (org.knime.core.node.defaultnodesettings.SettingsModelFilterString)5