use of org.knime.core.data.DataTable in project knime-core by knime.
the class JoinedTableTest method testGetRowIterator.
/**
 * Test for RowIterator. That is the one that is most likely to fail ...
 *
 * <p>Joins a left and a right table (three columns each, taken from
 * {@code COLS}) in several constellations and verifies, via
 * {@code checkForEquality}, the joined rows and the resulting count:
 * <ul>
 * <li>both tables complete and in the same order,</li>
 * <li>right table shuffled,</li>
 * <li>right table shuffled and truncated,</li>
 * <li>left table truncated,</li>
 * <li>tables that share no row keys,</li>
 * <li>empty left table and empty right table.</li>
 * </ul>
 */
public final void testGetRowIterator() {
    DataColumnSpec[] leftCols = new DataColumnSpec[3];
    DataColumnSpec[] rightCols = new DataColumnSpec[3];
    System.arraycopy(COLS, 0, leftCols, 0, 3);
    System.arraycopy(COLS, 3, rightCols, 0, 3);
    final int allLength = 100;
    DataRow[] leftRows = new DataRow[allLength];
    DataRow[] rightRows = new DataRow[allLength];
    // lookup of the right rows by key, handed to checkForEquality
    Hashtable<RowKey, DataRow> rightHash = new Hashtable<RowKey, DataRow>();
    for (int i = 0; i < allLength; i++) {
        String id = "Id_" + i;
        leftRows[i] = getRandomRow(id);
        rightRows[i] = getRandomRow(id);
        rightHash.put(rightRows[i].getKey(), rightRows[i]);
    }
    final DataTable leftTable = new DefaultTable(leftRows, new DataTableSpec(leftCols));
    final DataTable rightTable = new DefaultTable(rightRows, new DataTableSpec(rightCols));
    JoinedTable t = new JoinedTable(leftTable, rightTable);
    // everything comes in order, shouldn't make a problem.
    int count = checkForEquality(t, leftRows, rightHash);
    assertEquals(allLength, count);

    // shuffle the right table
    DataRow[] shuffledRightRows = new DataRow[allLength];
    System.arraycopy(rightRows, 0, shuffledRightRows, 0, allLength);
    // Arrays.asList is a write-through view, so this shuffles the array in place
    Collections.shuffle(Arrays.asList(shuffledRightRows), RAND);
    DataTable shuffleRightTable = new DefaultTable(shuffledRightRows, new DataTableSpec(rightCols));
    t = new JoinedTable(leftTable, shuffleRightTable);
    count = checkForEquality(t, leftRows, rightHash);
    assertEquals(allLength, count);

    // wow, it survived that.
    // let's delete some of the rows in the right table.
    // supposedly, the table will fill it with missing values ...
    final int newLength = (int) (0.8 * allLength);
    DataRow[] shuffledAndTruncRightRows = new DataRow[newLength];
    System.arraycopy(shuffledRightRows, 0, shuffledAndTruncRightRows, 0, newLength);
    // the reference lookup must not contain the rows that were cut off
    Hashtable<RowKey, DataRow> newHash = new Hashtable<RowKey, DataRow>(rightHash);
    for (int i = newLength; i < allLength; i++) {
        RowKey removeMe = shuffledRightRows[i].getKey();
        newHash.remove(removeMe);
    }
    DataTable shuffleAndTruncRightTable = new DefaultTable(shuffledAndTruncRightRows, new DataTableSpec(rightCols));
    t = new JoinedTable(leftTable, shuffleAndTruncRightTable);
    count = checkForEquality(t, leftRows, newHash);
    assertEquals(allLength, count);

    // now shorten the left table; the expected count stays at allLength
    DataRow[] truncLeftRows = new DataRow[newLength];
    System.arraycopy(leftRows, 0, truncLeftRows, 0, newLength);
    DataTable truncLeftTable = new DefaultTable(truncLeftRows, new DataTableSpec(leftCols));
    t = new JoinedTable(truncLeftTable, rightTable);
    count = checkForEquality(t, truncLeftRows, rightHash);
    assertEquals(allLength, count);

    // tables share no rows at all: left gets the first half of the keys,
    // right gets the second half
    final int halfLength = allLength / 2;
    DataRow[] halfLeftRows = new DataRow[halfLength];
    DataRow[] halfRightRows = new DataRow[halfLength];
    System.arraycopy(leftRows, 0, halfLeftRows, 0, halfLength);
    System.arraycopy(rightRows, halfLength, halfRightRows, 0, halfLength);
    Hashtable<RowKey, DataRow> halfRightHash = new Hashtable<RowKey, DataRow>();
    for (int i = 0; i < halfLength; i++) {
        DataRow current = halfRightRows[i];
        halfRightHash.put(current.getKey(), current);
    }
    DataTable halfLeftTable = new DefaultTable(halfLeftRows, new DataTableSpec(leftCols));
    DataTable halfRightTable = new DefaultTable(halfRightRows, new DataTableSpec(rightCols));
    t = new JoinedTable(halfLeftTable, halfRightTable);
    count = checkForEquality(t, halfLeftRows, halfRightHash);
    assertEquals(2 * halfLength, count);

    // left table is empty
    DataTable emptyLeftTable = new DefaultTable(new DataRow[0], new DataTableSpec(leftCols));
    t = new JoinedTable(emptyLeftTable, halfRightTable);
    count = checkForEquality(t, new DataRow[0], halfRightHash);
    assertEquals(halfLength, count);

    // right table is empty
    DataTable emptyRightTable = new DefaultTable(new DataRow[0], new DataTableSpec(rightCols));
    t = new JoinedTable(halfLeftTable, emptyRightTable);
    count = checkForEquality(t, halfLeftRows, new Hashtable<RowKey, DataRow>());
    assertEquals(halfLength, count);
}
use of org.knime.core.data.DataTable in project knime-core by knime.
the class JoinedTableTest method testGetDataTableSpec.
/**
 * Test for getDataTableSpec().
 *
 * <p>Joins two empty tables whose specs are the first and the last three
 * entries of {@code COLS} and checks that the joined table has no rows and
 * that its spec lists all of {@code COLS} in order.
 */
public final void testGetDataTableSpec() {
    DataColumnSpec[] leftCols = new DataColumnSpec[3];
    DataColumnSpec[] rightCols = new DataColumnSpec[3];
    System.arraycopy(COLS, 0, leftCols, 0, 3);
    System.arraycopy(COLS, 3, rightCols, 0, 3);
    DataTable leftTable = new DefaultTable(new DataRow[0], new DataTableSpec(leftCols));
    DataTable rightTable = new DefaultTable(new DataRow[0], new DataTableSpec(rightCols));
    JoinedTable t = new JoinedTable(leftTable, rightTable);
    DataTableSpec s = t.getDataTableSpec();
    // use a JUnit assertion: the Java "assert" keyword is skipped unless
    // the JVM is started with -ea, so the check would never fail otherwise
    assertFalse(t.iterator().hasNext());
    assertEquals(COLS.length, s.getNumColumns());
    for (int i = 0; i < COLS.length; i++) {
        assertEquals(COLS[i], s.getColumnSpec(i));
    }
}
use of org.knime.core.data.DataTable in project knime-core by knime.
the class JoinerJoinAnyTest method testRunner.
/**
 * Joins two fixed integer tables with the given settings and compares the
 * joiner output, row by row, with the expected values.
 *
 * @param settings the joiner settings under test
 * @param reference the expected output, one {@code Integer[]} per row
 * @param numBitsInitial value passed to {@code Joiner#setNumBitsInitial}
 * @param numBitsMaximal value passed to {@code Joiner#setNumBitsMaximal}
 * @throws CanceledExecutionException if the execution is canceled
 * @throws InvalidSettingsException if the joiner rejects the settings
 */
private final void testRunner(final Joiner2Settings settings, final Integer[][] reference, final int numBitsInitial, final int numBitsMaximal) throws CanceledExecutionException, InvalidSettingsException {
    // fixed input values for the left ("L1", "L2") and right ("R1", "R2") table
    final Integer[][] leftValues = { { 0, 0 }, { 0, 1 }, { 1, 0 }, { 1, 1 }, { 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 } };
    final Integer[][] rightValues = { { 0, 1 }, { 1, 0 }, { 1, 1 }, { 1, 2 }, { 2, 2 }, { 3, 3 }, { 10, 10 } };
    final DataTable left = new IntegerTable(new String[] { "L1", "L2" }, leftValues);
    final DataTable right = new IntegerTable(new String[] { "R1", "R2" }, rightValues);
    final BufferedDataTable bufferedLeft = m_exec.createBufferedDataTable(left, m_exec);
    final BufferedDataTable bufferedRight = m_exec.createBufferedDataTable(right, m_exec);

    // set up the joiner with the caller-supplied bit settings and run it
    final Joiner joiner = new Joiner(left.getDataTableSpec(), right.getDataTableSpec(), settings);
    joiner.setNumBitsInitial(numBitsInitial);
    joiner.setNumBitsMaximal(numBitsMaximal);
    final Integer[][] actual = toIntegerArray(joiner.computeJoinTable(bufferedLeft, bufferedRight, m_exec));

    // same number of rows, and every row matches the reference
    Assert.assertEquals(reference.length, actual.length);
    for (int row = 0; row < reference.length; row++) {
        Assert.assertArrayEquals(reference[row], actual[row]);
    }
}
use of org.knime.core.data.DataTable in project knime-core by knime.
the class SorterNodeModelTest method testExecuteBufferedDataTableArrayExecutionContext.
/**
 * Test method for {@link org.knime.base.node.preproc.sorter.SorterNodeModel#execute(org.knime.core.node.BufferedDataTable[], org.knime.core.node.ExecutionContext)}.
 *
 * <p>First sorts a table with a single row and expects that row back
 * unchanged, then sorts unit-matrix tables of dimension 50 and 100 and
 * expects the row keys in descending order.
 *
 * @throws CanceledExecutionException if the execution is canceled
 * @throws Exception if loading the settings or executing the node fails
 */
@Test
public final void testExecuteBufferedDataTableArrayExecutionContext() throws CanceledExecutionException, Exception {
    // try to sort a table with 1 entry
    String[] columnNames = { "col1", "col2", "col3", "col4" };
    DataType[] columnTypes = { DoubleCell.TYPE, StringCell.TYPE, IntCell.TYPE, DoubleCell.TYPE };
    DataCell[] myRow = new DataCell[4];
    myRow[0] = new DoubleCell(2.4325);
    myRow[1] = new StringCell("Test");
    myRow[2] = new IntCell(7);
    myRow[3] = new DoubleCell(32432.324);
    DataRow[] rows = new DataRow[1];
    rows[0] = new DefaultRow(Integer.toString(1), myRow);
    DataTable[] inputTable = { new DefaultTable(rows, columnNames, columnTypes) };
    // set settings
    String[] includeCols = { "col1", "col2", "col3", "col4" };
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    boolean[] sortorder = { true, true, true, true };
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    m_snm.loadValidatedSettingsFrom(m_settings);
    DataTable[] resultTable = m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable, EXEC_CONTEXT), EXEC_CONTEXT);
    // test output: exactly the one input row must come back
    RowIterator rowIt = resultTable[0].iterator();
    Assert.assertTrue(rowIt.hasNext());
    Assert.assertEquals(rows[0], rowIt.next());
    Assert.assertFalse(rowIt.hasNext());
    m_snm.reset();

    // *********************************************//
    // try to sort a large array of DataRows
    // In this case we generate a unit matrix
    // *********************************************//
    // start with a little one
    sortUnitMatrixAndCheckReverseOrder(50);

    // *********************************************//
    // try to sort a very large array of DataRows
    // In this case we generate a unit matrix
    // *********************************************//
    // dimension 300 => 15,8 secs.
    // dimension 500 => 49,7 secs.
    sortUnitMatrixAndCheckReverseOrder(100);
}

/**
 * Sorts the table produced by {@code generateUnitMatrixTable(dimension)}
 * on the columns "col0" .. "col(dimension-1)", with every sort-order flag
 * set to {@code true}, and asserts that the row keys, parsed as integers,
 * count down from {@code dimension - 1}. Resets the node model afterwards.
 *
 * @param dimension the dimension passed to {@code generateUnitMatrixTable}
 * @throws Exception if loading the settings or executing the node fails
 */
private void sortUnitMatrixAndCheckReverseOrder(final int dimension) throws Exception {
    // set settings: include every column, same sort-order flag for all
    String[] includeCols = new String[dimension];
    boolean[] sortorder = new boolean[dimension];
    for (int i = 0; i < dimension; i++) {
        includeCols[i] = "col" + i;
        sortorder[i] = true;
    }
    m_settings.addStringArray(SorterNodeModel.INCLUDELIST_KEY, includeCols);
    m_settings.addBooleanArray(SorterNodeModel.SORTORDER_KEY, sortorder);
    DataTable[] inputTable = { generateUnitMatrixTable(dimension) };
    m_snm.loadValidatedSettingsFrom(m_settings);
    DataTable[] resultTable = m_snm.execute(EXEC_CONTEXT.createBufferedDataTables(inputTable, EXEC_CONTEXT), EXEC_CONTEXT);
    // test output (should have sorted all rows in reverse order)
    RowIterator rowIt = resultTable[0].iterator();
    Assert.assertTrue(rowIt.hasNext());
    int k = dimension - 1;
    while (rowIt.hasNext()) {
        RowKey rk = rowIt.next().getKey();
        int ic = Integer.parseInt(rk.getString());
        Assert.assertEquals(k, ic);
        k--;
    }
    Assert.assertFalse(rowIt.hasNext());
    m_snm.reset();
}
use of org.knime.core.data.DataTable in project knime-core by knime.
the class TableSorterTest method runMemoryTest.
/**
 * Sorts a generated table twice with the same sorter: once with only the
 * maximum number of open containers set, and once more after also limiting
 * the rows per container and attaching the memory alert system with a
 * lowered usage threshold. The second result must be sorted and must equal
 * the first result row by row and cell by cell.
 *
 * @param numRows number of rows of the generated {@code TestData} table
 * @param maxNumRowsPerContainer value passed to {@code setMaxRows}
 * @param maxOpenContainers value passed to {@code setMaxOpenContainers}
 * @throws CanceledExecutionException if the execution is canceled
 */
private void runMemoryTest(final int numRows, final int maxNumRowsPerContainer, final int maxOpenContainers) throws CanceledExecutionException {
    // Create data with fields that consume a lot memory
    DataTable inputTable = new TestData(numRows, 1);
    BufferedDataTable bdt = m_exec.createBufferedDataTable(inputTable, m_exec);
    BufferedDataTableSorter sorter = new BufferedDataTableSorter(bdt, Arrays.asList("Index"), new boolean[] { true });
    sorter.setMaxOpenContainers(maxOpenContainers);
    // reference result, produced before the row limit and memory service are set
    BufferedDataTable defaultResult = sorter.sort(m_exec);
    sorter.setMaxRows(maxNumRowsPerContainer);
    // 10MB free memory: threshold = (used + 10MB) / max, capped at 1
    long currentlyUsed = MemoryAlertSystem.getUsedMemory();
    double fraction = Math.min(1, (currentlyUsed + (10 << 20)) / (double) MemoryAlertSystem.getMaximumMemory());
    MemoryAlertSystem.getInstance().setFractionUsageThreshold(fraction);
    try {
        sorter.setMemService(MemoryAlertSystem.getInstance());
        // run again with changed settings
        BufferedDataTable result = sorter.sort(m_exec);
        // Check if column is sorted in ascending order
        // (assumes column 0 is the sorted "Index" column, as the original check did)
        int prevValue = Integer.MIN_VALUE;
        for (DataRow row : result) {
            int thisValue = ((IntValue) row.getCell(0)).getIntValue();
            Assert.assertTrue(thisValue >= prevValue);
            // remember the value; without this every row is only compared
            // against Integer.MIN_VALUE and the check can never fail
            prevValue = thisValue;
        }
        // Check if it has the same results as defaultResult
        Assert.assertEquals(defaultResult.getRowCount(), result.getRowCount());
        RowIterator defaultIter = defaultResult.iterator();
        RowIterator iter = result.iterator();
        while (defaultIter.hasNext()) {
            DataRow defaultRow = defaultIter.next();
            DataRow row = iter.next();
            Assert.assertEquals(defaultRow.getKey().getString(), row.getKey().getString());
            Iterator<DataCell> defaultCellIter = defaultRow.iterator();
            Iterator<DataCell> cellIter = row.iterator();
            while (defaultCellIter.hasNext()) {
                Assert.assertEquals(defaultCellIter.next(), cellIter.next());
            }
        }
    } finally {
        // always restore the default threshold so later tests are unaffected
        MemoryAlertSystem.getInstance().setFractionUsageThreshold(MemoryAlertSystem.DEFAULT_USAGE_THRESHOLD);
    }
}
Aggregations