Search in sources :

Example 71 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class AbstractColumnTableSorter method sortOnDisk.

/**
 * Sorts the given data table using a disk-based k-way merge sort.
 *
 * <p>Phase one reads the input in chunks (as many rows as {@code fillBuffer} accepts per call),
 * sorts each chunk and writes it through one {@link AbstractTableSorter} per sort description.
 * Phase two merges the chunks of every sort description and hands the aggregated rows to the
 * given listener.
 *
 * @param dataTable the data table that should be sorted
 * @param exec an execution monitor used for progress reporting and cancellation checks
 * @param resultListener consumer that receives one aggregated row per merged output row
 * @throws CanceledExecutionException if the user has canceled execution
 */
private void sortOnDisk(final DataTable dataTable, final ExecutionMonitor exec, final SortingConsumer resultListener) throws CanceledExecutionException {
    final List<AbstractTableSorter> columnPartitions = new ArrayList<>(m_sortDescriptions.length);
    // Each sorting description is done as a single external merge sort of their parts of the data
    for (int i = 0; i < m_sortDescriptions.length; i++) {
        AbstractTableSorter tableSorter = createTableSorter(m_rowCount, m_sortDescriptions[i].createDataTableSpec(m_dataTableSpec), m_sortDescriptions[i]);
        columnPartitions.add(tableSorter);
    }
    exec.setMessage("Reading table");
    RowIterator iterator = dataTable.iterator();
    ExecutionMonitor readProgress = exec.createSubProgress(0.7);
    // phase one: create as big chunks as possible from the given input table
    // for each sort description
    int chunkCount = 0;
    long currentTotalRows = 0L;
    while (iterator.hasNext()) {
        LOGGER.debugWithFormat("Reading temporary tables -- (chunk %d)", chunkCount);
        assert m_buffer.values().stream().allMatch(l -> l.isEmpty());
        long bufferedRows = fillBuffer(iterator, exec);
        LOGGER.debugWithFormat("Writing temporary tables -- (chunk %d with %d rows)", chunkCount, bufferedRows);
        currentTotalRows += bufferedRows;
        readProgress.setProgress(currentTotalRows / (double) m_rowCount, String.format("Writing temporary tables (chunk %d with %d rows)", chunkCount, bufferedRows));
        LOGGER.debugWithFormat("Sorting temporary tables -- (chunk %d with %d rows)", chunkCount, bufferedRows);
        sortBufferInParallel();
        for (AbstractTableSorter tableSorter : columnPartitions) {
            tableSorter.openChunk();
        }
        LOGGER.debugWithFormat("Writing temporary tables (chunk %d with %d rows)", chunkCount, bufferedRows);
        for (int i = 0; i < m_sortDescriptions.length; i++) {
            SortingDescription sortingDescription = m_sortDescriptions[i];
            LOGGER.debugWithFormat("Writing temporary table (chunk %d, column %d)", chunkCount, i);
            AbstractTableSorter tableSorter = columnPartitions.get(i);
            ListIterator<DataRow> rowIterator = m_buffer.get(sortingDescription).listIterator();
            while (rowIterator.hasNext()) {
                tableSorter.addRowToChunk(rowIterator.next());
                // release the row as early as possible
                rowIterator.set(null);
            }
            exec.checkCanceled();
        }
        for (AbstractTableSorter tableSorter : columnPartitions) {
            tableSorter.closeChunk();
        }
        clearBuffer();
        // count the chunk only once it is completely written, so that every log line of this
        // iteration reports the same chunk index; after the loop this is the number of chunks
        chunkCount += 1;
    }
    readProgress.setProgress(1.0);
    // phase 2: merge the temporary tables
    exec.setMessage("Merging temporary tables.");
    ExecutionMonitor mergingProgress = exec.createSubProgress(0.3);
    List<Iterator<DataRow>> partitionRowIterators = mergePartitions(columnPartitions, mergingProgress, chunkCount);
    // publish the results to the listener
    List<DataRow> currentRow = new ArrayList<>();
    long rowNo = 0;
    // the first partition drives the loop; with no partitions there is nothing to publish
    Iterator<DataRow> firstPartitionIterator = partitionRowIterators.isEmpty() ? Collections.<DataRow>emptyList().iterator() : partitionRowIterators.get(0);
    while (firstPartitionIterator.hasNext()) {
        // take one row from every partition and combine them into a single output row
        for (int i = 0; i < partitionRowIterators.size(); i++) {
            currentRow.add(partitionRowIterators.get(i).next());
        }
        resultListener.consume(aggregateRows(new RowKey("AutoGenerated" + rowNo++), currentRow));
        currentRow.clear();
    }
}
Also used : RowKey(org.knime.core.data.RowKey) ArrayList(java.util.ArrayList) BlobSupportDataRow(org.knime.core.data.container.BlobSupportDataRow) DataRow(org.knime.core.data.DataRow) RowIterator(org.knime.core.data.RowIterator) ListIterator(java.util.ListIterator) Iterator(java.util.Iterator) RowIterator(org.knime.core.data.RowIterator) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 72 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class DataContainerTest method testBigFile.

// testAddRowToTable()
/**
 * Writes a table of random rows into a {@link DataContainer} and lets two threads read it back
 * concurrently, checking that each thread sees the same rows in the same order.
 */
public void testBigFile() {
    // According to the original note, settings of 50 columns and 1000 rows wrote a 250MB cache
    // file (last checked 31 August 2006); the row count used below is smaller than that.
    final int colCount = 50;
    final int rowCount = 100;
    String[] names = new String[colCount];
    DataType[] types = new DataType[colCount];
    // cycle the column types: double, string, int, double, ...
    for (int c = 0; c < colCount; c++) {
        names[c] = "Column " + c;
        switch(c % 3) {
            case 0:
                types[c] = DoubleCell.TYPE;
                break;
            case 1:
                types[c] = StringCell.TYPE;
                break;
            case 2:
                types[c] = IntCell.TYPE;
                break;
            default:
                throw new InternalError();
        }
    }
    DataTableSpec spec = new DataTableSpec(names, types);
    names = null;
    types = null;
    DataContainer container = new DataContainer(spec);
    final ObjectToDataCellConverter conv = new ObjectToDataCellConverter();
    final long seed = System.currentTimeMillis();
    Random rand = new Random(seed);
    for (int i = 0; i < rowCount; i++) {
        DataRow row = createRandomRow(i, colCount, rand, conv);
        container.addRowToTable(row);
    }
    container.close();
    // single slot shared by both reader threads; holds a failure raised in either of them
    final Throwable[] throwables = new Throwable[1];
    final DataTable table = container.getTable();
    Runnable runnable = new Runnable() {

        @Override
        public void run() {
            try {
                int i = 0;
                // re-create the expected rows from the same seed that was used for writing
                Random rand1 = new Random(seed);
                for (RowIterator it = table.iterator(); it.hasNext(); i++) {
                    DataRow row1 = createRandomRow(i, colCount, rand1, conv);
                    DataRow row2 = it.next();
                    assertEquals(row1, row2);
                }
                // expected value first, so a failure message reads correctly
                assertEquals(rowCount, i);
            } catch (Throwable t) {
                throwables[0] = t;
            }
        }
    };
    // Runnable
    // make two threads read the buffer (file) concurrently.
    Thread t1 = new Thread(runnable);
    Thread t2 = new Thread(runnable);
    t1.start();
    t2.start();
    try {
        // seems that the event dispatch thread must not release the
        // reference to the table, otherwise it is (I guess!!) garbage
        // collected: You comment these lines and see the error message.
        t1.join();
        t2.join();
    } catch (InterruptedException ie) {
        // restore the interrupt flag so code further up the stack can still observe it
        Thread.currentThread().interrupt();
        ie.printStackTrace();
        fail();
    }
    if (throwables[0] != null) {
        // report the seed: the rows are random, so it is needed to reproduce the failure
        throw new RuntimeException("Concurrent read failed (random seed " + seed + ")", throwables[0]);
    }
}
Also used : DataTable(org.knime.core.data.DataTable) DataTableSpec(org.knime.core.data.DataTableSpec) ObjectToDataCellConverter(org.knime.core.data.util.ObjectToDataCellConverter) DataRow(org.knime.core.data.DataRow) Random(java.util.Random) RowIterator(org.knime.core.data.RowIterator) DataType(org.knime.core.data.DataType)

Example 73 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class DataContainerTest method testMemoryAlertWhileWrite.

/**
 * Adds half of the generated rows to a container, calls
 * <code>writeAllRowsFromListToFile()</code> on the container's buffer, adds the
 * remaining rows and then checks that the resulting table yields the generated
 * rows in order.
 *
 * @throws Exception if any of the container or buffer operations fails
 */
public void testMemoryAlertWhileWrite() throws Exception {
    final int totalRows = 10;
    final int firstHalf = totalRows / 2;
    DataContainer container = new DataContainer(SPEC_STR_INT_DBL, true, 1000000);
    RowIterator source = generateRows(totalRows);
    int added = 0;
    // first half of the rows, added before the buffer is written out
    while (added < firstHalf) {
        container.addRowToTable(source.next());
        added++;
    }
    // NOTE(review): presumably this mimics what a memory alert triggers -- confirm against Buffer
    Buffer containerBuffer = container.getBuffer();
    synchronized (containerBuffer) {
        containerBuffer.writeAllRowsFromListToFile();
    }
    // second half of the rows, added after the buffer was written out
    while (added < totalRows) {
        container.addRowToTable(source.next());
        added++;
    }
    container.close();
    RowIterator actualRows = container.getTable().iterator();
    RowIterator expectedRows = generateRows(totalRows);
    while (expectedRows.hasNext()) {
        DataRow expectedRow = expectedRows.next();
        DataRow actualRow = actualRows.next();
        assertEquals(expectedRow, actualRow);
    }
}
Also used : RowIterator(org.knime.core.data.RowIterator) DataRow(org.knime.core.data.DataRow)

Example 74 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class DataContainerTest method testRowOrder.

/**
 * Method being tested: addRowToTable(). Rows added in a shuffled key order
 * must come back from the table in exactly that order.
 */
public final void testRowOrder() {
    final int keyCount = 500;
    // keys "0".."499" in ascending order first ...
    Vector<RowKey> expectedKeys = new Vector<RowKey>(keyCount);
    int next = 0;
    while (next < keyCount) {
        expectedKeys.add(new RowKey(Integer.toString(next)));
        next++;
    }
    // ... then shuffled, so insertion order differs from any natural ordering of the keys
    Collections.shuffle(expectedKeys);
    // rows carry no cells, only the key matters here
    DataCell[] noCells = new DataCell[0];
    DataContainer container = new DataContainer(EMPTY_SPEC);
    for (RowKey rowKey : expectedKeys) {
        container.addRowToTable(new DefaultRow(rowKey, noCells));
    }
    container.close();
    DataTable result = container.getTable();
    RowIterator rows = result.iterator();
    int index = 0;
    while (rows.hasNext()) {
        DataRow current = rows.next();
        assertEquals(current.getKey().getString(), expectedKeys.get(index).getString());
        index++;
    }
    // every key must have been seen
    assertEquals(index, expectedKeys.size());
}
Also used : DataTable(org.knime.core.data.DataTable) RowKey(org.knime.core.data.RowKey) RowIterator(org.knime.core.data.RowIterator) DataCell(org.knime.core.data.DataCell) DefaultRow(org.knime.core.data.def.DefaultRow) Vector(java.util.Vector) DataRow(org.knime.core.data.DataRow)

Example 75 with RowIterator

use of org.knime.core.data.RowIterator in project knime-core by knime.

the class DefaultTableTest method createWorkingTables.

// testDefaultTable()
/**
 * Builds several <code>DefaultTable</code> objects from the CASE_1 fixtures
 * that are expected to be constructed without problems, and verifies the spec
 * and the cell content of the first one.
 */
private void createWorkingTables() {
    // fully specified table: content, row header and column header
    DefaultTable table = new DefaultTable(CASE_1_CONTENT, CASE_1_ROWHEADER, CASE_1_COLHEADER);
    DataTableSpec tableSpec = table.getDataTableSpec();
    assertEquals(CASE_1_COLHEADER.length, tableSpec.getNumColumns());
    // spec check: every column carries the given name and is string compatible
    int col = 0;
    while (col < CASE_1_COLHEADER.length) {
        DataColumnSpec columnSpec = tableSpec.getColumnSpec(col);
        String name = columnSpec.getName().toString();
        DataType columnType = columnSpec.getType();
        assertEquals(name, CASE_1_COLHEADER[col]);
        assertTrue(columnType.isCompatible(StringValue.class));
        col++;
    }
    // content check: each cell matches the corresponding entry of the content array
    RowIterator rows = table.iterator();
    int rowIndex = 0;
    while (rows.hasNext()) {
        DataRow current = rows.next();
        assertEquals(current.getNumCells(), CASE_1_COLHEADER.length);
        for (int cellIndex = 0; cellIndex < CASE_1_COLHEADER.length; cellIndex++) {
            StringValue value = (StringValue) current.getCell(cellIndex);
            assertEquals(value.getStringValue(), CASE_1_CONTENT[rowIndex][cellIndex]);
        }
        rowIndex++;
    }
    // all one-dimensional arrays (meta-info) are optional
    new DefaultTable(CASE_1_CONTENT, null, CASE_1_COLHEADER);
    new DefaultTable(CASE_1_CONTENT, CASE_1_ROWHEADER, null);
    new DefaultTable(CASE_1_CONTENT, CASE_1_ROWHEADER, CASE_1_COLHEADER);
    new DefaultTable(CASE_1_CONTENT, null, null);
    new DefaultTable(CASE_1_CONTENT, null, CASE_1_COLHEADER);
    new DefaultTable(CASE_1_CONTENT, null, null);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) DataColumnSpec(org.knime.core.data.DataColumnSpec) RowIterator(org.knime.core.data.RowIterator) DataType(org.knime.core.data.DataType) StringValue(org.knime.core.data.StringValue) DataRow(org.knime.core.data.DataRow)

Aggregations

RowIterator (org.knime.core.data.RowIterator)77 DataRow (org.knime.core.data.DataRow)62 DataCell (org.knime.core.data.DataCell)28 DataTableSpec (org.knime.core.data.DataTableSpec)20 RowKey (org.knime.core.data.RowKey)16 DoubleValue (org.knime.core.data.DoubleValue)14 BufferedDataTable (org.knime.core.node.BufferedDataTable)13 DataColumnSpec (org.knime.core.data.DataColumnSpec)11 ArrayList (java.util.ArrayList)9 DefaultRow (org.knime.core.data.def.DefaultRow)8 PreparedStatement (java.sql.PreparedStatement)7 DataType (org.knime.core.data.DataType)6 BufferedDataContainer (org.knime.core.node.BufferedDataContainer)6 HashSet (java.util.HashSet)5 Random (java.util.Random)5 TimeZone (java.util.TimeZone)5 DataTable (org.knime.core.data.DataTable)5 DoubleCell (org.knime.core.data.def.DoubleCell)5 StringCell (org.knime.core.data.def.StringCell)5 CanceledExecutionException (org.knime.core.node.CanceledExecutionException)5