Search in sources :

Example 46 with Page

use of io.trino.spi.Page in project trino by trinodb.

In class TestOrcPageSourceMemoryTracking, method testMaxReadBytes:

/**
 * Verifies that the ORC reader's max-block-size session property bounds the size of pages
 * it produces, and that {@code FileFormatDataSourceStats} records the max combined bytes
 * per row exactly once for the scan.
 *
 * Builds a single-stripe file whose string columns grow by {@code step * (i + 1)} bytes per
 * column, so later batches must shrink ("batchSize" reduction) to stay under the limit.
 */
@Test(dataProvider = "rowCount")
public void testMaxReadBytes(int rowCount) throws Exception {
    int maxReadBytes = 1_000;
    HiveSessionProperties hiveSessionProperties = new HiveSessionProperties(new HiveConfig(), new OrcReaderConfig().setMaxBlockSize(DataSize.ofBytes(maxReadBytes)), new OrcWriterConfig(), new ParquetReaderConfig(), new ParquetWriterConfig());
    ConnectorSession session = TestingConnectorSession.builder().setPropertyMetadata(hiveSessionProperties.getSessionProperties()).build();
    FileFormatDataSourceStats stats = new FileFormatDataSourceStats();
    // Build a table where every row gets larger, so we can test that the "batchSize" reduces
    int numColumns = 5;
    int step = 250;
    ImmutableList.Builder<TestColumn> columnBuilder = ImmutableList.<TestColumn>builder().add(new TestColumn("p_empty_string", javaStringObjectInspector, () -> "", true));
    GrowingTestColumn[] dataColumns = new GrowingTestColumn[numColumns];
    for (int i = 0; i < numColumns; i++) {
        dataColumns[i] = new GrowingTestColumn("p_string" + "_" + i, javaStringObjectInspector, () -> Long.toHexString(random.nextLong()), false, step * (i + 1));
        columnBuilder.add(dataColumns[i]);
    }
    List<TestColumn> testColumns = columnBuilder.build();
    File tempFile = File.createTempFile("trino_test_orc_page_source_max_read_bytes", "orc");
    // delete the placeholder so TestPreparer can create the actual ORC file at this path
    tempFile.delete();
    TestPreparer testPreparer = new TestPreparer(tempFile.getAbsolutePath(), testColumns, rowCount, rowCount);
    ConnectorPageSource pageSource = testPreparer.newPageSource(stats, session);
    try {
        int positionCount = 0;
        while (true) {
            Page page = pageSource.getNextPage();
            if (pageSource.isFinished()) {
                break;
            }
            assertNotNull(page);
            page = page.getLoadedPage();
            positionCount += page.getPositionCount();
            // ignore the first MAX_BATCH_SIZE rows given the sizes are set when loading the blocks
            if (positionCount > MAX_BATCH_SIZE) {
                // either the block is bounded by maxReadBytes or we just load one single large block
                // an error margin MAX_BATCH_SIZE / step is needed given the block sizes are increasing
                assertTrue(page.getSizeInBytes() < maxReadBytes * (MAX_BATCH_SIZE / step) || 1 == page.getPositionCount());
            }
        }
        // verify the stats are correctly recorded
        Distribution distribution = stats.getMaxCombinedBytesPerRow().getAllTime();
        assertEquals((int) distribution.getCount(), 1);
        // the block is VariableWidthBlock that contains valueIsNull and offsets arrays as overhead
        assertEquals((int) distribution.getMax(), Arrays.stream(dataColumns).mapToInt(GrowingTestColumn::getMaxSize).sum() + (Integer.BYTES + Byte.BYTES) * numColumns);
    } finally {
        // close in finally so the page source is released even when an assertion above fails
        pageSource.close();
        tempFile.delete();
    }
}
Also used : ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) ParquetWriterConfig(io.trino.plugin.hive.parquet.ParquetWriterConfig) OrcWriterConfig(io.trino.plugin.hive.orc.OrcWriterConfig) Page(io.trino.spi.Page) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) OrcReaderConfig(io.trino.plugin.hive.orc.OrcReaderConfig) Distribution(io.airlift.stats.Distribution) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) SequenceFile(org.apache.hadoop.io.SequenceFile) File(java.io.File) OrcFile(org.apache.hadoop.hive.ql.io.orc.OrcFile) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) Test(org.testng.annotations.Test)

Example 47 with Page

use of io.trino.spi.Page in project trino by trinodb.

In class TestOrcPageSourceMemoryTracking, method testTableScanOperator:

/**
 * Verifies table-scan operator memory accounting over three row windows: memory is
 * non-zero and stable while reading each window, drops to the expected band for the
 * final window, and returns to zero once the operator finishes.
 */
@Test
public void testTableScanOperator() {
    // Numbers used in assertions in this test may change when implementation is modified,
    // feel free to change them if they break in the future
    DriverContext driverContext = testPreparer.newDriverContext();
    SourceOperator operator = testPreparer.newTableScanOperator(driverContext);
    assertEquals(driverContext.getMemoryUsage(), 0);
    int totalRows = 0;
    // first two windows share the same expected memory band; the last one is smaller
    totalRows = readUntilAndAssertStableMemory(operator, driverContext, totalRows, 20000, 460000L, 469999L);
    totalRows = readUntilAndAssertStableMemory(operator, driverContext, totalRows, 40000, 460000L, 469999L);
    readUntilAndAssertStableMemory(operator, driverContext, totalRows, NUM_ROWS, 360000L, 369999L);
    assertFalse(operator.isFinished());
    assertNull(operator.getOutput());
    assertTrue(operator.isFinished());
    assertEquals(driverContext.getMemoryUsage(), 0);
}

/**
 * Reads pages from {@code operator} until {@code totalRows} reaches {@code rowLimit}.
 * On the first page of the window, asserts memory usage falls within
 * [{@code minMemory}, {@code maxMemory}]; on subsequent pages, asserts it stays constant.
 *
 * @return the updated running row count
 */
private static int readUntilAndAssertStableMemory(SourceOperator operator, DriverContext driverContext, int totalRows, int rowLimit, long minMemory, long maxMemory) {
    // -1 marks "memory not yet sampled for this window"
    long memoryUsage = -1;
    while (totalRows < rowLimit) {
        assertFalse(operator.isFinished());
        Page page = operator.getOutput();
        assertNotNull(page);
        page.getBlock(1);
        if (memoryUsage == -1) {
            memoryUsage = driverContext.getMemoryUsage();
            assertBetweenInclusive(memoryUsage, minMemory, maxMemory);
        } else {
            assertEquals(driverContext.getMemoryUsage(), memoryUsage);
        }
        totalRows += page.getPositionCount();
    }
    return totalRows;
}
Also used : DriverContext(io.trino.operator.DriverContext) SourceOperator(io.trino.operator.SourceOperator) Page(io.trino.spi.Page) Test(org.testng.annotations.Test)

Example 48 with Page

use of io.trino.spi.Page in project trino by trinodb.

In class TestOrcPageSourceMemoryTracking, method testPageSource:

/**
 * Verifies page-source memory accounting with and without file caching, across three
 * row windows. For each window: memory before lazy-loading a block is small (or equal
 * to the cached file size), jumps into an expected band after the lazy block is loaded,
 * and stays constant for the rest of the window.
 *
 * @param useCache whether the session caches the whole file in memory
 */
private void testPageSource(boolean useCache) throws Exception {
    // Numbers used in assertions in this test may change when implementation is modified,
    // feel free to change them if they break in the future
    FileFormatDataSourceStats stats = new FileFormatDataSourceStats();
    ConnectorPageSource pageSource = testPreparer.newPageSource(stats, useCache ? CACHED_SESSION : UNCACHED_SESSION);
    if (useCache) {
        // file is fully cached
        assertBetweenInclusive(pageSource.getMemoryUsage(), testPreparer.getFileSize(), testPreparer.getFileSize() + 200);
    } else {
        assertEquals(pageSource.getMemoryUsage(), 0);
    }
    int totalRows = 0;
    // first two windows share the same post-load memory bands; the last one is smaller
    totalRows = readPagesAndAssertMemory(pageSource, useCache, totalRows, 20000, 270_000, 280_000, 460_000L, 469_999L);
    totalRows = readPagesAndAssertMemory(pageSource, useCache, totalRows, 40000, 270_000, 280_000, 460_000L, 469_999L);
    readPagesAndAssertMemory(pageSource, useCache, totalRows, NUM_ROWS, 260_000, 270_000, 360_000L, 369_999L);
    assertFalse(pageSource.isFinished());
    assertNull(pageSource.getNextPage());
    assertTrue(pageSource.isFinished());
    if (useCache) {
        // file is fully cached
        assertBetweenInclusive(pageSource.getMemoryUsage(), testPreparer.getFileSize(), testPreparer.getFileSize() + 200);
    } else {
        assertEquals(pageSource.getMemoryUsage(), 0);
    }
    pageSource.close();
}

/**
 * Reads pages until {@code totalRows} reaches {@code rowLimit}. On the first page of the
 * window, checks memory before and after lazy-loading the block; afterwards, asserts
 * memory stays constant. Post-load expectations: cached = file size plus
 * [{@code cachedLoadedLowOffset}, {@code cachedLoadedHighOffset}]; uncached =
 * [{@code uncachedLoadedLow}, {@code uncachedLoadedHigh}].
 *
 * @return the updated running row count
 */
private int readPagesAndAssertMemory(
        ConnectorPageSource pageSource,
        boolean useCache,
        int totalRows,
        int rowLimit,
        long cachedLoadedLowOffset,
        long cachedLoadedHighOffset,
        long uncachedLoadedLow,
        long uncachedLoadedHigh) {
    // -1 marks "memory not yet sampled for this window"
    long memoryUsage = -1;
    while (totalRows < rowLimit) {
        assertFalse(pageSource.isFinished());
        Page page = pageSource.getNextPage();
        assertNotNull(page);
        Block block = page.getBlock(1);
        if (memoryUsage == -1) {
            // Memory usage before lazy-loading the block
            if (useCache) {
                assertBetweenInclusive(pageSource.getMemoryUsage(), testPreparer.getFileSize(), testPreparer.getFileSize() + 2000);
            } else {
                assertBetweenInclusive(pageSource.getMemoryUsage(), 0L, 1000L);
            }
            // trigger loading for lazy block
            createUnboundedVarcharType().getSlice(block, block.getPositionCount() - 1);
            memoryUsage = pageSource.getMemoryUsage();
            // Memory usage after lazy-loading the actual block
            if (useCache) {
                assertBetweenInclusive(pageSource.getMemoryUsage(), testPreparer.getFileSize() + cachedLoadedLowOffset, testPreparer.getFileSize() + cachedLoadedHighOffset);
            } else {
                assertBetweenInclusive(memoryUsage, uncachedLoadedLow, uncachedLoadedHigh);
            }
        } else {
            assertEquals(pageSource.getMemoryUsage(), memoryUsage);
            // trigger loading for lazy block
            createUnboundedVarcharType().getSlice(block, block.getPositionCount() - 1);
            assertEquals(pageSource.getMemoryUsage(), memoryUsage);
        }
        totalRows += page.getPositionCount();
    }
    return totalRows;
}
Also used : Block(io.trino.spi.block.Block) Page(io.trino.spi.Page) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource)

Example 49 with Page

use of io.trino.spi.Page in project trino by trinodb.

In class BenchmarkFileFormatsUtils, method createTpchDataSet:

/**
 * Generates an in-memory TPC-H data set for the given table and columns.
 * DATE columns are emitted as unbounded VARCHAR when {@code format} does not support dates.
 * Rows are appended until at least {@code MIN_DATA_SIZE} bytes of full pages have been built.
 */
public static <E extends TpchEntity> TestData createTpchDataSet(FileFormat format, TpchTable<E> tpchTable, List<TpchColumn<E>> columns) {
    List<String> names = columns.stream().map(TpchColumn::getColumnName).collect(toList());
    List<Type> types = columns.stream()
            .map(BenchmarkFileFormatsUtils::getColumnType)
            .map(type -> format.supportsDate() || !DATE.equals(type) ? type : createUnboundedVarcharType())
            .collect(toList());
    PageBuilder builder = new PageBuilder(types);
    ImmutableList.Builder<Page> result = ImmutableList.builder();
    long bytesSoFar = 0;
    for (E row : tpchTable.createGenerator(10, 1, 1)) {
        builder.declarePosition();
        for (int col = 0; col < columns.size(); col++) {
            appendColumnValue(format, columns.get(col), row, builder.getBlockBuilder(col));
        }
        if (builder.isFull()) {
            Page flushed = builder.build();
            result.add(flushed);
            builder.reset();
            bytesSoFar += flushed.getSizeInBytes();
            // stop generating once enough data has been accumulated
            if (bytesSoFar >= MIN_DATA_SIZE) {
                break;
            }
        }
    }
    // flush any trailing partial page
    if (!builder.isEmpty()) {
        result.add(builder.build());
    }
    return new TestData(names, types, result.build());
}

/** Writes one TPC-H column value of {@code row} into {@code blockBuilder}, mapping TPC-H base types to Trino types. */
private static <E extends TpchEntity> void appendColumnValue(FileFormat format, TpchColumn<E> column, E row, BlockBuilder blockBuilder) {
    switch (column.getType().getBase()) {
        case IDENTIFIER:
            BIGINT.writeLong(blockBuilder, column.getIdentifier(row));
            break;
        case INTEGER:
            INTEGER.writeLong(blockBuilder, column.getInteger(row));
            break;
        case DATE:
            // fall back to a string rendering when the target format cannot store dates
            if (format.supportsDate()) {
                DATE.writeLong(blockBuilder, column.getDate(row));
            } else {
                createUnboundedVarcharType().writeString(blockBuilder, column.getString(row));
            }
            break;
        case DOUBLE:
            DOUBLE.writeDouble(blockBuilder, column.getDouble(row));
            break;
        case VARCHAR:
            createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(row)));
            break;
        default:
            throw new IllegalArgumentException("Unsupported type " + column.getType());
    }
}
Also used : PageBuilder(io.trino.spi.PageBuilder) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) Type(io.trino.spi.type.Type) RunResult(org.openjdk.jmh.results.RunResult) Page(io.trino.spi.Page) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) Random(java.util.Random) Statistics(org.openjdk.jmh.util.Statistics) ImmutableList(com.google.common.collect.ImmutableList) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Slices(io.airlift.slice.Slices) INTEGER(io.trino.spi.type.IntegerType.INTEGER) TpchEntity(io.trino.tpch.TpchEntity) TpchTable(io.trino.tpch.TpchTable) Collection(java.util.Collection) IOException(java.io.IOException) File(java.io.File) String.format(java.lang.String.format) UncheckedIOException(java.io.UncheckedIOException) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) DataSize(io.airlift.units.DataSize) TpchColumn(io.trino.tpch.TpchColumn) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) BIGINT(io.trino.spi.type.BigintType.BIGINT) BlockBuilder(io.trino.spi.block.BlockBuilder) DATE(io.trino.spi.type.DateType.DATE) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) DATE(io.trino.spi.type.DateType.DATE) ImmutableList(com.google.common.collect.ImmutableList) Page(io.trino.spi.Page) PageBuilder(io.trino.spi.PageBuilder) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) BlockBuilder(io.trino.spi.block.BlockBuilder)

Example 50 with Page

use of io.trino.spi.Page in project trino by trinodb.

In class BenchmarkHiveFileFormat, method read:

/**
 * Benchmark: reads every page of {@code dataFile} through the configured file format,
 * fully materializing each page, and records input/output byte counts on {@code counter}.
 *
 * @throws RuntimeException if the format cannot read the configured data set
 */
@Benchmark
public List<Page> read(CompressionCounter counter) throws IOException {
    if (!fileFormat.supports(data)) {
        throw new RuntimeException(fileFormat + " does not support data set " + dataSet);
    }
    List<Page> loadedPages = new ArrayList<>(100);
    try (ConnectorPageSource source = fileFormat.createFileFormatReader(SESSION, HDFS_ENVIRONMENT, dataFile, data.getColumnNames(), data.getColumnTypes())) {
        while (!source.isFinished()) {
            Page next = source.getNextPage();
            // getNextPage() may legitimately return null before the source is finished
            if (next == null) {
                continue;
            }
            loadedPages.add(next.getLoadedPage());
        }
    }
    counter.inputSize += data.getSize();
    counter.outputSize += dataFile.length();
    return loadedPages;
}
Also used : ArrayList(java.util.ArrayList) Page(io.trino.spi.Page) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) Benchmark(org.openjdk.jmh.annotations.Benchmark)

Aggregations

Page (io.trino.spi.Page)579 Test (org.testng.annotations.Test)334 Block (io.trino.spi.block.Block)153 Type (io.trino.spi.type.Type)127 MaterializedResult (io.trino.testing.MaterializedResult)109 PlanNodeId (io.trino.sql.planner.plan.PlanNodeId)91 RowPagesBuilder (io.trino.RowPagesBuilder)72 RunLengthEncodedBlock (io.trino.spi.block.RunLengthEncodedBlock)68 ImmutableList (com.google.common.collect.ImmutableList)65 ArrayList (java.util.ArrayList)48 BlockBuilder (io.trino.spi.block.BlockBuilder)46 Optional (java.util.Optional)43 TaskContext (io.trino.operator.TaskContext)42 TestingTaskContext (io.trino.testing.TestingTaskContext)41 List (java.util.List)41 DictionaryBlock (io.trino.spi.block.DictionaryBlock)38 OperatorAssertion.toMaterializedResult (io.trino.operator.OperatorAssertion.toMaterializedResult)37 Slice (io.airlift.slice.Slice)36 OperatorFactory (io.trino.operator.OperatorFactory)32 LazyBlock (io.trino.spi.block.LazyBlock)32