Search in sources:

Example 26 with PageBuilder

use of io.prestosql.spi.PageBuilder in project hetu-core by openlookeng.

In the class BenchmarkGroupedTopNBuilder, the method createInputPage:

/**
 * Builds a single page containing {@code positions} rows taken from a TPC-H line item generator.
 *
 * <p>For every row the extended price, discount, ship date and quantity of the next generated
 * line item are written to the block builders at the {@code EXTENDED_PRICE}, {@code DISCOUNT},
 * {@code SHIP_DATE} and {@code QUANTITY} channels respectively.
 *
 * @param positions number of rows to write into the page
 * @param types column types used to construct the underlying {@link PageBuilder}
 * @return the page built from the written rows
 */
private static Page createInputPage(int positions, List<Type> types) {
    PageBuilder builder = new PageBuilder(types);
    Iterator<LineItem> source = new LineItemGenerator(1, 1, 1).iterator();

    int written = 0;
    while (written < positions) {
        // The position is declared before the row is fetched, as in the per-row protocol below.
        builder.declarePosition();
        LineItem item = source.next();

        DOUBLE.writeDouble(builder.getBlockBuilder(EXTENDED_PRICE), item.getExtendedPrice());
        DOUBLE.writeDouble(builder.getBlockBuilder(DISCOUNT), item.getDiscount());
        DATE.writeLong(builder.getBlockBuilder(SHIP_DATE), item.getShipDate());
        DOUBLE.writeDouble(builder.getBlockBuilder(QUANTITY), item.getQuantity());

        written++;
    }

    return builder.build();
}
Also used : LineItem(io.airlift.tpch.LineItem) PageBuilder(io.prestosql.spi.PageBuilder) LineItemGenerator(io.airlift.tpch.LineItemGenerator)

Example 27 with PageBuilder

use of io.prestosql.spi.PageBuilder in project boostkit-bigdata by kunpengcompute.

In the class HiveFileFormatBenchmark, the method createTpchDataSet:

/**
 * Generates rows of the given TPC-H table and packs the selected columns into pages.
 *
 * <p>Date columns are stored as {@code DATE} when the file format supports dates; otherwise both
 * the column type and the written values fall back to unbounded varchar. Rows are accumulated
 * until the page builder reports it is full, at which point the page is emitted. Generation stops
 * once the emitted pages total at least {@code MIN_DATA_SIZE} bytes or the generator is exhausted.
 * Rows still sitting in a not-yet-full page builder when the loop ends are not part of the result.
 *
 * @param format file format, consulted only for its date support
 * @param tpchTable table whose generator supplies the rows
 * @param columns columns to extract from each row, in output channel order
 * @return the column names, column types and generated pages
 * @throws IllegalArgumentException if a column has an unsupported base type
 */
private static <E extends TpchEntity> TestData createTpchDataSet(FileFormat format, TpchTable<E> tpchTable, List<TpchColumn<E>> columns) {
    List<String> columnNames = columns.stream()
            .map(TpchColumn::getColumnName)
            .collect(toList());
    // Formats without date support get DATE columns re-typed as unbounded varchar.
    List<Type> columnTypes = columns.stream()
            .map(HiveFileFormatBenchmark::getColumnType)
            .map(type -> !format.supportsDate() && DATE.equals(type) ? createUnboundedVarcharType() : type)
            .collect(toList());

    PageBuilder pageBuilder = new PageBuilder(columnTypes);
    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    long emittedBytes = 0;

    for (E row : tpchTable.createGenerator(10, 1, 1)) {
        pageBuilder.declarePosition();
        for (int channel = 0; channel < columns.size(); channel++) {
            TpchColumn<E> column = columns.get(channel);
            BlockBuilder out = pageBuilder.getBlockBuilder(channel);
            switch (column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(out, column.getIdentifier(row));
                    break;
                case INTEGER:
                    INTEGER.writeLong(out, column.getInteger(row));
                    break;
                case DATE:
                    if (!format.supportsDate()) {
                        // Must mirror the column type substitution done above.
                        createUnboundedVarcharType().writeString(out, column.getString(row));
                    } else {
                        DATE.writeLong(out, column.getDate(row));
                    }
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(out, column.getDouble(row));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(out, Slices.utf8Slice(column.getString(row)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }

        if (!pageBuilder.isFull()) {
            continue;
        }

        // Flush the full page and stop as soon as enough data has been produced.
        Page fullPage = pageBuilder.build();
        pages.add(fullPage);
        pageBuilder.reset();
        emittedBytes += fullPage.getSizeInBytes();
        if (emittedBytes >= MIN_DATA_SIZE) {
            break;
        }
    }

    return new TestData(columnNames, columnTypes, pages.build());
}
Also used : MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) RunResult(org.openjdk.jmh.results.RunResult) LINE_ITEM(io.airlift.tpch.TpchTable.LINE_ITEM) Random(java.util.Random) Warmup(org.openjdk.jmh.annotations.Warmup) ParquetFileWriterConfig(io.prestosql.plugin.hive.ParquetFileWriterConfig) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) HiveConfig(io.prestosql.plugin.hive.HiveConfig) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Slices(io.airlift.slice.Slices) HiveTestUtils(io.prestosql.plugin.hive.HiveTestUtils) TearDown(org.openjdk.jmh.annotations.TearDown) Type(io.prestosql.spi.type.Type) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Setup(org.openjdk.jmh.annotations.Setup) BlockBuilder(io.prestosql.spi.block.BlockBuilder) Param(org.openjdk.jmh.annotations.Param) ArrayType(io.prestosql.spi.type.ArrayType) Collection(java.util.Collection) UUID(java.util.UUID) PageBuilder(io.prestosql.spi.PageBuilder) OrcFileWriterConfig(io.prestosql.plugin.hive.OrcFileWriterConfig) String.format(java.lang.String.format) TpchTable(io.airlift.tpch.TpchTable) UncheckedIOException(java.io.UncheckedIOException) TpchEntity(io.airlift.tpch.TpchEntity) OptionsBuilder(org.openjdk.jmh.runner.options.OptionsBuilder) DataSize(io.airlift.units.DataSize) List(java.util.List) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) HiveCompressionCodec(io.prestosql.plugin.hive.HiveCompressionCodec) Options(org.openjdk.jmh.runner.options.Options) TpchColumn(io.airlift.tpch.TpchColumn) Measurement(org.openjdk.jmh.annotations.Measurement) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) ORDERS(io.airlift.tpch.TpchTable.ORDERS) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) Scope(org.openjdk.jmh.annotations.Scope) 
INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) OrderColumn(io.airlift.tpch.OrderColumn) ArrayList(java.util.ArrayList) HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment) Statistics(org.openjdk.jmh.util.Statistics) AuxCounters(org.openjdk.jmh.annotations.AuxCounters) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) DATE(io.prestosql.spi.type.DateType.DATE) Runner(org.openjdk.jmh.runner.Runner) HadoopNative(io.prestosql.hadoop.HadoopNative) Page(io.prestosql.spi.Page) IOException(java.io.IOException) State(org.openjdk.jmh.annotations.State) Benchmark(org.openjdk.jmh.annotations.Benchmark) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) Collectors.toList(java.util.stream.Collectors.toList) Fork(org.openjdk.jmh.annotations.Fork) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) IntArrays(it.unimi.dsi.fastutil.ints.IntArrays) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) DATE(io.prestosql.spi.type.DateType.DATE) ImmutableList(com.google.common.collect.ImmutableList) Page(io.prestosql.spi.Page) PageBuilder(io.prestosql.spi.PageBuilder) Type(io.prestosql.spi.type.Type) ArrayType(io.prestosql.spi.type.ArrayType) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) BlockBuilder(io.prestosql.spi.block.BlockBuilder)

Example 28 with PageBuilder

use of io.prestosql.spi.PageBuilder in project boostkit-bigdata by kunpengcompute.

In the class TestHivePageSink, the method writeTestFile:

/**
 * Writes generated TPC-H line items through a Hive page sink, reads the resulting file back and
 * verifies that the data round-trips.
 *
 * <p>All rows are collected into one page, appended to the sink and the sink is finished. The
 * output directory is then expected to hold exactly one file whose name does not end in
 * {@code .crc}; that file is read back through a page source and compared with the written page.
 * The maximum input page size recorded in the writer stats is also checked against the retained
 * size of the written page.
 *
 * @param config Hive configuration used for the sink, the source and the session
 * @param metastore metastore handed to the page sink
 * @param outputPath local directory the sink writes into
 * @return length in bytes of the single data file that was written
 */
private static long writeTestFile(HiveConfig config, HiveMetastore metastore, String outputPath) {
    HiveTransactionHandle transaction = new HiveTransactionHandle();
    HiveWriterStats stats = new HiveWriterStats();
    ConnectorPageSink pageSink = createPageSink(transaction, config, metastore, new Path("file:///" + outputPath), stats);
    List<LineItemColumn> columns = getTestColumns();
    List<Type> columnTypes = columns.stream().map(LineItemColumn::getType).map(TestHivePageSink::getHiveType).map(hiveType -> hiveType.getType(HiveTestUtils.TYPE_MANAGER)).collect(toList());
    PageBuilder pageBuilder = new PageBuilder(columnTypes);
    int rows = 0;
    for (LineItem lineItem : new LineItemGenerator(0.01, 1, 1)) {
        // The counter is bumped before the limit check, so at most NUM_ROWS - 1 rows are written.
        rows++;
        if (rows >= NUM_ROWS) {
            break;
        }
        pageBuilder.declarePosition();
        for (int i = 0; i < columns.size(); i++) {
            LineItemColumn column = columns.get(i);
            BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
            switch(column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(blockBuilder, column.getIdentifier(lineItem));
                    break;
                case INTEGER:
                    INTEGER.writeLong(blockBuilder, column.getInteger(lineItem));
                    break;
                case DATE:
                    DATE.writeLong(blockBuilder, column.getDate(lineItem));
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(blockBuilder, column.getDouble(lineItem));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(lineItem)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }
    }
    Page page = pageBuilder.build();
    pageSink.appendPage(page);
    getFutureValue(pageSink.finish());
    File outputDir = new File(outputPath);
    // Files ending in ".crc" are skipped; exactly one other file is expected in the directory.
    List<File> files = ImmutableList.copyOf(outputDir.listFiles((dir, name) -> !name.endsWith(".crc")));
    File outputFile = getOnlyElement(files);
    long length = outputFile.length();
    List<Page> pages = new ArrayList<>();
    // Close the page source once reading is done so the file handle is not leaked.
    try (ConnectorPageSource pageSource = createPageSource(transaction, config, outputFile)) {
        while (!pageSource.isFinished()) {
            Page nextPage = pageSource.getNextPage();
            if (nextPage != null) {
                pages.add(nextPage.getLoadedPage());
            }
        }
    } catch (IOException e) {
        // close() declares IOException; rethrow unchecked to keep the method signature unchanged.
        throw new java.io.UncheckedIOException(e);
    }
    MaterializedResult expectedResults = toMaterializedResult(getSession(config), columnTypes, ImmutableList.of(page));
    MaterializedResult results = toMaterializedResult(getSession(config), columnTypes, pages);
    assertEquals(results, expectedResults);
    assertEquals(round(stats.getInputPageSizeInBytes().getAllTime().getMax()), page.getRetainedSizeInBytes());
    return length;
}
Also used : Path(org.apache.hadoop.fs.Path) NONE(io.prestosql.plugin.hive.HiveCompressionCodec.NONE) HiveTestUtils.getDefaultHiveSelectiveFactories(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveSelectiveFactories) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink) Test(org.testng.annotations.Test) TpchColumnTypes(io.airlift.tpch.TpchColumnTypes) MaterializedResult(io.prestosql.testing.MaterializedResult) Assert.assertEquals(io.prestosql.testing.assertions.Assert.assertEquals) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Math.round(java.lang.Math.round) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Slices(io.airlift.slice.Slices) HIVE_STRING(io.prestosql.plugin.hive.HiveType.HIVE_STRING) Mockito.doAnswer(org.mockito.Mockito.doAnswer) Path(org.apache.hadoop.fs.Path) Matchers.anyInt(org.mockito.Matchers.anyInt) Type(io.prestosql.spi.type.Type) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) HIVE_INT(io.prestosql.plugin.hive.HiveType.HIVE_INT) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) TpchColumnType(io.airlift.tpch.TpchColumnType) PageIndexerFactory(io.prestosql.spi.PageIndexerFactory) HIVE_LONG(io.prestosql.plugin.hive.HiveType.HIVE_LONG) ImmutableMap(com.google.common.collect.ImmutableMap) BlockBuilder(io.prestosql.spi.block.BlockBuilder) MetadataManager.createTestMetadataManager(io.prestosql.metadata.MetadataManager.createTestMetadataManager) PageBuilder(io.prestosql.spi.PageBuilder) String.format(java.lang.String.format) LineItemGenerator(io.airlift.tpch.LineItemGenerator) PageIndexer(io.prestosql.spi.PageIndexer) List(java.util.List) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) Stream(java.util.stream.Stream) 
VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) Optional(java.util.Optional) TestingNodeManager(io.prestosql.testing.TestingNodeManager) Joiner(com.google.common.base.Joiner) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) JsonCodec(io.airlift.json.JsonCodec) Mockito.mock(org.mockito.Mockito.mock) LineItem(io.airlift.tpch.LineItem) HivePageSinkMetadata(io.prestosql.plugin.hive.metastore.HivePageSinkMetadata) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) OptionalInt(java.util.OptionalInt) DIRECT_TO_TARGET_NEW_DIRECTORY(io.prestosql.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY) ArrayList(java.util.ArrayList) GroupByHashPageIndexerFactory(io.prestosql.GroupByHashPageIndexerFactory) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) REGULAR(io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) GenericExceptionAction(io.prestosql.plugin.hive.authentication.GenericExceptionAction) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) Matchers.anyObject(org.mockito.Matchers.anyObject) LineItemColumn(io.airlift.tpch.LineItemColumn) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) DATE(io.prestosql.spi.type.DateType.DATE) FileHiveMetastore.createTestingFileHiveMetastore(io.prestosql.plugin.hive.metastore.file.FileHiveMetastore.createTestingFileHiveMetastore) HiveMetastore(io.prestosql.plugin.hive.metastore.HiveMetastore) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) Properties(java.util.Properties) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) TypeManager(io.prestosql.spi.type.TypeManager) Page(io.prestosql.spi.Page) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) Mockito.when(org.mockito.Mockito.when) 
MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) File(java.io.File) Collectors.toList(java.util.stream.Collectors.toList) IntArrayBlock(io.prestosql.spi.block.IntArrayBlock) HIVE_DATE(io.prestosql.plugin.hive.HiveType.HIVE_DATE) HIVE_DOUBLE(io.prestosql.plugin.hive.HiveType.HIVE_DOUBLE) JoinCompiler(io.prestosql.sql.gen.JoinCompiler) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) Collections(java.util.Collections) LineItemColumn(io.airlift.tpch.LineItemColumn) ArrayList(java.util.ArrayList) LineItem(io.airlift.tpch.LineItem) Page(io.prestosql.spi.Page) PageBuilder(io.prestosql.spi.PageBuilder) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) Type(io.prestosql.spi.type.Type) TpchColumnType(io.airlift.tpch.TpchColumnType) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink) MaterializedResult(io.prestosql.testing.MaterializedResult) File(java.io.File) LineItemGenerator(io.airlift.tpch.LineItemGenerator) BlockBuilder(io.prestosql.spi.block.BlockBuilder)

Example 29 with PageBuilder

use of io.prestosql.spi.PageBuilder in project boostkit-bigdata by kunpengcompute.

In the class TestMergingPageIterator, the method testMerging:

/**
 * Checks that {@code MergingPageIterator} merges several individually sorted page streams into
 * one stream sorted on channel 1 with nulls first.
 *
 * <p>Ten streams are built, each from 1000 random values (multiples of 100 replaced by null)
 * sorted nulls-first and split into ten pages of 100 rows. Channel 0 holds the value and channel
 * 1 holds the value times 22; null rows are null in both channels. The merged output must keep
 * that relationship on every row and be sorted nulls-first on the channel 0 values.
 */
@Test
public void testMerging() {
    List<Type> types = ImmutableList.of(INTEGER, INTEGER);
    List<Integer> sortIndexes = ImmutableList.of(1);
    List<SortOrder> sortOrders = ImmutableList.of(SortOrder.ASC_NULLS_FIRST);

    // Build the sorted input streams; a single page builder is reused across all of them.
    PageBuilder builder = new PageBuilder(types);
    List<List<Page>> inputStreams = new ArrayList<>();
    for (int streamIndex = 0; streamIndex < 10; streamIndex++) {
        Iterator<Integer> sortedInput = IntStream.range(0, 1000)
                .map(ignored -> ThreadLocalRandom.current().nextInt(100_000))
                .mapToObj(n -> ((n % 100) == 0) ? null : n)
                .sorted(nullsFirst(naturalOrder()))
                .iterator();

        List<Page> streamPages = new ArrayList<>();
        for (int pageIndex = 0; pageIndex < 10; pageIndex++) {
            for (int row = 0; row < 100; row++) {
                Integer value = sortedInput.next();
                builder.declarePosition();
                if (value != null) {
                    INTEGER.writeLong(builder.getBlockBuilder(0), value);
                    INTEGER.writeLong(builder.getBlockBuilder(1), value * 22L);
                } else {
                    builder.getBlockBuilder(0).appendNull();
                    builder.getBlockBuilder(1).appendNull();
                }
            }
            streamPages.add(builder.build());
            builder.reset();
        }
        inputStreams.add(streamPages);
        // All 1000 generated values must have been consumed by the 10 x 100 rows.
        assertFalse(sortedInput.hasNext());
    }

    List<Iterator<Page>> sources = inputStreams.stream()
            .map(List::iterator)
            .collect(toList());

    // Drain the merged stream, checking each row and collecting the channel 0 values.
    List<Long> merged = new ArrayList<>();
    for (Iterator<Page> mergedPages = new MergingPageIterator(sources, types, sortIndexes, sortOrders); mergedPages.hasNext(); ) {
        Page page = mergedPages.next();
        int positionCount = page.getPositionCount();
        for (int position = 0; position < positionCount; position++) {
            if (!page.getBlock(0).isNull(position)) {
                long x = INTEGER.getLong(page.getBlock(0), position);
                long y = INTEGER.getLong(page.getBlock(1), position);
                assertEquals(y, x * 22);
                merged.add(x);
            } else {
                assertTrue(page.getBlock(1).isNull(position));
                merged.add(null);
            }
        }
    }

    assertThat(merged).isSortedAccordingTo(nullsFirst(naturalOrder()));
}
Also used : IntStream(java.util.stream.IntStream) Comparator.nullsFirst(java.util.Comparator.nullsFirst) Iterator(java.util.Iterator) Comparator.naturalOrder(java.util.Comparator.naturalOrder) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Assert.assertEquals(org.testng.Assert.assertEquals) Page(io.prestosql.spi.Page) Test(org.testng.annotations.Test) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) PageBuilder(io.prestosql.spi.PageBuilder) SortOrder(io.prestosql.spi.block.SortOrder) ArrayList(java.util.ArrayList) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) ImmutableList(com.google.common.collect.ImmutableList) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) Assert.assertTrue(org.testng.Assert.assertTrue) Type(io.prestosql.spi.type.Type) Assert.assertFalse(org.testng.Assert.assertFalse) ArrayList(java.util.ArrayList) SortOrder(io.prestosql.spi.block.SortOrder) Page(io.prestosql.spi.Page) PageBuilder(io.prestosql.spi.PageBuilder) Type(io.prestosql.spi.type.Type) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) ImmutableList(com.google.common.collect.ImmutableList) Test(org.testng.annotations.Test)

Example 30 with PageBuilder

use of io.prestosql.spi.PageBuilder in project hetu-core by openlookeng.

In the class InMemorySortAggregationBuilder, the method buildResult:

/**
 * Creates a work processor that emits the aggregation output for the given group ids, one page
 * per processing step.
 *
 * <p>Each step resets the shared page builder and then, until the builder is full or the group
 * ids run out, appends the group-by values followed by one evaluated value per aggregator. When
 * {@code isFinalizedValuePresent} is set and {@code step} is {@code PARTIAL}, one more boolean
 * channel is written after the aggregator channels: {@code false} for group id 0 and for the
 * last remaining group id, {@code true} otherwise. The processor finishes once no group ids
 * remain at the start of a step.
 *
 * @param groupIds group ids to emit, consumed in iteration order
 * @param step aggregation step, passed to {@code buildTypes} and used for the flag channel
 * @param isFinalizedValuePresent whether the finalized-flag channel takes part in the output
 * @return a processor yielding the result pages
 */
private WorkProcessor<Page> buildResult(IntIterator groupIds, AggregationNode.Step step, boolean isFinalizedValuePresent) {
    final PageBuilder pageBuilder = new PageBuilder(buildTypes(step, isFinalizedValuePresent));
    // Constant for the lifetime of the processor, so decide once up front.
    final boolean writeFinalizedFlag = isFinalizedValuePresent && step == AggregationNode.Step.PARTIAL;

    return WorkProcessor.create(() -> {
        if (!groupIds.hasNext()) {
            return WorkProcessor.ProcessState.finished();
        }

        pageBuilder.reset();
        // Aggregator channels start right after the group-by channels.
        int aggregatorOffset = groupBy.getTypes().size();

        while (!pageBuilder.isFull() && groupIds.hasNext()) {
            int groupId = groupIds.nextInt();
            groupBy.appendValuesTo(groupId, pageBuilder, 0);
            pageBuilder.declarePosition();

            for (int index = 0; index < aggregators.size(); index++) {
                Aggregator aggregator = aggregators.get(index);
                aggregator.evaluate(groupId, pageBuilder.getBlockBuilder(aggregatorOffset + index));
            }

            if (writeFinalizedFlag) {
                BlockBuilder flagBuilder = pageBuilder.getBlockBuilder(aggregatorOffset + aggregators.size());
                // finalized values are set to true; group 0 and the last remaining group are not finalized
                BOOLEAN.writeBoolean(flagBuilder, groupId != 0 && groupIds.hasNext());
            }
        }

        return WorkProcessor.ProcessState.ofResult(pageBuilder.build());
    });
}
Also used : Type(io.prestosql.spi.type.Type) BooleanType(io.prestosql.spi.type.BooleanType) PageBuilder(io.prestosql.spi.PageBuilder) BlockBuilder(io.prestosql.spi.block.BlockBuilder)

Aggregations

PageBuilder (io.prestosql.spi.PageBuilder): 58, Page (io.prestosql.spi.Page): 28, BlockBuilder (io.prestosql.spi.block.BlockBuilder): 27, Type (io.prestosql.spi.type.Type): 24, ImmutableList (com.google.common.collect.ImmutableList): 18, Test (org.testng.annotations.Test): 14, List (java.util.List): 11, Block (io.prestosql.spi.block.Block): 10, ArrayList (java.util.ArrayList): 9, INTEGER (io.prestosql.spi.type.IntegerType.INTEGER): 8, Collectors.toList (java.util.stream.Collectors.toList): 8, Slice (io.airlift.slice.Slice): 7, Benchmark (org.openjdk.jmh.annotations.Benchmark): 7, Slices (io.airlift.slice.Slices): 6, ConnectorPageSource (io.prestosql.spi.connector.ConnectorPageSource): 6, ConnectorSession (io.prestosql.spi.connector.ConnectorSession): 6, ArrayType (io.prestosql.spi.type.ArrayType): 6, BIGINT (io.prestosql.spi.type.BigintType.BIGINT): 6, DOUBLE (io.prestosql.spi.type.DoubleType.DOUBLE): 6, VarcharType.createUnboundedVarcharType (io.prestosql.spi.type.VarcharType.createUnboundedVarcharType): 6