Search in sources :

Example 26 with ConnectorPageSource

use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.

the class AbstractTestHiveClient method testTypesRcBinaryRecordCursor.

/**
 * Reads the {@code presto_test_types_rcbinary} table through a page source whose only
 * record cursor provider is {@code ColumnarBinaryHiveRecordCursorProvider} and hands the
 * result to {@code assertGetRecords} with the {@code RCBINARY} format.
 *
 * <p>The test returns without asserting anything when the metadata yields no handle for
 * the table.
 */
@Test
public void testTypesRcBinaryRecordCursor() throws Exception {
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();

        SchemaTableName rcBinaryTable = new SchemaTableName(database, "presto_test_types_rcbinary");
        if (metadata.getTableHandle(session, rcBinaryTable) == null) {
            // No handle for the table: nothing to verify.
            return;
        }

        ConnectorTableHandle tableHandle = getTableHandle(metadata, rcBinaryTable);
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle);
        HiveSplit hiveSplit = getHiveSplit(tableHandle);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(
                metadata.getColumnHandles(session, tableHandle).values());

        // Only the columnar-binary cursor provider is registered; the page source factory set is empty.
        HiveClientConfig clientConfig = new HiveClientConfig().setTimeZone(timeZone.getID());
        ConnectorPageSourceProvider pageSourceProvider = new HivePageSourceProvider(
                clientConfig,
                hdfsEnvironment,
                ImmutableSet.of(new ColumnarBinaryHiveRecordCursorProvider(hdfsEnvironment)),
                ImmutableSet.of(),
                TYPE_MANAGER);

        ConnectorPageSource pageSource = pageSourceProvider.createPageSource(
                transaction.getTransactionHandle(),
                session,
                hiveSplit,
                columnHandles);
        assertGetRecords(RCBINARY, tableMetadata, hiveSplit, pageSource, columnHandles);
    }
}
Also used : ColumnHandle(com.facebook.presto.spi.ColumnHandle) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ConnectorTableHandle(com.facebook.presto.spi.ConnectorTableHandle) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) ConnectorMetadata(com.facebook.presto.spi.connector.ConnectorMetadata) ConnectorTableMetadata(com.facebook.presto.spi.ConnectorTableMetadata) Test(org.testng.annotations.Test)

Example 27 with ConnectorPageSource

use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.

the class ShardCompactor method compactSorted.

/**
 * Merges the rows of the given shards into a new storage page sink and commits the result.
 *
 * <p>One {@code SortedRowSource} is opened per shard UUID and placed in a priority queue.
 * The head source is repeatedly removed, its next row is appended to the sink, and the
 * source is re-queued until every source reports no more rows. The sink is flushed whenever
 * it reports being full and once more before the commit.
 *
 * @param transactionId transaction id passed to the storage page sink
 * @param bucketNumber bucket number passed to both the sink and the page sources
 * @param uuids shards to read
 * @param columns columns to read and write; their ids and types are derived from this list
 * @param sortColumnIds ids of the sort columns; must all be contained in {@code columns}
 * @param sortOrders sort order per sort column; must have the same size as {@code sortColumnIds}
 * @return the shard infos produced by committing the sink
 * @throws IOException if reading, writing or closing a source fails; the sink is rolled back first
 * @throws IllegalArgumentException if the sort arguments are inconsistent
 */
public List<ShardInfo> compactSorted(long transactionId, OptionalInt bucketNumber, Set<UUID> uuids, List<ColumnInfo> columns, List<Long> sortColumnIds, List<SortOrder> sortOrders) throws IOException {
    checkArgument(sortColumnIds.size() == sortOrders.size(), "sortColumnIds and sortOrders must be of the same size");
    long start = System.nanoTime();
    List<Long> columnIds = columns.stream().map(ColumnInfo::getColumnId).collect(toList());
    List<Type> columnTypes = columns.stream().map(ColumnInfo::getType).collect(toList());
    checkArgument(columnIds.containsAll(sortColumnIds), "sortColumnIds must be a subset of columnIds");
    List<Integer> sortIndexes = sortColumnIds.stream().map(columnIds::indexOf).collect(toList());
    Queue<SortedRowSource> rowSources = new PriorityQueue<>();
    StoragePageSink outputPageSink = storageManager.createStoragePageSink(transactionId, bucketNumber, columnIds, columnTypes, false);
    // The source currently removed from the queue. While it is out of the queue the
    // queue-wide cleanup in the finally block cannot see it, so it is tracked separately.
    SortedRowSource inFlight = null;
    try {
        for (UUID uuid : uuids) {
            ConnectorPageSource pageSource = storageManager.getPageSource(uuid, bucketNumber, columnIds, columnTypes, TupleDomain.all(), readerAttributes);
            SortedRowSource rowSource = new SortedRowSource(pageSource, columnTypes, sortIndexes, sortOrders);
            rowSources.add(rowSource);
        }
        while (!rowSources.isEmpty()) {
            inFlight = rowSources.poll();
            if (!inFlight.hasNext()) {
                // rowSource is empty, close it; a failing close() propagates and is not retried
                SortedRowSource exhausted = inFlight;
                inFlight = null;
                exhausted.close();
                continue;
            }
            outputPageSink.appendRow(inFlight.next());
            if (outputPageSink.isFull()) {
                outputPageSink.flush();
            }
            rowSources.add(inFlight);
            inFlight = null;
        }
        outputPageSink.flush();
        List<ShardInfo> shardInfos = getFutureValue(outputPageSink.commit());
        updateStats(uuids.size(), shardInfos.size(), nanosSince(start).toMillis());
        return shardInfos;
    } catch (IOException | RuntimeException e) {
        outputPageSink.rollback();
        throw e;
    } finally {
        if (inFlight != null) {
            // A failure happened while this source was outside the queue; close it the same
            // way as the queued ones so it is not leaked.
            java.util.Collections.singleton(inFlight).forEach(SortedRowSource::closeQuietly);
        }
        rowSources.forEach(SortedRowSource::closeQuietly);
    }
}
Also used : StoragePageSink(com.facebook.presto.raptor.storage.StoragePageSink) IOException(java.io.IOException) PriorityQueue(java.util.PriorityQueue) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Type(com.facebook.presto.spi.type.Type) UUID(java.util.UUID) ShardInfo(com.facebook.presto.raptor.metadata.ShardInfo)

Example 28 with ConnectorPageSource

use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.

the class TestHiveFileFormats method testPageSourceFactory.

/**
 * Creates a page source for {@code split} using only {@code sourceFactory} and verifies its
 * contents with {@code checkPageSource}.
 *
 * <p>The split schema is assembled from the storage format (input format and serde) and
 * from the non-partition test columns (names and types, comma separated). Partition-key
 * columns are turned into {@code HivePartitionKey}s instead.
 *
 * @param sourceFactory the single page source factory offered to the provider
 * @param split file split to read
 * @param storageFormat storage format supplying the input format and serde names
 * @param testColumns columns expected in the file, including partition keys
 * @param session session passed to the page source
 * @param rowCount row count passed on to {@code checkPageSource}
 */
private void testPageSourceFactory(HivePageSourceFactory sourceFactory, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, ConnectorSession session, int rowCount) throws IOException {
    Joiner commaJoiner = Joiner.on(',');

    Properties schema = new Properties();
    schema.setProperty(FILE_INPUT_FORMAT, storageFormat.getInputFormat());
    schema.setProperty(SERIALIZATION_LIB, storageFormat.getSerDe());

    // Only regular (non-partition) columns are described in the split schema.
    String columnNames = commaJoiner.join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getName));
    schema.setProperty("columns", columnNames);
    String columnTypeNames = commaJoiner.join(transform(filter(testColumns, not(TestColumn::isPartitionKey)), TestColumn::getType));
    schema.setProperty("columns.types", columnTypeNames);

    List<HivePartitionKey> partitionKeys = testColumns.stream()
            .filter(TestColumn::isPartitionKey)
            .map(column -> new HivePartitionKey(
                    column.getName(),
                    HiveType.valueOf(column.getObjectInspector().getTypeName()),
                    (String) column.getWriteValue()))
            .collect(toList());

    List<HiveColumnHandle> columnHandles = getColumnHandles(testColumns);

    Optional<ConnectorPageSource> created = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(),
            ImmutableSet.of(sourceFactory),
            "test",
            new Configuration(),
            session,
            split.getPath(),
            OptionalInt.empty(),
            split.getStart(),
            split.getLength(),
            schema,
            TupleDomain.all(),
            columnHandles,
            partitionKeys,
            DateTimeZone.getDefault(),
            TYPE_MANAGER,
            ImmutableMap.of());

    assertTrue(created.isPresent());
    checkPageSource(created.get(), testColumns, getTypes(columnHandles), rowCount);
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) Iterables.transform(com.google.common.collect.Iterables.transform) OrcPageSourceFactory(com.facebook.presto.hive.orc.OrcPageSourceFactory) Test(org.testng.annotations.Test) RowType(com.facebook.presto.type.RowType) FileSplit(org.apache.hadoop.mapred.FileSplit) Predicates.not(com.google.common.base.Predicates.not) Configuration(org.apache.hadoop.conf.Configuration) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) Path(org.apache.hadoop.fs.Path) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ImmutableCollectors.toImmutableList(com.facebook.presto.util.ImmutableCollectors.toImmutableList) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Collectors(java.util.stream.Collectors) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) 
TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.TYPE_MANAGER) Optional(java.util.Optional) INTEGER(com.facebook.presto.spi.type.IntegerType.INTEGER) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) Joiner(com.google.common.base.Joiner) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) DataProvider(org.testng.annotations.DataProvider) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) ArrayType(com.facebook.presto.type.ArrayType) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) DwrfPageSourceFactory(com.facebook.presto.hive.orc.DwrfPageSourceFactory) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) Objects.requireNonNull(java.util.Objects.requireNonNull) ParquetRecordCursorProvider(com.facebook.presto.hive.parquet.ParquetRecordCursorProvider) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) 
SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) Properties(java.util.Properties) Assert.fail(org.testng.Assert.fail) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) File(java.io.File) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) VarcharType.createUnboundedVarcharType(com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource)

Example 29 with ConnectorPageSource

use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.

the class HiveFileFormatBenchmark method read.

/**
 * Benchmark body: reads every page of {@code dataFile} with the configured file format,
 * forces each page to load, and records sizes on {@code counter}.
 *
 * @param counter receives {@code data.getSize()} as input size and the file length as output size
 * @return all non-null pages produced by the reader, in read order
 * @throws IOException if the reader fails or cannot be closed
 * @throws RuntimeException if the file format does not support the data set
 */
@Benchmark
public List<Page> read(CompressionCounter counter) throws IOException {
    if (!fileFormat.supports(data)) {
        throw new RuntimeException(fileFormat + " does not support data set " + dataSet);
    }

    List<Page> loadedPages = new ArrayList<>(100);
    try (ConnectorPageSource reader = fileFormat.createFileFormatReader(SESSION, HDFS_ENVIRONMENT, dataFile, data.getColumnNames(), data.getColumnTypes())) {
        while (!reader.isFinished()) {
            Page next = reader.getNextPage();
            if (next == null) {
                // No page this round; keep polling until the reader reports it is finished.
                continue;
            }
            next.assureLoaded();
            loadedPages.add(next);
        }
    }

    counter.inputSize += data.getSize();
    counter.outputSize += dataFile.length();
    return loadedPages;
}
Also used : ArrayList(java.util.ArrayList) Page(com.facebook.presto.spi.Page) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Benchmark(org.openjdk.jmh.annotations.Benchmark)

Example 30 with ConnectorPageSource

use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.

the class TestShardCompactor method getPages.

/**
 * Reads all pages of the given shards, one shard at a time, and returns them fully loaded.
 *
 * <p>A {@code null} page is skipped rather than treated as end of data: the loop ends only
 * when the page source reports {@code isFinished()}, so a source that returns {@code null}
 * before it is finished does not cause its remaining pages to be dropped.
 *
 * @param uuids shards to read
 * @param columnIds ids of the columns to read
 * @param columnTypes types of the columns to read
 * @return every non-null page of every shard, in read order
 * @throws IOException if a page source fails or cannot be closed
 */
private List<Page> getPages(Set<UUID> uuids, List<Long> columnIds, List<Type> columnTypes) throws IOException {
    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    for (UUID uuid : uuids) {
        try (ConnectorPageSource pageSource = getPageSource(columnIds, columnTypes, uuid)) {
            while (!pageSource.isFinished()) {
                Page outputPage = pageSource.getNextPage();
                if (outputPage == null) {
                    // Not a termination signal; isFinished() decides when to stop.
                    continue;
                }
                outputPage.assureLoaded();
                pages.add(outputPage);
            }
        }
    }
    return pages.build();
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) Page(com.facebook.presto.spi.Page) UUID(java.util.UUID) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource)

Aggregations

ConnectorPageSource (com.facebook.presto.spi.ConnectorPageSource): 30; Test (org.testng.annotations.Test): 18; ColumnHandle (com.facebook.presto.spi.ColumnHandle): 17; ConnectorSession (com.facebook.presto.spi.ConnectorSession): 15; TestingConnectorSession (com.facebook.presto.testing.TestingConnectorSession): 13; MaterializedResult (com.facebook.presto.testing.MaterializedResult): 12; ConnectorSplit (com.facebook.presto.spi.ConnectorSplit): 11; ConnectorTableHandle (com.facebook.presto.spi.ConnectorTableHandle): 10; ConnectorMetadata (com.facebook.presto.spi.connector.ConnectorMetadata): 10; Type (com.facebook.presto.spi.type.Type): 10; ImmutableList (com.google.common.collect.ImmutableList): 10; UUID (java.util.UUID): 10; Page (com.facebook.presto.spi.Page): 9; TupleDomain (com.facebook.presto.spi.predicate.TupleDomain): 9; List (java.util.List): 9; Path (org.apache.hadoop.fs.Path): 9; ConnectorTableMetadata (com.facebook.presto.spi.ConnectorTableMetadata): 8; SchemaTableName (com.facebook.presto.spi.SchemaTableName): 8; ConnectorPageSourceProvider (com.facebook.presto.spi.connector.ConnectorPageSourceProvider): 8; Optional (java.util.Optional): 8