Search in sources :

Example 61 with ConnectorPageSource

use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.

the class TestHiveFileFormats method testCursorProvider.

private void testCursorProvider(HiveRecordCursorProvider cursorProvider, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, ConnectorSession session, int rowCount) {
    List<HivePartitionKey> partitionKeys = testColumns.stream().filter(TestColumn::isPartitionKey).map(TestColumn::toHivePartitionKey).collect(toList());
    List<HiveColumnHandle> partitionKeyColumnHandles = getColumnHandles(testColumns.stream().filter(TestColumn::isPartitionKey).collect(toImmutableList()));
    List<Column> tableDataColumns = testColumns.stream().filter(column -> !column.isPartitionKey()).map(column -> new Column(column.getName(), HiveType.valueOf(column.getType()), Optional.empty(), Optional.empty())).collect(toImmutableList());
    Configuration configuration = new Configuration();
    configuration.set("io.compression.codecs", LzoCodec.class.getName() + "," + LzopCodec.class.getName());
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(cursorProvider), ImmutableSet.of(), configuration, session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), split.getLength(), Instant.now().toEpochMilli(), new Storage(StorageFormat.create(storageFormat.getSerDe(), storageFormat.getInputFormat(), storageFormat.getOutputFormat()), "location", Optional.empty(), false, ImmutableMap.of(), ImmutableMap.of()), TupleDomain.all(), getColumnHandles(testColumns), ImmutableMap.of(), partitionKeys, DateTimeZone.getDefault(), FUNCTION_AND_TYPE_MANAGER, new SchemaTableName("schema", "table"), partitionKeyColumnHandles, tableDataColumns, ImmutableMap.of(), tableDataColumns.size(), TableToPartitionMapping.empty(), Optional.empty(), false, DEFAULT_HIVE_FILE_CONTEXT, TRUE_CONSTANT, false, ROW_EXPRESSION_SERVICE, Optional.empty(), ImmutableMap.of());
    RecordCursor cursor = ((RecordPageSource) pageSource.get()).getCursor();
    checkCursor(cursor, testColumns, rowCount);
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) VarcharType.createUnboundedVarcharType(com.facebook.presto.common.type.VarcharType.createUnboundedVarcharType) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) HIVE_PARTITION_SCHEMA_MISMATCH(com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) FileSplit(org.apache.hadoop.mapred.FileSplit) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LzoCodec(io.airlift.compress.lzo.LzoCodec) DwrfBatchPageSourceFactory(com.facebook.presto.hive.orc.DwrfBatchPageSourceFactory) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) StorageFormat(com.facebook.presto.hive.metastore.StorageFormat) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ConnectorSession(com.facebook.presto.spi.ConnectorSession) INTEGER(com.facebook.presto.common.type.IntegerType.INTEGER) HIVE_CLIENT_CONFIG(com.facebook.presto.hive.HiveTestUtils.HIVE_CLIENT_CONFIG) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) FUNCTION_RESOLUTION(com.facebook.presto.hive.HiveTestUtils.FUNCTION_RESOLUTION) ROW_EXPRESSION_SERVICE(com.facebook.presto.hive.HiveTestUtils.ROW_EXPRESSION_SERVICE) Lists(com.google.common.collect.Lists) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) BOOLEAN(com.facebook.presto.common.type.BooleanType.BOOLEAN) CSV(com.facebook.presto.hive.HiveStorageFormat.CSV) ArrayType(com.facebook.presto.common.type.ArrayType) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) BIGINT(com.facebook.presto.common.type.BigintType.BIGINT) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) File(java.io.File) FUNCTION_AND_TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.FUNCTION_AND_TYPE_MANAGER) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) ParquetFileWriterFactory(com.facebook.presto.hive.parquet.ParquetFileWriterFactory) RowType(com.facebook.presto.common.type.RowType) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) CacheConfig(com.facebook.presto.cache.CacheConfig) SchemaTableName(com.facebook.presto.spi.SchemaTableName) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) DOUBLE(com.facebook.presto.common.type.DoubleType.DOUBLE) TimeZone(java.util.TimeZone) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) TRUE_CONSTANT(com.facebook.presto.expressions.LogicalRowExpressions.TRUE_CONSTANT) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ErrorCodeSupplier(com.facebook.presto.spi.ErrorCodeSupplier) DataProvider(org.testng.annotations.DataProvider) Column(com.facebook.presto.hive.metastore.Column) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) HIVE_INVALID_PARTITION_VALUE(com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) NO_ENCRYPTION(com.facebook.presto.hive.HiveDwrfEncryptionProvider.NO_ENCRYPTION) StructuralTestUtil.mapBlockOf(com.facebook.presto.tests.StructuralTestUtil.mapBlockOf) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) Objects.requireNonNull(java.util.Objects.requireNonNull) DEFAULT_HIVE_FILE_CONTEXT(com.facebook.presto.hive.HiveFileContext.DEFAULT_HIVE_FILE_CONTEXT) SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Storage(com.facebook.presto.hive.metastore.Storage) Assert.fail(org.testng.Assert.fail) OrcBatchPageSourceFactory(com.facebook.presto.hive.orc.OrcBatchPageSourceFactory) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) RecordCursor(com.facebook.presto.spi.RecordCursor) Configuration(org.apache.hadoop.conf.Configuration) LzoCodec(io.airlift.compress.lzo.LzoCodec) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) RecordPageSource(com.facebook.presto.spi.RecordPageSource) Storage(com.facebook.presto.hive.metastore.Storage) Column(com.facebook.presto.hive.metastore.Column)

Example 62 with ConnectorPageSource

use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.

the class ThriftTpchService method getRowsSync.

private PrestoThriftPageResult getRowsSync(PrestoThriftId splitId, List<String> outputColumns, long maxBytes, PrestoThriftNullableToken nextToken) {
    SplitInfo splitInfo = SPLIT_INFO_CODEC.fromJson(splitId.getId());
    checkArgument(maxBytes >= DEFAULT_MAX_PAGE_SIZE_IN_BYTES, "requested maxBytes is too small");
    ConnectorPageSource pageSource;
    if (!splitInfo.isIndexSplit()) {
        // normal scan
        pageSource = createPageSource(splitInfo, outputColumns);
    } else {
        // index lookup
        pageSource = createLookupPageSource(splitInfo, outputColumns);
    }
    return getRowsInternal(pageSource, splitInfo.getTableName(), outputColumns, nextToken.getToken());
}
Also used : ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource)

Example 63 with ConnectorPageSource

use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.

the class ShardCompactor method compactSorted.

public List<ShardInfo> compactSorted(long transactionId, boolean tableSupportsDeltaDelete, OptionalInt bucketNumber, Map<UUID, Optional<UUID>> uuidsMap, List<ColumnInfo> columns, List<Long> sortColumnIds, List<SortOrder> sortOrders) throws IOException {
    checkArgument(sortColumnIds.size() == sortOrders.size(), "sortColumnIds and sortOrders must be of the same size");
    long start = System.nanoTime();
    List<Long> columnIds = columns.stream().map(ColumnInfo::getColumnId).collect(toList());
    List<Type> columnTypes = columns.stream().map(ColumnInfo::getType).collect(toList());
    checkArgument(columnIds.containsAll(sortColumnIds), "sortColumnIds must be a subset of columnIds");
    List<Integer> sortIndexes = sortColumnIds.stream().map(columnIds::indexOf).collect(toList());
    Queue<SortedPageSource> rowSources = new PriorityQueue<>();
    StoragePageSink outputPageSink = storageManager.createStoragePageSink(DEFAULT_RAPTOR_CONTEXT, transactionId, bucketNumber, columnIds, columnTypes, false);
    try {
        uuidsMap.forEach((uuid, deltaUuid) -> {
            ConnectorPageSource pageSource = storageManager.getPageSource(DEFAULT_RAPTOR_CONTEXT, DEFAULT_HIVE_FILE_CONTEXT, uuid, deltaUuid, tableSupportsDeltaDelete, bucketNumber, columnIds, columnTypes, TupleDomain.all(), readerAttributes);
            SortedPageSource rowSource = new SortedPageSource(pageSource, columnTypes, sortIndexes, sortOrders);
            rowSources.add(rowSource);
        });
        while (!rowSources.isEmpty()) {
            SortedPageSource rowSource = rowSources.poll();
            if (!rowSource.hasNext()) {
                // rowSource is empty, close it
                rowSource.close();
                continue;
            }
            outputPageSink.appendPages(ImmutableList.of(rowSource.next()));
            if (outputPageSink.isFull()) {
                outputPageSink.flush();
            }
            rowSources.add(rowSource);
        }
        outputPageSink.flush();
        List<ShardInfo> shardInfos = getFutureValue(outputPageSink.commit());
        int deltaCount = uuidsMap.values().stream().filter(Optional::isPresent).collect(toSet()).size();
        updateStats(uuidsMap.size(), deltaCount, shardInfos.size(), nanosSince(start).toMillis());
        return shardInfos;
    } catch (IOException | RuntimeException e) {
        outputPageSink.rollback();
        throw e;
    } finally {
        rowSources.forEach(SortedPageSource::closeQuietly);
    }
}
Also used : Optional(java.util.Optional) StoragePageSink(com.facebook.presto.raptor.storage.StoragePageSink) IOException(java.io.IOException) PriorityQueue(java.util.PriorityQueue) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) Type(com.facebook.presto.common.type.Type) ShardInfo(com.facebook.presto.raptor.metadata.ShardInfo)

Example 64 with ConnectorPageSource

use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.

the class TestShardCompactor method getPages.

private List<Page> getPages(StorageManager storageManager, Set<UUID> uuids, List<Long> columnIds, List<Type> columnTypes) throws IOException {
    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    for (UUID uuid : uuids) {
        try (ConnectorPageSource pageSource = getPageSource(storageManager, columnIds, columnTypes, uuid, Optional.empty(), false)) {
            while (!pageSource.isFinished()) {
                Page outputPage = pageSource.getNextPage();
                if (outputPage == null) {
                    break;
                }
                pages.add(outputPage.getLoadedPage());
            }
        }
    }
    return pages.build();
}
Also used : ImmutableList(com.google.common.collect.ImmutableList) Page(com.facebook.presto.common.Page) UUID(java.util.UUID) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource)

Example 65 with ConnectorPageSource

use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.

the class TestShardCompactor method getMaterializedRows.

private MaterializedResult getMaterializedRows(StorageManager storageManager, List<UUID> uuids, List<Long> columnIds, List<Type> columnTypes) throws IOException {
    MaterializedResult.Builder rows = MaterializedResult.resultBuilder(SESSION, columnTypes);
    for (UUID uuid : uuids) {
        try (ConnectorPageSource pageSource = getPageSource(storageManager, columnIds, columnTypes, uuid, Optional.empty(), false)) {
            MaterializedResult result = materializeSourceDataStream(SESSION, pageSource, columnTypes);
            rows.rows(result.getMaterializedRows());
        }
    }
    return rows.build();
}
Also used : MaterializedResult(com.facebook.presto.testing.MaterializedResult) UUID(java.util.UUID) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource)

Aggregations

ConnectorPageSource (com.facebook.presto.spi.ConnectorPageSource)66 ConnectorSession (com.facebook.presto.spi.ConnectorSession)36 ColumnHandle (com.facebook.presto.spi.ColumnHandle)29 Test (org.testng.annotations.Test)28 ImmutableList (com.google.common.collect.ImmutableList)27 ConnectorSplit (com.facebook.presto.spi.ConnectorSplit)25 TestingConnectorSession (com.facebook.presto.testing.TestingConnectorSession)25 Optional (java.util.Optional)25 List (java.util.List)23 Path (org.apache.hadoop.fs.Path)23 SchemaTableName (com.facebook.presto.spi.SchemaTableName)22 ImmutableMap (com.google.common.collect.ImmutableMap)19 IOException (java.io.IOException)19 Objects.requireNonNull (java.util.Objects.requireNonNull)19 Type (com.facebook.presto.common.type.Type)18 Map (java.util.Map)18 Configuration (org.apache.hadoop.conf.Configuration)18 PrestoException (com.facebook.presto.spi.PrestoException)17 String.format (java.lang.String.format)16 UUID (java.util.UUID)16