Search in sources:

Example 1 with RecordPageSource

use of com.facebook.presto.spi.RecordPageSource in project presto by prestodb.

In the class TestHiveFileFormats, method testCursorProvider.

/**
 * Creates a Hive page source for {@code split} using only {@code cursorProvider} (no page
 * source factories) and verifies the rows exposed by the resulting record cursor.
 *
 * @param cursorProvider the record cursor provider under test
 * @param split the file split to read
 * @param storageFormat supplies the input format and SerDe names placed in the split schema
 * @param testColumns the columns to read; partition-key columns are passed as partition keys
 *        rather than being listed in the schema properties
 * @param rowCount the row count forwarded to {@code checkCursor}
 * @throws IOException declared for the helpers this method calls
 */
private void testCursorProvider(HiveRecordCursorProvider cursorProvider, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, int rowCount) throws IOException {
    // Only non-partition-key columns are listed in the "columns" / "columns.types" properties.
    Iterable<TestColumn> dataColumns = filter(testColumns, not(TestColumn::isPartitionKey));

    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, storageFormat.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerDe());
    splitProperties.setProperty("columns", Joiner.on(',').join(transform(dataColumns, TestColumn::getName)));
    splitProperties.setProperty("columns.types", Joiner.on(',').join(transform(dataColumns, TestColumn::getType)));

    List<HivePartitionKey> partitionKeys = testColumns.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), HiveType.valueOf(input.getObjectInspector().getTypeName()), (String) input.getWriteValue()))
            .collect(toList());

    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(cursorProvider), ImmutableSet.of(), "test", new Configuration(), SESSION, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), splitProperties, TupleDomain.all(), getColumnHandles(testColumns), partitionKeys, DateTimeZone.getDefault(), TYPE_MANAGER, ImmutableMap.of());

    // Fail with a readable message instead of a bare NoSuchElementException when the provider
    // declines the split.
    assertTrue(pageSource.isPresent(), "Cursor provider did not create a page source for storage format " + storageFormat);

    RecordCursor cursor = ((RecordPageSource) pageSource.get()).getCursor();
    checkCursor(cursor, testColumns, rowCount);
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) ORC(com.facebook.presto.hive.HiveStorageFormat.ORC) Iterables.transform(com.google.common.collect.Iterables.transform) OrcPageSourceFactory(com.facebook.presto.hive.orc.OrcPageSourceFactory) Test(org.testng.annotations.Test) RowType(com.facebook.presto.type.RowType) FileSplit(org.apache.hadoop.mapred.FileSplit) Predicates.not(com.google.common.base.Predicates.not) Configuration(org.apache.hadoop.conf.Configuration) AVRO(com.facebook.presto.hive.HiveStorageFormat.AVRO) Path(org.apache.hadoop.fs.Path) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ImmutableCollectors.toImmutableList(com.facebook.presto.util.ImmutableCollectors.toImmutableList) TEXTFILE(com.facebook.presto.hive.HiveStorageFormat.TEXTFILE) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) BeforeClass(org.testng.annotations.BeforeClass) DWRF(com.facebook.presto.hive.HiveStorageFormat.DWRF) StructuralTestUtil.rowBlockOf(com.facebook.presto.tests.StructuralTestUtil.rowBlockOf) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Collectors(java.util.stream.Collectors) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) StructuralTestUtil.arrayBlockOf(com.facebook.presto.tests.StructuralTestUtil.arrayBlockOf) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) 
TYPE_MANAGER(com.facebook.presto.hive.HiveTestUtils.TYPE_MANAGER) Optional(java.util.Optional) INTEGER(com.facebook.presto.spi.type.IntegerType.INTEGER) Iterables.filter(com.google.common.collect.Iterables.filter) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) Joiner(com.google.common.base.Joiner) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) DataProvider(org.testng.annotations.DataProvider) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) ArrayType(com.facebook.presto.type.ArrayType) HiveTestUtils.getTypes(com.facebook.presto.hive.HiveTestUtils.getTypes) RcFilePageSourceFactory(com.facebook.presto.hive.rcfile.RcFilePageSourceFactory) Assert.assertEquals(org.testng.Assert.assertEquals) PrestoException(com.facebook.presto.spi.PrestoException) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) PARQUET(com.facebook.presto.hive.HiveStorageFormat.PARQUET) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SESSION(com.facebook.presto.hive.HiveTestUtils.SESSION) DwrfPageSourceFactory(com.facebook.presto.hive.orc.DwrfPageSourceFactory) RCTEXT(com.facebook.presto.hive.HiveStorageFormat.RCTEXT) Objects.requireNonNull(java.util.Objects.requireNonNull) ParquetRecordCursorProvider(com.facebook.presto.hive.parquet.ParquetRecordCursorProvider) JSON(com.facebook.presto.hive.HiveStorageFormat.JSON) 
SEQUENCEFILE(com.facebook.presto.hive.HiveStorageFormat.SEQUENCEFILE) Properties(java.util.Properties) Assert.fail(org.testng.Assert.fail) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) File(java.io.File) RCBINARY(com.facebook.presto.hive.HiveStorageFormat.RCBINARY) VarcharType.createUnboundedVarcharType(com.facebook.presto.spi.type.VarcharType.createUnboundedVarcharType) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) HDFS_ENVIRONMENT(com.facebook.presto.hive.HiveTestUtils.HDFS_ENVIRONMENT) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) ParquetPageSourceFactory(com.facebook.presto.hive.parquet.ParquetPageSourceFactory) RecordCursor(com.facebook.presto.spi.RecordCursor) Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) RecordPageSource(com.facebook.presto.spi.RecordPageSource)

Example 2 with RecordPageSource

use of com.facebook.presto.spi.RecordPageSource in project presto by prestodb.

In the class SystemPageSourceProvider, method createPageSource.

/**
 * Creates a page source for a system table split, exposing only the requested columns in the
 * requested order.
 *
 * <p>The split's constraint, keyed by column handle, is re-keyed by the column's position in
 * the system table before being handed to the table. If the table's {@code pageSource} call
 * throws {@link UnsupportedOperationException}, a record-set based page source is used instead.
 *
 * @param transactionHandle must be a {@code SystemTransactionHandle}
 * @param session the session passed through to the system table
 * @param split must be a {@code SystemSplit}
 * @param columns the columns to expose; each must be a {@code SystemColumnHandle}
 * @return a page source producing the requested columns
 * @throws NullPointerException if {@code columns} is null
 * @throws IllegalArgumentException if the split's table is not registered
 * @throws PrestoException if the table declares a duplicate column name, or a requested or
 *         constrained column does not exist in the table
 */
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, List<ColumnHandle> columns) {
    requireNonNull(columns, "columns is null");
    SystemTransactionHandle systemTransaction = (SystemTransactionHandle) transactionHandle;
    SystemSplit systemSplit = (SystemSplit) split;
    SchemaTableName tableName = systemSplit.getTableHandle().getSchemaTableName();
    SystemTable systemTable = tables.get(tableName);
    checkArgument(systemTable != null, "Table %s does not exist", tableName);

    // Index the table's columns by name so handles can be translated to field positions.
    List<ColumnMetadata> tableColumns = systemTable.getTableMetadata().getColumns();
    Map<String, Integer> columnsByName = new HashMap<>();
    for (int i = 0; i < tableColumns.size(); i++) {
        ColumnMetadata column = tableColumns.get(i);
        if (columnsByName.put(column.getName(), i) != null) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, "Duplicate column name: " + column.getName());
        }
    }

    // Map each requested column to its position in the system table.
    ImmutableList.Builder<Integer> userToSystemFieldIndex = ImmutableList.builder();
    for (ColumnHandle column : columns) {
        String columnName = ((SystemColumnHandle) column).getColumnName();
        Integer index = columnsByName.get(columnName);
        if (index == null) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Column does not exist: %s.%s", tableName, columnName));
        }
        userToSystemFieldIndex.add(index);
    }

    // Re-key the constraint from column handles to system table field positions.
    TupleDomain<ColumnHandle> constraint = systemSplit.getConstraint();
    TupleDomain<Integer> newConstraint;
    if (constraint.isNone()) {
        // A "none" constraint has no domain map; calling getDomains().get() on it would throw.
        newConstraint = TupleDomain.none();
    }
    else {
        ImmutableMap.Builder<Integer, Domain> newConstraints = ImmutableMap.builder();
        for (Map.Entry<ColumnHandle, Domain> entry : constraint.getDomains().get().entrySet()) {
            String columnName = ((SystemColumnHandle) entry.getKey()).getColumnName();
            Integer index = columnsByName.get(columnName);
            if (index == null) {
                // Report the unknown column explicitly rather than letting the builder reject a null key.
                throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Column does not exist: %s.%s", tableName, columnName));
            }
            newConstraints.put(index, entry.getValue());
        }
        newConstraint = withColumnDomains(newConstraints.build());
    }

    try {
        return new MappedPageSource(systemTable.pageSource(systemTransaction.getConnectorTransactionHandle(), session, newConstraint), userToSystemFieldIndex.build());
    } catch (UnsupportedOperationException e) {
        // The page-based path is unavailable for this table: fall back to the record-set path.
        return new RecordPageSource(new MappedRecordSet(toRecordSet(systemTransaction.getConnectorTransactionHandle(), systemTable, session, newConstraint), userToSystemFieldIndex.build()));
    }
}
Also used : ColumnMetadata(com.facebook.presto.spi.ColumnMetadata) HashMap(java.util.HashMap) ImmutableList(com.google.common.collect.ImmutableList) ImmutableCollectors.toImmutableList(com.facebook.presto.util.ImmutableCollectors.toImmutableList) PrestoException(com.facebook.presto.spi.PrestoException) RecordPageSource(com.facebook.presto.spi.RecordPageSource) MappedRecordSet(com.facebook.presto.split.MappedRecordSet) ColumnHandle(com.facebook.presto.spi.ColumnHandle) SchemaTableName(com.facebook.presto.spi.SchemaTableName) ImmutableMap(com.google.common.collect.ImmutableMap) MappedPageSource(com.facebook.presto.split.MappedPageSource) SystemTable(com.facebook.presto.spi.SystemTable) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) Domain(com.facebook.presto.spi.predicate.Domain) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 3 with RecordPageSource

use of com.facebook.presto.spi.RecordPageSource in project presto by prestodb.

In the class ThriftIndexedTpchService, method createLookupPageSource.

/**
 * Builds a page source that returns the rows of the indexed table matching the lookup keys
 * carried by {@code splitInfo}.
 *
 * @param splitInfo supplies the schema name, table name, lookup column names and key values
 * @param outputColumnNames the columns to return for each matching row
 * @return a record-based page source over the lookup result
 * @throws IllegalArgumentException if no index exists for the table and lookup columns
 */
@Override
protected ConnectorPageSource createLookupPageSource(SplitInfo splitInfo, List<String> outputColumnNames) {
    String tableName = splitInfo.getTableName();
    List<String> lookupColumns = splitInfo.getLookupColumnNames();

    // Resolve the index that covers exactly the requested lookup columns.
    IndexedTable index = indexedData
            .getIndexedTable(tableName, schemaNameToScaleFactor(splitInfo.getSchemaName()), ImmutableSet.copyOf(lookupColumns))
            .orElseThrow(() -> new IllegalArgumentException(String.format("No such index: %s%s", tableName, lookupColumns)));

    // Wrap the raw key values as a record set, then reorder its fields to the index's key column order.
    List<Type> keyTypes = types(tableName, lookupColumns);
    RecordSet rawKeys = new ListBasedRecordSet(splitInfo.getKeys(), keyTypes);
    RecordSet orderedKeys = new MappedRecordSet(rawKeys, computeRemap(lookupColumns, index.getKeyColumns()));

    return new RecordPageSource(lookupIndexKeys(orderedKeys, index, outputColumnNames));
}
Also used : Type(com.facebook.presto.common.type.Type) MappedRecordSet(com.facebook.presto.split.MappedRecordSet) RecordSet(com.facebook.presto.spi.RecordSet) MappedRecordSet(com.facebook.presto.split.MappedRecordSet) IndexedTable(com.facebook.presto.tests.tpch.TpchIndexedData.IndexedTable) RecordPageSource(com.facebook.presto.spi.RecordPageSource)

Example 4 with RecordPageSource

use of com.facebook.presto.spi.RecordPageSource in project presto by prestodb.

In the class TestScanFilterAndProjectOperator, method testRecordCursorSource.

/**
 * Runs a scan-filter-project operator over a record-cursor backed page source holding one
 * 10,000-row VARCHAR sequence page, and checks that the single-column projection returns the
 * input unchanged.
 */
@Test
public void testRecordCursorSource() {
    Page sourcePage = SequencePageBuilder.createSequencePage(ImmutableList.of(VARCHAR), 10_000, 0);
    DriverContext context = newDriverContext();

    // Project the only column as-is; both processors are compiled from the same projection list.
    List<RowExpression> projectionList = ImmutableList.of(field(0, VARCHAR));
    Supplier<CursorProcessor> compiledCursorProcessor = expressionCompiler.compileCursorProcessor(
            context.getSession().getSqlFunctionProperties(),
            Optional.empty(),
            projectionList,
            "key");
    Supplier<PageProcessor> compiledPageProcessor = expressionCompiler.compilePageProcessor(
            context.getSession().getSqlFunctionProperties(),
            Optional.empty(),
            projectionList);

    ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory operatorFactory = new ScanFilterAndProjectOperator.ScanFilterAndProjectOperatorFactory(
            0,
            new PlanNodeId("test"),
            new PlanNodeId("0"),
            // Every split is served from the in-memory page through a record set.
            (sessionArg, splitArg, tableArg, columnsArg) -> new RecordPageSource(new PageRecordSet(ImmutableList.of(VARCHAR), sourcePage)),
            compiledCursorProcessor,
            compiledPageProcessor,
            TESTING_TABLE_HANDLE,
            ImmutableList.of(),
            ImmutableList.of(VARCHAR),
            Optional.empty(),
            new DataSize(0, BYTE),
            0);

    SourceOperator scanOperator = operatorFactory.createOperator(context);
    scanOperator.addSplit(new Split(new ConnectorId("test"), TestingTransactionHandle.create(), TestingSplit.createLocalSplit()));
    scanOperator.noMoreSplits();

    MaterializedResult expectedRows = toMaterializedResult(context.getSession(), ImmutableList.of(VARCHAR), ImmutableList.of(sourcePage));
    MaterializedResult actualRows = toMaterializedResult(context.getSession(), ImmutableList.of(VARCHAR), toPages(scanOperator));

    assertEquals(actualRows.getRowCount(), expectedRows.getRowCount());
    assertEquals(actualRows, expectedRows);
}
Also used : CursorProcessor(com.facebook.presto.operator.project.CursorProcessor) RowExpression(com.facebook.presto.spi.relation.RowExpression) Page(com.facebook.presto.common.Page) PageRecordSet(com.facebook.presto.operator.index.PageRecordSet) RecordPageSource(com.facebook.presto.spi.RecordPageSource) PlanNodeId(com.facebook.presto.spi.plan.PlanNodeId) PageProcessor(com.facebook.presto.operator.project.PageProcessor) DataSize(io.airlift.units.DataSize) TestingSplit(com.facebook.presto.testing.TestingSplit) Split(com.facebook.presto.metadata.Split) OperatorAssertion.toMaterializedResult(com.facebook.presto.operator.OperatorAssertion.toMaterializedResult) MaterializedResult(com.facebook.presto.testing.MaterializedResult) ConnectorId(com.facebook.presto.spi.ConnectorId) Test(org.testng.annotations.Test)

Example 5 with RecordPageSource

use of com.facebook.presto.spi.RecordPageSource in project presto by prestodb.

In the class HivePageSourceProvider, method createHivePageSource.

/**
 * Creates a page source for a Hive file split.
 *
 * <p>Page source factories are tried first, in iteration order; the first one that returns a
 * page source wins and its result is wrapped in a {@code HivePageSource}. If none applies, the
 * record cursor providers are tried in iteration order and the first cursor produced is wrapped
 * in a {@code RecordPageSource}.
 *
 * @return the page source, or {@link Optional#empty()} if no factory or provider handles the split
 */
public static Optional<ConnectorPageSource> createHivePageSource(Set<HiveRecordCursorProvider> cursorProviders, Set<HivePageSourceFactory> pageSourceFactories, String clientId, Configuration configuration, ConnectorSession session, Path path, OptionalInt bucketNumber, long start, long length, Properties schema, TupleDomain<HiveColumnHandle> effectivePredicate, List<HiveColumnHandle> hiveColumns, List<HivePartitionKey> partitionKeys, DateTimeZone hiveStorageTimeZone, TypeManager typeManager, Map<Integer, HiveType> columnCoercions) {
    List<ColumnMapping> allMappings = ColumnMapping.buildColumnMappings(partitionKeys, hiveColumns, columnCoercions, path, bucketNumber);
    List<ColumnMapping> regularMappings = ColumnMapping.extractRegularColumnMappings(allMappings);

    // First choice: a native page source from one of the factories.
    for (HivePageSourceFactory factory : pageSourceFactories) {
        Optional<? extends ConnectorPageSource> candidate = factory.createPageSource(configuration, session, path, start, length, schema, extractRegularColumnHandles(regularMappings, true), effectivePredicate, hiveStorageTimeZone);
        if (!candidate.isPresent()) {
            continue;
        }
        return Optional.of(new HivePageSource(allMappings, hiveStorageTimeZone, typeManager, candidate.get()));
    }

    // Fallback: a record cursor adapted to the page source interface.
    for (HiveRecordCursorProvider cursorProvider : cursorProviders) {
        // GenericHiveRecordCursor will automatically do the coercion without HiveCoercionRecordCursor
        boolean needsCoercionWrapper = !(cursorProvider instanceof GenericHiveRecordCursorProvider);
        Optional<RecordCursor> created = cursorProvider.createRecordCursor(clientId, configuration, session, path, start, length, schema, extractRegularColumnHandles(regularMappings, needsCoercionWrapper), effectivePredicate, hiveStorageTimeZone, typeManager);
        if (!created.isPresent()) {
            continue;
        }

        // Need to wrap RcText and RcBinary into a wrapper, which will do the coercion for mismatch columns
        RecordCursor baseCursor = needsCoercionWrapper
                ? new HiveCoercionRecordCursor(regularMappings, typeManager, created.get())
                : created.get();
        HiveRecordCursor hiveCursor = new HiveRecordCursor(allMappings, hiveStorageTimeZone, typeManager, baseCursor);

        List<Type> columnTypes = hiveColumns.stream()
                .map(column -> typeManager.getType(column.getTypeSignature()))
                .collect(toList());
        return Optional.of(new RecordPageSource(columnTypes, hiveCursor));
    }

    return Optional.empty();
}
Also used : RecordPageSource(com.facebook.presto.spi.RecordPageSource) DateTimeZone(org.joda.time.DateTimeZone) TypeManager(com.facebook.presto.spi.type.TypeManager) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) OptionalInt(java.util.OptionalInt) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) Inject(javax.inject.Inject) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ImmutableList(com.google.common.collect.ImmutableList) Type(com.facebook.presto.spi.type.Type) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) Path(org.apache.hadoop.fs.Path) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) ColumnMapping.extractRegularColumnHandles(com.facebook.presto.hive.HivePageSourceProvider.ColumnMapping.extractRegularColumnHandles) ImmutableSet(com.google.common.collect.ImmutableSet) Properties(java.util.Properties) HiveUtil.getPrefilledColumnValue(com.facebook.presto.hive.HiveUtil.getPrefilledColumnValue) Set(java.util.Set) Preconditions.checkState(com.google.common.base.Preconditions.checkState) ConnectorSession(com.facebook.presto.spi.ConnectorSession) TupleDomain(com.facebook.presto.spi.predicate.TupleDomain) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) RecordCursor(com.facebook.presto.spi.RecordCursor) List(java.util.List) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) ColumnHandle(com.facebook.presto.spi.ColumnHandle) Optional(java.util.Optional) RecordCursor(com.facebook.presto.spi.RecordCursor) RecordPageSource(com.facebook.presto.spi.RecordPageSource) Type(com.facebook.presto.spi.type.Type)

Aggregations

RecordPageSource (com.facebook.presto.spi.RecordPageSource)14 ConnectorPageSource (com.facebook.presto.spi.ConnectorPageSource)7 ImmutableList (com.google.common.collect.ImmutableList)6 ConnectorSession (com.facebook.presto.spi.ConnectorSession)5 PrestoException (com.facebook.presto.spi.PrestoException)5 RecordCursor (com.facebook.presto.spi.RecordCursor)5 Test (org.testng.annotations.Test)5 ColumnHandle (com.facebook.presto.spi.ColumnHandle)4 SchemaTableName (com.facebook.presto.spi.SchemaTableName)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 Configuration (org.apache.hadoop.conf.Configuration)4 CacheConfig (com.facebook.presto.cache.CacheConfig)3 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)3 Storage (com.facebook.presto.hive.metastore.Storage)3 TestingConnectorSession (com.facebook.presto.testing.TestingConnectorSession)3 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)3 ImmutableSet (com.google.common.collect.ImmutableSet)3 DataSize (io.airlift.units.DataSize)3 List (java.util.List)3 Map (java.util.Map)3