Search in sources :

Example 6 with RecordPageSource

use of io.prestosql.spi.connector.RecordPageSource in project hetu-core by openlookeng.

the class TestHiveFileFormats method testCursorProvider.

/**
 * Opens {@code split} through {@code cursorProvider} and checks the resulting record cursor
 * against {@code testColumns}.
 *
 * <p>Partition-key columns are left out of the {@code columns} / {@code columns.types} split
 * properties and are handed to the page source factory as {@link HivePartitionKey}s instead.
 *
 * @param cursorProvider the only cursor provider offered to the page source factory
 * @param split the file split to read
 * @param storageFormat supplies the input format and serde written into the split properties
 * @param testColumns column definitions (both regular and partition-key columns)
 * @param session session passed through to the page source factory
 * @param rowCount row count forwarded to {@code checkCursor}
 * @throws AssertionError if no page source is produced, or it is not a {@link RecordPageSource}
 */
private void testCursorProvider(HiveRecordCursorProvider cursorProvider, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testColumns, ConnectorSession session, int rowCount) {
    // Lazy view over the non-partition columns; reused for both the name list and the type list.
    Iterable<TestColumn> dataColumns = filter(testColumns, not(TestColumn::isPartitionKey));

    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, storageFormat.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerDe());
    splitProperties.setProperty("columns", Joiner.on(',').join(transform(dataColumns, TestColumn::getName)));
    splitProperties.setProperty("columns.types", Joiner.on(',').join(transform(dataColumns, TestColumn::getType)));

    List<HivePartitionKey> partitionKeys = testColumns.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), (String) input.getWriteValue()))
            .collect(toList());

    Configuration configuration = new Configuration();
    configuration.set("io.compression.codecs", LzoCodec.class.getName() + "," + LzopCodec.class.getName());

    // Fail with a descriptive message instead of a bare NoSuchElementException from Optional.get().
    ConnectorPageSource pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(cursorProvider), ImmutableSet.of(), configuration, session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), split.getLength(), splitProperties, TupleDomain.all(), getColumnHandles(testColumns), partitionKeys, TYPE_MANAGER, ImmutableMap.of(), Optional.empty(), false, Optional.empty(), Optional.empty(), Optional.empty(), Optional.empty(), null, false, -1L, ImmutableMap.of(), ImmutableList.of())
            .orElseThrow(() -> new AssertionError("No page source was created for " + storageFormat + " split " + split.getPath()));

    // Only a cursor provider was supplied, so a cursor-backed page source is expected here.
    if (!(pageSource instanceof RecordPageSource)) {
        throw new AssertionError("Expected a RecordPageSource for " + storageFormat + " but got " + pageSource.getClass().getName());
    }
    RecordCursor cursor = ((RecordPageSource) pageSource).getCursor();
    checkCursor(cursor, testColumns, rowCount);
}
Also used : Iterables.transform(com.google.common.collect.Iterables.transform) RCBINARY(io.prestosql.plugin.hive.HiveStorageFormat.RCBINARY) Test(org.testng.annotations.Test) TEXTFILE(io.prestosql.plugin.hive.HiveStorageFormat.TEXTFILE) FileSplit(org.apache.hadoop.mapred.FileSplit) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Predicates.not(com.google.common.base.Predicates.not) Locale(java.util.Locale) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Duration(java.time.Duration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) JSON(io.prestosql.plugin.hive.HiveStorageFormat.JSON) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) LzoCodec(io.airlift.compress.lzo.LzoCodec) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) BeforeClass(org.testng.annotations.BeforeClass) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) RCTEXT(io.prestosql.plugin.hive.HiveStorageFormat.RCTEXT) List(java.util.List) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) ParquetPageSourceFactory(io.prestosql.plugin.hive.parquet.ParquetPageSourceFactory) HDFS_ENVIRONMENT(io.prestosql.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) Optional(java.util.Optional) Iterables.filter(com.google.common.collect.Iterables.filter) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) Joiner(com.google.common.base.Joiner) 
StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) DataProvider(org.testng.annotations.DataProvider) Logger(io.airlift.log.Logger) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) TYPE_MANAGER(io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER) Assert.assertEquals(org.testng.Assert.assertEquals) RcFilePageSourceFactory(io.prestosql.plugin.hive.rcfile.RcFilePageSourceFactory) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcPageSourceFactory(io.prestosql.plugin.hive.orc.OrcPageSourceFactory) AVRO(io.prestosql.plugin.hive.HiveStorageFormat.AVRO) SEQUENCEFILE(io.prestosql.plugin.hive.HiveStorageFormat.SEQUENCEFILE) RecordCursor(io.prestosql.spi.connector.RecordCursor) PARQUET(io.prestosql.plugin.hive.HiveStorageFormat.PARQUET) Properties(java.util.Properties) TupleDomain(io.prestosql.spi.predicate.TupleDomain) CSV(io.prestosql.plugin.hive.HiveStorageFormat.CSV) Assert.fail(org.testng.Assert.fail) IOException(java.io.IOException) File(java.io.File) OrcWriterOptions(io.prestosql.orc.OrcWriterOptions) HiveTestUtils.createGenericHiveRecordCursorProvider(io.prestosql.plugin.hive.HiveTestUtils.createGenericHiveRecordCursorProvider) Collectors.toList(java.util.stream.Collectors.toList) OrcCacheStore(io.prestosql.orc.OrcCacheStore) 
FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) RecordPageSource(io.prestosql.spi.connector.RecordPageSource) RecordCursor(io.prestosql.spi.connector.RecordCursor) Configuration(org.apache.hadoop.conf.Configuration) LzoCodec(io.airlift.compress.lzo.LzoCodec) Properties(java.util.Properties) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) RecordPageSource(io.prestosql.spi.connector.RecordPageSource)

Example 7 with RecordPageSource

use of io.prestosql.spi.connector.RecordPageSource in project hetu-core by openlookeng.

the class SystemPageSourceProvider method createPageSource.

/**
 * Creates a page source for a system table, projecting the table's columns into the order
 * requested by {@code columns} and translating the split's column constraint into a constraint
 * keyed by the table's field indexes.
 *
 * @param transaction must be a {@code SystemTransactionHandle}
 * @param session the session used to resolve and read the system table
 * @param split must be a {@code SystemSplit}; its constraint is pushed to the table
 * @param table must be a {@code SystemTableHandle}
 * @param columns the requested columns, each a {@code SystemColumnHandle}
 * @return a page source yielding the requested columns in the requested order
 * @throws PrestoException with {@code NOT_FOUND} if the table cannot be resolved, or with
 *         {@code GENERIC_INTERNAL_ERROR} if the table declares a duplicate column name or a
 *         requested/constrained column does not exist in the table
 */
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit split, ConnectorTableHandle table, List<ColumnHandle> columns) {
    requireNonNull(columns, "columns is null");
    SystemTransactionHandle systemTransaction = (SystemTransactionHandle) transaction;
    SystemSplit systemSplit = (SystemSplit) split;
    SchemaTableName tableName = ((SystemTableHandle) table).getSchemaTableName();
    SystemTable systemTable = tables.getSystemTable(session, tableName).orElseThrow(() -> new PrestoException(NOT_FOUND, String.format("Table %s not found", tableName)));

    // Index the table's columns by name so handles can be mapped to field positions.
    List<ColumnMetadata> tableColumns = systemTable.getTableMetadata().getColumns();
    Map<String, Integer> columnsByName = new HashMap<>();
    for (int i = 0; i < tableColumns.size(); i++) {
        ColumnMetadata column = tableColumns.get(i);
        if (columnsByName.put(column.getName(), i) != null) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, "Duplicate column name: " + column.getName());
        }
    }

    // For every requested column, record which field of the system table supplies it.
    ImmutableList.Builder<Integer> userToSystemFieldIndex = ImmutableList.builder();
    for (ColumnHandle column : columns) {
        String columnName = ((SystemColumnHandle) column).getColumnName();
        Integer index = columnsByName.get(columnName);
        if (index == null) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, String.format("Column does not exist: %s.%s", tableName, columnName));
        }
        userToSystemFieldIndex.add(index);
    }
    ImmutableList<Integer> fieldIndexes = userToSystemFieldIndex.build();

    // Re-key the split constraint from column handles to field indexes.
    TupleDomain<ColumnHandle> constraint = systemSplit.getConstraint();
    TupleDomain<Integer> newConstraint;
    if (!constraint.getDomains().isPresent()) {
        // No domain map means the constraint is unsatisfiable; keep it that way rather than
        // failing on Optional.get().
        newConstraint = TupleDomain.none();
    } else {
        ImmutableMap.Builder<Integer, Domain> newConstraints = ImmutableMap.builder();
        for (Map.Entry<ColumnHandle, Domain> entry : constraint.getDomains().get().entrySet()) {
            String columnName = ((SystemColumnHandle) entry.getKey()).getColumnName();
            Integer index = columnsByName.get(columnName);
            if (index == null) {
                // ImmutableMap rejects null keys; report the real problem instead of an NPE.
                throw new PrestoException(GENERIC_INTERNAL_ERROR, String.format("Column does not exist: %s.%s", tableName, columnName));
            }
            newConstraints.put(index, entry.getValue());
        }
        newConstraint = withColumnDomains(newConstraints.build());
    }

    try {
        return new MappedPageSource(systemTable.pageSource(systemTransaction.getConnectorTransactionHandle(), session, newConstraint), fieldIndexes);
    } catch (UnsupportedOperationException ignored) {
        // Page-based access is not available (presumably the table only exposes a cursor —
        // confirm against SystemTable); fall back to a record-set-backed page source.
        return new RecordPageSource(new MappedRecordSet(toRecordSet(systemTransaction.getConnectorTransactionHandle(), systemTable, session, newConstraint), fieldIndexes));
    }
}
Also used : ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) HashMap(java.util.HashMap) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) PrestoException(io.prestosql.spi.PrestoException) RecordPageSource(io.prestosql.spi.connector.RecordPageSource) MappedRecordSet(io.prestosql.split.MappedRecordSet) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ImmutableMap(com.google.common.collect.ImmutableMap) MappedPageSource(io.prestosql.split.MappedPageSource) SystemTable(io.prestosql.spi.connector.SystemTable) TupleDomain(io.prestosql.spi.predicate.TupleDomain) Domain(io.prestosql.spi.predicate.Domain) HashMap(java.util.HashMap) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)

Example 8 with RecordPageSource

use of io.prestosql.spi.connector.RecordPageSource in project hetu-core by openlookeng.

the class ThriftIndexedTpchService method createLookupPageSource.

/**
 * Builds a page source holding the index lookup results for the keys carried by {@code splitInfo}.
 *
 * @param splitInfo supplies the schema, table, lookup column names and lookup keys
 * @param outputColumnNames the columns to return for each matched key
 * @return a record-set-backed page source over the lookup output
 * @throws IllegalArgumentException if no index exists for the table and lookup columns
 */
@Override
protected ConnectorPageSource createLookupPageSource(SplitInfo splitInfo, List<String> outputColumnNames) {
    // Resolve the index first so a missing index is reported before any key data is touched.
    IndexedTable table = indexedData
            .getIndexedTable(
                    splitInfo.getTableName(),
                    schemaNameToScaleFactor(splitInfo.getSchemaName()),
                    ImmutableSet.copyOf(splitInfo.getLookupColumnNames()))
            .orElseThrow(() -> new IllegalArgumentException(format("No such index: %s%s", splitInfo.getTableName(), splitInfo.getLookupColumnNames())));

    // Wrap the raw keys from the split in a record set typed by the lookup columns.
    List<Type> keyTypes = types(splitInfo.getTableName(), splitInfo.getLookupColumnNames());
    RecordSet keys = new ListBasedRecordSet(splitInfo.getKeys(), keyTypes);

    return new RecordPageSource(lookupIndexKeys(keys, table, outputColumnNames));
}
Also used : Type(io.prestosql.spi.type.Type) RecordSet(io.prestosql.spi.connector.RecordSet) MappedRecordSet(io.prestosql.split.MappedRecordSet) IndexedTable(io.prestosql.tests.tpch.TpchIndexedData.IndexedTable) RecordPageSource(io.prestosql.spi.connector.RecordPageSource)

Example 9 with RecordPageSource

use of io.prestosql.spi.connector.RecordPageSource in project hetu-core by openlookeng.

the class TpchConnectorIndex method lookup.

/**
 * Looks up the given keys in the index and returns the matching rows as a page source.
 *
 * @param rawInputRecordSet the lookup keys as supplied by the caller
 * @return a page source over the formatted lookup output
 */
@Override
public ConnectorPageSource lookup(RecordSet rawInputRecordSet) {
    // Three steps, innermost first: reformat the incoming keys with keyFormatter,
    // look them up in the indexed table, then reformat the result with outputFormatter.
    return new RecordPageSource(
            outputFormatter.apply(
                    indexedTable.lookupKeys(
                            keyFormatter.apply(rawInputRecordSet))));
}
Also used : RecordSet(io.prestosql.spi.connector.RecordSet) RecordPageSource(io.prestosql.spi.connector.RecordPageSource)

Example 10 with RecordPageSource

use of io.prestosql.spi.connector.RecordPageSource in project boostkit-bigdata by kunpengcompute.

the class AbstractTestHive method assertPageSourceType.

/**
 * Asserts that the given page source is backed by the reader implementation expected for
 * {@code hiveStorageFormat}.
 *
 * <p>An {@code OrcConcatPageSource} is unwrapped first. A {@code RecordPageSource} is checked
 * via its underlying record cursor (looking through a {@code HiveCoercionRecordCursor} if
 * present); anything else is treated as a {@code HivePageSource} and checked via its delegate.
 *
 * @param connectorPageSource the page source to inspect
 * @param hiveStorageFormat the storage format that determines the expected type
 */
protected static void assertPageSourceType(ConnectorPageSource connectorPageSource, HiveStorageFormat hiveStorageFormat) {
    ConnectorPageSource unwrapped = connectorPageSource instanceof OrcConcatPageSource
            ? ((OrcConcatPageSource) connectorPageSource).getConnectorPageSource()
            : connectorPageSource;

    // Page-based readers: compare the delegate page source and finish.
    if (!(unwrapped instanceof RecordPageSource)) {
        assertInstanceOf(((HivePageSource) unwrapped).getPageSource(), pageSourceType(hiveStorageFormat), hiveStorageFormat.name());
        return;
    }

    // Cursor-based readers: peel off the Hive wrapper cursor(s) to reach the format cursor.
    RecordCursor cursor = ((HiveRecordCursor) ((RecordPageSource) unwrapped).getCursor()).getRegularColumnRecordCursor();
    if (cursor instanceof HiveCoercionRecordCursor) {
        cursor = ((HiveCoercionRecordCursor) cursor).getRegularColumnRecordCursor();
    }
    assertInstanceOf(cursor, recordCursorType(hiveStorageFormat), hiveStorageFormat.name());
}
Also used : RecordCursor(io.prestosql.spi.connector.RecordCursor) OrcConcatPageSource(io.prestosql.plugin.hive.orc.OrcConcatPageSource) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) RecordPageSource(io.prestosql.spi.connector.RecordPageSource)

Aggregations

RecordPageSource (io.prestosql.spi.connector.RecordPageSource)11 ConnectorPageSource (io.prestosql.spi.connector.ConnectorPageSource)4 RecordCursor (io.prestosql.spi.connector.RecordCursor)4 RecordSet (io.prestosql.spi.connector.RecordSet)4 Test (org.testng.annotations.Test)4 ImmutableList (com.google.common.collect.ImmutableList)3 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)3 ImmutableMap (com.google.common.collect.ImmutableMap)3 PrestoException (io.prestosql.spi.PrestoException)3 TupleDomain (io.prestosql.spi.predicate.TupleDomain)3 Joiner (com.google.common.base.Joiner)2 Predicates.not (com.google.common.base.Predicates.not)2 ImmutableSet (com.google.common.collect.ImmutableSet)2 Iterables.filter (com.google.common.collect.Iterables.filter)2 Iterables.transform (com.google.common.collect.Iterables.transform)2 Lists (com.google.common.collect.Lists)2 LzoCodec (io.airlift.compress.lzo.LzoCodec)2 LzopCodec (io.airlift.compress.lzo.LzopCodec)2 Logger (io.airlift.log.Logger)2 Slices (io.airlift.slice.Slices)2