Search in sources:

Example 6 with TESTING_TYPE_MANAGER

use of io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER in project trino by trinodb.

The example below is taken from the class TestHiveFileFormats, method testPageSourceFactory.

/**
 * Reads the given split through {@code sourceFactory} and verifies that the decoded
 * pages match the expected test-column values and row count.
 *
 * <p>Split properties are built from the full set of base columns (partition keys
 * excluded), a page source is created through {@link HivePageSourceProvider}, and the
 * result is checked with {@code checkPageSource}.
 *
 * @throws IOException if the page source cannot be read
 */
private void testPageSourceFactory(HivePageSourceFactory sourceFactory, FileSplit split, HiveStorageFormat storageFormat, List<TestColumn> testReadColumns, ConnectorSession session, long fileSize, int rowCount) throws IOException {
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, storageFormat.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerde());
    // Use full columns in split properties; several projected test columns may share
    // one base column, so deduplicate by base name
    ImmutableList.Builder<String> splitPropertiesColumnNames = ImmutableList.builder();
    ImmutableList.Builder<String> splitPropertiesColumnTypes = ImmutableList.builder();
    Set<String> baseColumnNames = new HashSet<>();
    for (TestColumn testReadColumn : testReadColumns) {
        String name = testReadColumn.getBaseName();
        // Set.add returns false for duplicates, replacing the former contains()+add() pair
        if (!testReadColumn.isPartitionKey() && baseColumnNames.add(name)) {
            splitPropertiesColumnNames.add(name);
            splitPropertiesColumnTypes.add(testReadColumn.getBaseObjectInspector().getTypeName());
        }
    }
    splitProperties.setProperty("columns", String.join(",", splitPropertiesColumnNames.build()));
    splitProperties.setProperty("columns.types", String.join(",", splitPropertiesColumnTypes.build()));
    List<HivePartitionKey> partitionKeys = testReadColumns.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), (String) input.getWriteValue()))
            .collect(toList());
    // Partition name in Hive's key1=value1/key2=value2 form; joining in the stream
    // avoids the intermediate immutable list the old code built just to join it
    String partitionName = partitionKeys.stream()
            .map(partitionKey -> format("%s=%s", partitionKey.getName(), partitionKey.getValue()))
            .collect(Collectors.joining("/"));
    List<HiveColumnHandle> columnHandles = getColumnHandles(testReadColumns);
    List<HivePageSourceProvider.ColumnMapping> columnMappings = buildColumnMappings(partitionName, partitionKeys, columnHandles, ImmutableList.of(), TableToPartitionMapping.empty(), split.getPath(), OptionalInt.empty(), fileSize, Instant.now().toEpochMilli());
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(sourceFactory), ImmutableSet.of(), new Configuration(false), session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), fileSize, splitProperties, TupleDomain.all(), columnHandles, TESTING_TYPE_MANAGER, Optional.empty(), Optional.empty(), false, Optional.empty(), false, NO_ACID_TRANSACTION, columnMappings);
    assertTrue(pageSource.isPresent(), "page source factory did not handle the split");
    checkPageSource(pageSource.get(), testReadColumns, getTypes(columnHandles), rowCount);
}
Also used : OrcFileWriterFactory(io.trino.plugin.hive.orc.OrcFileWriterFactory) ParquetFileWriterFactory(io.trino.plugin.hive.parquet.ParquetFileWriterFactory) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) HiveTestUtils.createGenericHiveRecordCursorProvider(io.trino.plugin.hive.HiveTestUtils.createGenericHiveRecordCursorProvider) TrinoExceptionAssert.assertTrinoExceptionThrownBy(io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy) PARQUET(io.trino.plugin.hive.HiveStorageFormat.PARQUET) FileSplit(org.apache.hadoop.mapred.FileSplit) Locale(java.util.Locale) Configuration(org.apache.hadoop.conf.Configuration) StructuralTestUtil.rowBlockOf(io.trino.testing.StructuralTestUtil.rowBlockOf) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) AVRO(io.trino.plugin.hive.HiveStorageFormat.AVRO) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) LzoCodec(io.airlift.compress.lzo.LzoCodec) ImmutableSet(com.google.common.collect.ImmutableSet) TimeZone(java.util.TimeZone) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) BeforeClass(org.testng.annotations.BeforeClass) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) List(java.util.List) 
ColumnMapping.buildColumnMappings(io.trino.plugin.hive.HivePageSourceProvider.ColumnMapping.buildColumnMappings) OrcReaderConfig(io.trino.plugin.hive.orc.OrcReaderConfig) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) RcFilePageSourceFactory(io.trino.plugin.hive.rcfile.RcFilePageSourceFactory) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) DataProvider(org.testng.annotations.DataProvider) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Type(io.trino.spi.type.Type) Assert.assertEquals(org.testng.Assert.assertEquals) CSV(io.trino.plugin.hive.HiveStorageFormat.CSV) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ParquetPageSourceFactory(io.trino.plugin.hive.parquet.ParquetPageSourceFactory) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ParquetWriterConfig(io.trino.plugin.hive.parquet.ParquetWriterConfig) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SEQUENCEFILE(io.trino.plugin.hive.HiveStorageFormat.SEQUENCEFILE) OrcReaderOptions(io.trino.orc.OrcReaderOptions) OrcPageSourceFactory(io.trino.plugin.hive.orc.OrcPageSourceFactory) RecordPageSource(io.trino.spi.connector.RecordPageSource) Objects.requireNonNull(java.util.Objects.requireNonNull) 
TEXTFILE(io.trino.plugin.hive.HiveStorageFormat.TEXTFILE) JSON(io.trino.plugin.hive.HiveStorageFormat.JSON) OrcWriterConfig(io.trino.plugin.hive.orc.OrcWriterConfig) RCBINARY(io.trino.plugin.hive.HiveStorageFormat.RCBINARY) RecordCursor(io.trino.spi.connector.RecordCursor) Properties(java.util.Properties) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) HiveTestUtils.getTypes(io.trino.plugin.hive.HiveTestUtils.getTypes) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TupleDomain(io.trino.spi.predicate.TupleDomain) UTC(org.joda.time.DateTimeZone.UTC) File(java.io.File) TestingConnectorSession(io.trino.testing.TestingConnectorSession) SESSION(io.trino.plugin.hive.HiveTestUtils.SESSION) HiveTestUtils.getHiveSession(io.trino.plugin.hive.HiveTestUtils.getHiveSession) Collectors.toList(java.util.stream.Collectors.toList) OrcWriterOptions(io.trino.orc.OrcWriterOptions) RCTEXT(io.trino.plugin.hive.HiveStorageFormat.RCTEXT) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) Properties(java.util.Properties) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) HashSet(java.util.HashSet)

Example 7 with TESTING_TYPE_MANAGER

use of io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER in project trino by trinodb.

The example below is taken from the class TestHivePageSink, method createPageSource.

/**
 * Builds a {@link ConnectorPageSource} that reads back the rows previously written to
 * {@code outputFile}, using the storage format configured in {@code config}.
 */
private static ConnectorPageSource createPageSource(HiveTransactionHandle transaction, HiveConfig config, File outputFile) {
    HiveStorageFormat storageFormat = config.getHiveStorageFormat();
    // Split properties describe the file format and the comma-separated table schema
    Properties schema = new Properties();
    schema.setProperty(FILE_INPUT_FORMAT, storageFormat.getInputFormat());
    schema.setProperty(SERIALIZATION_LIB, storageFormat.getSerde());
    List<HiveColumnHandle> columns = getColumnHandles();
    List<String> columnNames = columns.stream()
            .map(HiveColumnHandle::getName)
            .collect(toImmutableList());
    List<String> columnTypes = columns.stream()
            .map(column -> column.getHiveType().getHiveTypeName().toString())
            .collect(toImmutableList());
    schema.setProperty("columns", String.join(",", columnNames));
    schema.setProperty("columns.types", String.join(",", columnTypes));
    // Single split covering the whole output file
    HiveSplit split = new HiveSplit(SCHEMA_NAME, TABLE_NAME, "", "file:///" + outputFile.getAbsolutePath(), 0, outputFile.length(), outputFile.length(), outputFile.lastModified(), schema, ImmutableList.of(), ImmutableList.of(), OptionalInt.empty(), 0, false, TableToPartitionMapping.empty(), Optional.empty(), Optional.empty(), false, Optional.empty(), 0, SplitWeight.standard());
    ConnectorTableHandle table = new HiveTableHandle(SCHEMA_NAME, TABLE_NAME, ImmutableMap.of(), ImmutableList.of(), ImmutableList.of(), Optional.empty());
    HivePageSourceProvider provider = new HivePageSourceProvider(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, config, getDefaultHivePageSourceFactories(HDFS_ENVIRONMENT, config), getDefaultHiveRecordCursorProviders(config, HDFS_ENVIRONMENT), new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT, config), Optional.empty());
    return provider.createPageSource(transaction, getHiveSession(config), split, table, ImmutableList.copyOf(getColumnHandles()), DynamicFilter.EMPTY);
}
Also used : MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) MaterializedResult(io.trino.testing.MaterializedResult) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) TypeOperators(io.trino.spi.type.TypeOperators) SplitWeight(io.trino.spi.SplitWeight) HiveMetastoreFactory(io.trino.plugin.hive.metastore.HiveMetastoreFactory) TpchColumnType(io.trino.tpch.TpchColumnType) Math.round(java.lang.Math.round) Slices(io.airlift.slice.Slices) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) Path(org.apache.hadoop.fs.Path) LineItemColumn(io.trino.tpch.LineItemColumn) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) INTEGER(io.trino.spi.type.IntegerType.INTEGER) Assert.assertEquals(io.trino.testing.assertions.Assert.assertEquals) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) HiveTestUtils.getDefaultHiveRecordCursorProviders(io.trino.plugin.hive.HiveTestUtils.getDefaultHiveRecordCursorProviders) TestingNodeManager(io.trino.testing.TestingNodeManager) ImmutableMap(com.google.common.collect.ImmutableMap) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HIVE_DATE(io.trino.plugin.hive.HiveType.HIVE_DATE) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) SchemaTableName(io.trino.spi.connector.SchemaTableName) String.format(java.lang.String.format) LineItemGenerator(io.trino.tpch.LineItemGenerator) LineItem(io.trino.tpch.LineItem) List(java.util.List) Stream(java.util.stream.Stream) BIGINT(io.trino.spi.type.BigintType.BIGINT) DynamicFilter(io.trino.spi.connector.DynamicFilter) Optional(java.util.Optional) HivePageSinkMetadata(io.trino.plugin.hive.metastore.HivePageSinkMetadata) DATE(io.trino.spi.type.DateType.DATE) Joiner(com.google.common.base.Joiner) JsonCodec(io.airlift.json.JsonCodec) 
DIRECT_TO_TARGET_NEW_DIRECTORY(io.trino.plugin.hive.LocationHandle.WriteMode.DIRECT_TO_TARGET_NEW_DIRECTORY) HiveTestUtils.getDefaultHivePageSourceFactories(io.trino.plugin.hive.HiveTestUtils.getDefaultHivePageSourceFactories) HIVE_DOUBLE(io.trino.plugin.hive.HiveType.HIVE_DOUBLE) PageBuilder(io.trino.spi.PageBuilder) Type(io.trino.spi.type.Type) Page(io.trino.spi.Page) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) JoinCompiler(io.trino.sql.gen.JoinCompiler) OptionalInt(java.util.OptionalInt) GroupByHashPageIndexerFactory(io.trino.operator.GroupByHashPageIndexerFactory) ArrayList(java.util.ArrayList) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) ImmutableList(com.google.common.collect.ImmutableList) Files(com.google.common.io.Files) NONE(io.trino.plugin.hive.HiveCompressionCodec.NONE) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) FileHiveMetastore.createTestingFileHiveMetastore(io.trino.plugin.hive.metastore.file.FileHiveMetastore.createTestingFileHiveMetastore) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) BlockTypeOperators(io.trino.type.BlockTypeOperators) Properties(java.util.Properties) HIVE_LONG(io.trino.plugin.hive.HiveType.HIVE_LONG) HiveTestUtils.getDefaultHiveFileWriterFactories(io.trino.plugin.hive.HiveTestUtils.getDefaultHiveFileWriterFactories) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) HiveTestUtils.getHiveSessionProperties(io.trino.plugin.hive.HiveTestUtils.getHiveSessionProperties) ConnectorSession(io.trino.spi.connector.ConnectorSession) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) PAGE_SORTER(io.trino.plugin.hive.HiveTestUtils.PAGE_SORTER) File(java.io.File) HIVE_STRING(io.trino.plugin.hive.HiveType.HIVE_STRING) 
TpchColumnTypes(io.trino.tpch.TpchColumnTypes) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) HiveTestUtils.getHiveSession(io.trino.plugin.hive.HiveTestUtils.getHiveSession) HIVE_INT(io.trino.plugin.hive.HiveType.HIVE_INT) Collectors.toList(java.util.stream.Collectors.toList) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) BlockBuilder(io.trino.spi.block.BlockBuilder) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) Properties(java.util.Properties) HiveTestUtils.getHiveSessionProperties(io.trino.plugin.hive.HiveTestUtils.getHiveSessionProperties) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle)

Example 8 with TESTING_TYPE_MANAGER

use of io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER in project trino by trinodb.

The example below is taken from the class TestRowParametricType, method testTypeSignatureRoundTrip.

@Test
public void testTypeSignatureRoundTrip() {
    // ROW(col1 BIGINT, col2 DOUBLE) expressed as a raw type signature
    TypeSignature expected = new TypeSignature(
            ROW,
            TypeSignatureParameter.namedTypeParameter(new NamedTypeSignature(Optional.of(new RowFieldName("col1")), BIGINT.getTypeSignature())),
            TypeSignatureParameter.namedTypeParameter(new NamedTypeSignature(Optional.of(new RowFieldName("col2")), DOUBLE.getTypeSignature())));
    List<TypeParameter> typeParameters = expected.getParameters().stream()
            .map(signatureParameter -> TypeParameter.of(signatureParameter, TESTING_TYPE_MANAGER))
            .collect(Collectors.toList());
    Type constructedRowType = RowParametricType.ROW.createType(TESTING_TYPE_MANAGER, typeParameters);
    // Round-trip: the constructed type must report exactly the signature it was built from
    assertEquals(constructedRowType.getTypeSignature(), expected);
}
Also used : NamedTypeSignature(io.trino.spi.type.NamedTypeSignature) ROW(io.trino.spi.type.StandardTypes.ROW) Type(io.trino.spi.type.Type) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) Test(org.testng.annotations.Test) TypeParameter(io.trino.spi.type.TypeParameter) Collectors(java.util.stream.Collectors) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) List(java.util.List) BIGINT(io.trino.spi.type.BigintType.BIGINT) RowFieldName(io.trino.spi.type.RowFieldName) Optional(java.util.Optional) TypeSignatureParameter(io.trino.spi.type.TypeSignatureParameter) Assert.assertEquals(io.trino.testing.assertions.Assert.assertEquals) TypeSignature(io.trino.spi.type.TypeSignature) NamedTypeSignature(io.trino.spi.type.NamedTypeSignature) TypeSignature(io.trino.spi.type.TypeSignature) TypeParameter(io.trino.spi.type.TypeParameter) Type(io.trino.spi.type.Type) RowFieldName(io.trino.spi.type.RowFieldName) NamedTypeSignature(io.trino.spi.type.NamedTypeSignature) Test(org.testng.annotations.Test)

Example 9 with TESTING_TYPE_MANAGER

use of io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER in project trino by trinodb.

The example below is taken from the class TestTransactionLogAccess, method testSnapshotsAreConsistent.

// Verifies that a snapshot pinned at a given log version keeps returning the same
// active files even after newer transaction log entries are appended, while a
// freshly loaded snapshot observes the new files.
@Test
public void testSnapshotsAreConsistent() throws Exception {
    String tableName = "person";
    // NOTE(review): temp dir is never deleted after the test — confirm whether cleanup is handled elsewhere
    File tempDir = Files.createTempDir();
    File tableDir = new File(tempDir, tableName);
    File transactionLogDir = new File(tableDir, TRANSACTION_LOG_DIRECTORY);
    transactionLogDir.mkdirs();
    // Seed the table with log entries 0..11 plus the _last_checkpoint marker from test resources
    File resourceDir = new File(getClass().getClassLoader().getResource("databricks/person/_delta_log").toURI());
    copyTransactionLogEntry(0, 12, resourceDir, transactionLogDir);
    Files.copy(new File(resourceDir, LAST_CHECKPOINT_FILENAME), new File(transactionLogDir, LAST_CHECKPOINT_FILENAME));
    setupTransactionLogAccess(tableName, new Path(tableDir.toURI()));
    // Active files as seen by the snapshot pinned at version 11
    List<AddFileEntry> expectedDataFiles = transactionLogAccess.getActiveFiles(tableSnapshot, SESSION);
    // Append two more commits (versions 12 and 13) after the snapshot was taken
    copyTransactionLogEntry(12, 14, resourceDir, transactionLogDir);
    Set<String> newDataFiles = ImmutableSet.of("age=28/part-00000-40dd1707-1d42-4328-a59a-21f5c945fe60.c000.snappy.parquet", "age=29/part-00000-3794c463-cb0c-4beb-8d07-7cc1e3b5920f.c000.snappy.parquet");
    TableSnapshot updatedTableSnapshot = transactionLogAccess.loadSnapshot(new SchemaTableName("schema", tableName), new Path(tableDir.toURI()), SESSION);
    List<AddFileEntry> allDataFiles = transactionLogAccess.getActiveFiles(updatedTableSnapshot, SESSION);
    List<AddFileEntry> dataFilesWithFixedVersion = transactionLogAccess.getActiveFiles(tableSnapshot, SESSION);
    // The reloaded snapshot must see the appended files; the pinned snapshot must not
    for (String newFilePath : newDataFiles) {
        assertTrue(allDataFiles.stream().anyMatch(entry -> entry.getPath().equals(newFilePath)));
        assertTrue(dataFilesWithFixedVersion.stream().noneMatch(entry -> entry.getPath().equals(newFilePath)));
    }
    assertEquals(expectedDataFiles.size(), dataFilesWithFixedVersion.size());
    List<ColumnMetadata> columns = extractSchema(transactionLogAccess.getMetadataEntry(tableSnapshot, SESSION).get(), TESTING_TYPE_MANAGER);
    // Entry-by-entry comparison: the pinned snapshot's files are unchanged in order and content
    for (int i = 0; i < expectedDataFiles.size(); i++) {
        AddFileEntry expected = expectedDataFiles.get(i);
        AddFileEntry actual = dataFilesWithFixedVersion.get(i);
        assertEquals(expected.getPath(), actual.getPath());
        assertEquals(expected.getPartitionValues(), actual.getPartitionValues());
        assertEquals(expected.getSize(), actual.getSize());
        assertEquals(expected.getModificationTime(), actual.getModificationTime());
        assertEquals(expected.isDataChange(), actual.isDataChange());
        assertEquals(expected.getTags(), actual.getTags());
        assertTrue(expected.getStats().isPresent());
        assertTrue(actual.getStats().isPresent());
        // Per-column min/max/null-count/record-count statistics must also round-trip identically
        for (ColumnMetadata column : columns) {
            DeltaLakeColumnHandle columnHandle = new DeltaLakeColumnHandle(column.getName(), column.getType(), REGULAR);
            assertEquals(expected.getStats().get().getMinColumnValue(columnHandle), actual.getStats().get().getMinColumnValue(columnHandle));
            assertEquals(expected.getStats().get().getMaxColumnValue(columnHandle), actual.getStats().get().getMaxColumnValue(columnHandle));
            assertEquals(expected.getStats().get().getNullCount(columnHandle.getName()), actual.getStats().get().getNullCount(columnHandle.getName()));
            assertEquals(expected.getStats().get().getNumRecords(), actual.getStats().get().getNumRecords());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) TableSnapshot(io.trino.plugin.deltalake.transactionlog.TableSnapshot) Test(org.testng.annotations.Test) DeltaLakeSchemaSupport.extractSchema(io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractSchema) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) RemoveFileEntry(io.trino.plugin.deltalake.transactionlog.RemoveFileEntry) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) BigDecimal(java.math.BigDecimal) CheckpointSchemaManager(io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointSchemaManager) Map(java.util.Map) Sets.union(com.google.common.collect.Sets.union) Path(org.apache.hadoop.fs.Path) HiveHdfsConfiguration(io.trino.plugin.hive.HiveHdfsConfiguration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) LAST_CHECKPOINT_FILENAME(io.trino.plugin.deltalake.transactionlog.TransactionLogParser.LAST_CHECKPOINT_FILENAME) Assert.assertFalse(org.testng.Assert.assertFalse) Assert.assertEquals(io.trino.testing.assertions.Assert.assertEquals) TRANSACTION_LOG_DIRECTORY(io.trino.plugin.deltalake.transactionlog.TransactionLogUtil.TRANSACTION_LOG_DIRECTORY) ImmutableSet(com.google.common.collect.ImmutableSet) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) HdfsEnvironment(io.trino.plugin.hive.HdfsEnvironment) ImmutableMap(com.google.common.collect.ImmutableMap) UTC_KEY(io.trino.spi.type.TimeZoneKey.UTC_KEY) SESSION(io.trino.testing.TestingConnectorSession.SESSION) MetadataEntry(io.trino.plugin.deltalake.transactionlog.MetadataEntry) Set(java.util.Set) Collectors(java.util.stream.Collectors) SchemaTableName(io.trino.spi.connector.SchemaTableName) String.format(java.lang.String.format) List(java.util.List) Stream(java.util.stream.Stream) HdfsConfig(io.trino.plugin.hive.HdfsConfig) LocalDate(java.time.LocalDate) 
HdfsConfigurationInitializer(io.trino.plugin.hive.HdfsConfigurationInitializer) Decimals(io.trino.spi.type.Decimals) UTC(java.time.ZoneOffset.UTC) Optional(java.util.Optional) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) REGULAR(io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR) DeltaLakeFileStatistics(io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeFileStatistics) DataProvider(org.testng.annotations.DataProvider) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) LocalDateTime(java.time.LocalDateTime) DateTimeEncoding(io.trino.spi.type.DateTimeEncoding) ImmutableList(com.google.common.collect.ImmutableList) Files(com.google.common.io.Files) IntegerType(io.trino.spi.type.IntegerType) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) IOException(java.io.IOException) HdfsConfiguration(io.trino.plugin.hive.HdfsConfiguration) TupleDomain(io.trino.spi.predicate.TupleDomain) TestingConnectorContext(io.trino.testing.TestingConnectorContext) File(java.io.File) Assertions.assertEqualsIgnoreOrder(io.airlift.testing.Assertions.assertEqualsIgnoreOrder) ProtocolEntry(io.trino.plugin.deltalake.transactionlog.ProtocolEntry) Assert.assertTrue(org.testng.Assert.assertTrue) CommitInfoEntry(io.trino.plugin.deltalake.transactionlog.CommitInfoEntry) TypeManager(io.trino.spi.type.TypeManager) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) SchemaTableName(io.trino.spi.connector.SchemaTableName) AddFileEntry(io.trino.plugin.deltalake.transactionlog.AddFileEntry) TableSnapshot(io.trino.plugin.deltalake.transactionlog.TableSnapshot) File(java.io.File) Test(org.testng.annotations.Test)

Example 10 with TESTING_TYPE_MANAGER

use of io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER in project trino by trinodb.

The example below is taken from the class TestOrcPredicates, method createPageSource.

/**
 * Creates an ORC page source over {@code split} with {@code effectivePredicate} pushed down.
 *
 * <p>Split properties are built from the full set of base columns (partition keys
 * excluded), the predicate is re-keyed from test columns to their column handles, and
 * the resulting page source is returned for the caller to read.
 */
private ConnectorPageSource createPageSource(TupleDomain<TestColumn> effectivePredicate, List<TestColumn> columnsToRead, ConnectorSession session, FileSplit split) {
    OrcPageSourceFactory readerFactory = new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC);
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, ORC.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, ORC.getSerde());
    // Use full columns in split properties; several projected test columns may share
    // one base column, so deduplicate by base name
    ImmutableList.Builder<String> splitPropertiesColumnNames = ImmutableList.builder();
    ImmutableList.Builder<String> splitPropertiesColumnTypes = ImmutableList.builder();
    Set<String> baseColumnNames = new HashSet<>();
    for (TestColumn columnToRead : columnsToRead) {
        String name = columnToRead.getBaseName();
        // Set.add returns false for duplicates, replacing the former contains()+add() pair
        if (!columnToRead.isPartitionKey() && baseColumnNames.add(name)) {
            splitPropertiesColumnNames.add(name);
            splitPropertiesColumnTypes.add(columnToRead.getBaseObjectInspector().getTypeName());
        }
    }
    splitProperties.setProperty("columns", String.join(",", splitPropertiesColumnNames.build()));
    splitProperties.setProperty("columns.types", String.join(",", splitPropertiesColumnTypes.build()));
    List<HivePartitionKey> partitionKeys = columnsToRead.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), (String) input.getWriteValue()))
            .collect(toList());
    // Partition name in Hive's key1=value1/key2=value2 form; joining in the stream
    // avoids the intermediate immutable list the old code built just to join it
    String partitionName = partitionKeys.stream()
            .map(partitionKey -> format("%s=%s", partitionKey.getName(), partitionKey.getValue()))
            .collect(Collectors.joining("/"));
    List<HiveColumnHandle> columnHandles = getColumnHandles(columnsToRead);
    // Re-key the predicate from test columns to the matching column handles;
    // orElseThrow keeps the original checkState semantics (IllegalStateException, same message)
    TupleDomain<HiveColumnHandle> predicate = effectivePredicate.transformKeys(testColumn -> columnHandles.stream()
            .filter(column -> testColumn.getName().equals(column.getName()))
            .findFirst()
            .orElseThrow(() -> new IllegalStateException("Predicate on invalid column")));
    List<HivePageSourceProvider.ColumnMapping> columnMappings = buildColumnMappings(partitionName, partitionKeys, columnHandles, ImmutableList.of(), TableToPartitionMapping.empty(), split.getPath(), OptionalInt.empty(), split.getLength(), Instant.now().toEpochMilli());
    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(ImmutableSet.of(readerFactory), ImmutableSet.of(), new Configuration(false), session, split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), split.getLength(), splitProperties, predicate, columnHandles, TESTING_TYPE_MANAGER, Optional.empty(), Optional.empty(), false, Optional.empty(), false, NO_ACID_TRANSACTION, columnMappings);
    assertTrue(pageSource.isPresent(), "ORC page source factory did not handle the split");
    return pageSource.get();
}
Also used : HivePageSourceProvider(io.trino.plugin.hive.HivePageSourceProvider) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) FileSplit(org.apache.hadoop.mapred.FileSplit) Configuration(org.apache.hadoop.conf.Configuration) StructuralTestUtil.rowBlockOf(io.trino.testing.StructuralTestUtil.rowBlockOf) AbstractTestHiveFileFormats(io.trino.plugin.hive.AbstractTestHiveFileFormats) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) HiveCompressionCodec(io.trino.plugin.hive.HiveCompressionCodec) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) ImmutableSet(com.google.common.collect.ImmutableSet) FileFormatDataSourceStats(io.trino.plugin.hive.FileFormatDataSourceStats) ImmutableMap(com.google.common.collect.ImmutableMap) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) List(java.util.List) ColumnMapping.buildColumnMappings(io.trino.plugin.hive.HivePageSourceProvider.ColumnMapping.buildColumnMappings) TableToPartitionMapping(io.trino.plugin.hive.TableToPartitionMapping) BIGINT(io.trino.spi.type.BigintType.BIGINT) Optional(java.util.Optional) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) Page(io.trino.spi.Page) Assert.assertEquals(org.testng.Assert.assertEquals) 
OptionalInt(java.util.OptionalInt) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) NodeVersion(io.trino.plugin.hive.NodeVersion) OrcReaderOptions(io.trino.orc.OrcReaderOptions) Properties(java.util.Properties) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) HivePartitionKey(io.trino.plugin.hive.HivePartitionKey) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) ConnectorSession(io.trino.spi.connector.ConnectorSession) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TupleDomain(io.trino.spi.predicate.TupleDomain) UTC(org.joda.time.DateTimeZone.UTC) File(java.io.File) HiveTestUtils.getHiveSession(io.trino.plugin.hive.HiveTestUtils.getHiveSession) Collectors.toList(java.util.stream.Collectors.toList) OrcWriterOptions(io.trino.orc.OrcWriterOptions) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) HiveConfig(io.trino.plugin.hive.HiveConfig) Configuration(org.apache.hadoop.conf.Configuration) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ImmutableList(com.google.common.collect.ImmutableList) Properties(java.util.Properties) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) OrcReaderOptions(io.trino.orc.OrcReaderOptions) HivePartitionKey(io.trino.plugin.hive.HivePartitionKey) HiveColumnHandle(io.trino.plugin.hive.HiveColumnHandle) HashSet(java.util.HashSet)

Aggregations

TESTING_TYPE_MANAGER (io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER)13 List (java.util.List)13 Optional (java.util.Optional)13 Test (org.testng.annotations.Test)13 ImmutableList (com.google.common.collect.ImmutableList)12 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)11 Type (io.trino.spi.type.Type)11 Assert.assertTrue (org.testng.Assert.assertTrue)11 NO_ACID_TRANSACTION (io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION)10 ConnectorPageSource (io.trino.spi.connector.ConnectorPageSource)10 ConnectorSession (io.trino.spi.connector.ConnectorSession)10 File (java.io.File)10 OptionalInt (java.util.OptionalInt)10 Collectors.toList (java.util.stream.Collectors.toList)10 String.format (java.lang.String.format)9 ArrayList (java.util.ArrayList)9 Properties (java.util.Properties)9 Assert.assertEquals (org.testng.Assert.assertEquals)9 ImmutableMap (com.google.common.collect.ImmutableMap)8 ImmutableSet (com.google.common.collect.ImmutableSet)8