Search in sources :

Example 1 with ORC

use of io.trino.plugin.hive.HiveStorageFormat.ORC in project trino by trinodb.

the class AbstractTestHive method testApplyProjection.

@Test
public void testApplyProjection() throws Exception {
    ColumnMetadata bigIntColumn0 = new ColumnMetadata("int0", BIGINT);
    ColumnMetadata bigIntColumn1 = new ColumnMetadata("int1", BIGINT);
    RowType oneLevelRowType = toRowType(ImmutableList.of(bigIntColumn0, bigIntColumn1));
    ColumnMetadata oneLevelRow0 = new ColumnMetadata("onelevelrow0", oneLevelRowType);
    RowType twoLevelRowType = toRowType(ImmutableList.of(oneLevelRow0, bigIntColumn0, bigIntColumn1));
    ColumnMetadata twoLevelRow0 = new ColumnMetadata("twolevelrow0", twoLevelRowType);
    List<ColumnMetadata> columnsForApplyProjectionTest = ImmutableList.of(bigIntColumn0, bigIntColumn1, oneLevelRow0, twoLevelRow0);
    SchemaTableName tableName = temporaryTable("apply_projection_tester");
    doCreateEmptyTable(tableName, ORC, columnsForApplyProjectionTest);
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream().filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()).collect(toList());
        assertEquals(columnHandles.size(), columnsForApplyProjectionTest.size());
        Map<String, ColumnHandle> columnHandleMap = columnHandles.stream().collect(toImmutableMap(handle -> ((HiveColumnHandle) handle).getBaseColumnName(), Function.identity()));
        // Emulate symbols coming from the query plan and map them to column handles
        Map<String, ColumnHandle> columnHandlesWithSymbols = ImmutableMap.of("symbol_0", columnHandleMap.get("int0"), "symbol_1", columnHandleMap.get("int1"), "symbol_2", columnHandleMap.get("onelevelrow0"), "symbol_3", columnHandleMap.get("twolevelrow0"));
        // Create variables for the emulated symbols
        Map<String, Variable> symbolVariableMapping = columnHandlesWithSymbols.entrySet().stream().collect(toImmutableMap(Map.Entry::getKey, e -> new Variable(e.getKey(), ((HiveColumnHandle) e.getValue()).getBaseType())));
        // Create dereference expressions for testing
        FieldDereference symbol2Field0 = new FieldDereference(BIGINT, symbolVariableMapping.get("symbol_2"), 0);
        FieldDereference symbol3Field0 = new FieldDereference(oneLevelRowType, symbolVariableMapping.get("symbol_3"), 0);
        FieldDereference symbol3Field0Field0 = new FieldDereference(BIGINT, symbol3Field0, 0);
        FieldDereference symbol3Field1 = new FieldDereference(BIGINT, symbolVariableMapping.get("symbol_3"), 1);
        Map<String, ColumnHandle> inputAssignments;
        List<ConnectorExpression> inputProjections;
        Optional<ProjectionApplicationResult<ConnectorTableHandle>> projectionResult;
        List<ConnectorExpression> expectedProjections;
        Map<String, Type> expectedAssignments;
        // Test projected columns pushdown to HiveTableHandle in case of all variable references
        inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_0", "symbol_1"));
        inputProjections = ImmutableList.of(symbolVariableMapping.get("symbol_0"), symbolVariableMapping.get("symbol_1"));
        expectedAssignments = ImmutableMap.of("symbol_0", BIGINT, "symbol_1", BIGINT);
        projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments);
        assertProjectionResult(projectionResult, false, inputProjections, expectedAssignments);
        // Empty result when projected column handles are same as those present in table handle
        projectionResult = metadata.applyProjection(session, projectionResult.get().getHandle(), inputProjections, inputAssignments);
        assertProjectionResult(projectionResult, true, ImmutableList.of(), ImmutableMap.of());
        // Extra columns handles in HiveTableHandle should get pruned
        projectionResult = metadata.applyProjection(session, ((HiveTableHandle) tableHandle).withProjectedColumns(ImmutableSet.copyOf(columnHandles)), inputProjections, inputAssignments);
        assertProjectionResult(projectionResult, false, inputProjections, expectedAssignments);
        // Test projection pushdown for dereferences
        inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2", "symbol_3"));
        inputProjections = ImmutableList.of(symbol2Field0, symbol3Field0Field0, symbol3Field1);
        expectedAssignments = ImmutableMap.of("onelevelrow0#f_int0", BIGINT, "twolevelrow0#f_onelevelrow0#f_int0", BIGINT, "twolevelrow0#f_int0", BIGINT);
        expectedProjections = ImmutableList.of(new Variable("onelevelrow0#f_int0", BIGINT), new Variable("twolevelrow0#f_onelevelrow0#f_int0", BIGINT), new Variable("twolevelrow0#f_int0", BIGINT));
        projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments);
        assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments);
        // Test reuse of virtual column handles
        // Round-1: input projections [symbol_2, symbol_2.int0]. virtual handle is created for symbol_2.int0.
        inputAssignments = getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2"));
        inputProjections = ImmutableList.of(symbol2Field0, symbolVariableMapping.get("symbol_2"));
        projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments);
        expectedProjections = ImmutableList.of(new Variable("onelevelrow0#f_int0", BIGINT), symbolVariableMapping.get("symbol_2"));
        expectedAssignments = ImmutableMap.of("onelevelrow0#f_int0", BIGINT, "symbol_2", oneLevelRowType);
        assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments);
        // Round-2: input projections [symbol_2.int0 and onelevelrow0#f_int0]. Virtual handle is reused.
        Assignment newlyCreatedColumn = getOnlyElement(projectionResult.get().getAssignments().stream().filter(handle -> handle.getVariable().equals("onelevelrow0#f_int0")).collect(toList()));
        inputAssignments = ImmutableMap.<String, ColumnHandle>builder().putAll(getColumnHandlesFor(columnHandlesWithSymbols, ImmutableList.of("symbol_2"))).put(newlyCreatedColumn.getVariable(), newlyCreatedColumn.getColumn()).buildOrThrow();
        inputProjections = ImmutableList.of(symbol2Field0, new Variable("onelevelrow0#f_int0", BIGINT));
        projectionResult = metadata.applyProjection(session, tableHandle, inputProjections, inputAssignments);
        expectedProjections = ImmutableList.of(new Variable("onelevelrow0#f_int0", BIGINT), new Variable("onelevelrow0#f_int0", BIGINT));
        expectedAssignments = ImmutableMap.of("onelevelrow0#f_int0", BIGINT);
        assertProjectionResult(projectionResult, false, expectedProjections, expectedAssignments);
    } finally {
        dropTable(tableName);
    }
}
Also used : Assertions.assertInstanceOf(io.airlift.testing.Assertions.assertInstanceOf) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.testng.annotations.Test) FileStatus(org.apache.hadoop.fs.FileStatus) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) HiveColumnStatistics.createDateColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDateColumnStatistics) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Map(java.util.Map) PRESTO_QUERY_ID_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ViewNotFoundException(io.trino.spi.connector.ViewNotFoundException) MaterializedRow(io.trino.testing.MaterializedRow) ROLLBACK_AFTER_FINISH_INSERT(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_FINISH_INSERT) ENGLISH(java.util.Locale.ENGLISH) Assert.assertFalse(org.testng.Assert.assertFalse) HiveIdentity(io.trino.plugin.hive.authentication.HiveIdentity) Domain(io.trino.spi.predicate.Domain) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) ASCENDING(io.trino.plugin.hive.metastore.SortingColumn.Order.ASCENDING) ValueSet(io.trino.spi.predicate.ValueSet) MoreExecutors.directExecutor(com.google.common.util.concurrent.MoreExecutors.directExecutor) COMMIT(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.COMMIT) NOT_PARTITIONED(io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) DESCENDING(io.trino.plugin.hive.metastore.SortingColumn.Order.DESCENDING) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) TrinoS3ConfigurationInitializer(io.trino.plugin.hive.s3.TrinoS3ConfigurationInitializer) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) MetastoreLocator(io.trino.plugin.hive.metastore.thrift.MetastoreLocator) ROLLBACK_AFTER_SINK_FINISH(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_SINK_FINISH) TableScanRedirectApplicationResult(io.trino.spi.connector.TableScanRedirectApplicationResult) TableColumnsMetadata(io.trino.spi.connector.TableColumnsMetadata) REAL(io.trino.spi.type.RealType.REAL) Partition(io.trino.plugin.hive.metastore.Partition) BUCKETED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) TIMESTAMP_MILLIS(io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS) LocalDateTime(java.time.LocalDateTime) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) HiveBasicStatistics.createEmptyStatistics(io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics) Variable(io.trino.spi.expression.Variable) StorageFormat.fromHiveStorageFormat(io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat) OptionalLong(java.util.OptionalLong) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) OrcPageSource(io.trino.plugin.hive.orc.OrcPageSource) SEQUENCEFILE(io.trino.plugin.hive.HiveStorageFormat.SEQUENCEFILE) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) HIVE_INVALID_PARTITION_VALUE(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Assertions.assertGreaterThanOrEqual(io.airlift.testing.Assertions.assertGreaterThanOrEqual) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) HiveSessionProperties.isTemporaryStagingDirectoryEnabled(io.trino.plugin.hive.HiveSessionProperties.isTemporaryStagingDirectoryEnabled) AfterClass(org.testng.annotations.AfterClass) HiveAzureConfig(io.trino.plugin.hive.azure.HiveAzureConfig) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) MapType(io.trino.spi.type.MapType) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) TRANSACTIONAL(io.trino.plugin.hive.HiveTableProperties.TRANSACTIONAL) SPARK_TABLE_PROVIDER_KEY(io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) STAGE_AND_MOVE_TO_TARGET_DIRECTORY(io.trino.plugin.hive.LocationHandle.WriteMode.STAGE_AND_MOVE_TO_TARGET_DIRECTORY) ROLLBACK_AFTER_BEGIN_INSERT(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_BEGIN_INSERT) HostAndPort(com.google.common.net.HostAndPort) CatalogName(io.trino.plugin.base.CatalogName) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) HIVE_INT(io.trino.plugin.hive.HiveType.HIVE_INT) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) HiveTestUtils.mapType(io.trino.plugin.hive.HiveTestUtils.mapType) ParquetPageSource(io.trino.plugin.hive.parquet.ParquetPageSource) ThriftMetastoreConfig(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreConfig) RCTEXT(io.trino.plugin.hive.HiveStorageFormat.RCTEXT) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) WriteInfo(io.trino.plugin.hive.LocationService.WriteInfo) HivePrivilege(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege) PARTITION_KEY(io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) HiveColumnStatistics.createStringColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createStringColumnStatistics) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) MaterializedResult(io.trino.testing.MaterializedResult) HiveUtil.toPartitionValues(io.trino.plugin.hive.util.HiveUtil.toPartitionValues) NO_RETRIES(io.trino.spi.connector.RetryMode.NO_RETRIES) ConnectorMaterializedViewDefinition(io.trino.spi.connector.ConnectorMaterializedViewDefinition) ICEBERG_TABLE_TYPE_VALUE(io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_VALUE) Duration(io.airlift.units.Duration) BUCKETING_V1(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1) SqlTimestamp(io.trino.spi.type.SqlTimestamp) ICEBERG_TABLE_TYPE_NAME(io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_NAME) NOOP_METADATA_PROVIDER(io.trino.spi.connector.MetadataProvider.NOOP_METADATA_PROVIDER) HiveMetastoreFactory(io.trino.plugin.hive.metastore.HiveMetastoreFactory) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) HiveTestUtils.arrayType(io.trino.plugin.hive.HiveTestUtils.arrayType) Block(io.trino.spi.block.Block) HIVE_PARTITION_SCHEMA_MISMATCH(io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) ConnectorViewDefinition(io.trino.spi.connector.ConnectorViewDefinition) INTEGER(io.trino.spi.type.IntegerType.INTEGER) ROLLBACK_RIGHT_AWAY(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_RIGHT_AWAY) ImmutableSet(com.google.common.collect.ImmutableSet) BeforeClass(org.testng.annotations.BeforeClass) Collection(java.util.Collection) UUID(java.util.UUID) Assert.assertNotNull(org.testng.Assert.assertNotNull) TrinoAzureConfigurationInitializer(io.trino.plugin.hive.azure.TrinoAzureConfigurationInitializer) HiveColumnStatistics.createDoubleColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDoubleColumnStatistics) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) BIGINT(io.trino.spi.type.BigintType.BIGINT) SORTED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY) LocalDate(java.time.LocalDate) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) HiveTestUtils.getDefaultHivePageSourceFactories(io.trino.plugin.hive.HiveTestUtils.getDefaultHivePageSourceFactories) Constraint(io.trino.spi.connector.Constraint) Assert.assertNull(org.testng.Assert.assertNull) OptionalDouble(java.util.OptionalDouble) Assert.assertEquals(org.testng.Assert.assertEquals) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) HashSet(java.util.HashSet) HYPER_LOG_LOG(io.trino.spi.type.HyperLogLogType.HYPER_LOG_LOG) ImmutableList(com.google.common.collect.ImmutableList) BridgingHiveMetastore(io.trino.plugin.hive.metastore.thrift.BridgingHiveMetastore) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) Math.toIntExact(java.lang.Math.toIntExact) ExecutorService(java.util.concurrent.ExecutorService) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) DESC_NULLS_LAST(io.trino.spi.connector.SortOrder.DESC_NULLS_LAST) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) HiveColumnStatistics.createIntegerColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Assert.fail(org.testng.Assert.fail) ConnectorPageSourceProvider(io.trino.spi.connector.ConnectorPageSourceProvider) DateTime(org.joda.time.DateTime) PAGE_SORTER(io.trino.plugin.hive.HiveTestUtils.PAGE_SORTER) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) HIVE_STRING(io.trino.plugin.hive.HiveType.HIVE_STRING) Hashing.sha256(com.google.common.hash.Hashing.sha256) HiveTestUtils.getHiveSession(io.trino.plugin.hive.HiveTestUtils.getHiveSession) Collectors.toList(java.util.stream.Collectors.toList) PRESTO_VERSION_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_VERSION_NAME) Assert.assertTrue(org.testng.Assert.assertTrue) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) ConnectorPageSinkProvider(io.trino.spi.connector.ConnectorPageSinkProvider) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) Arrays(java.util.Arrays) NamedTypeSignature(io.trino.spi.type.NamedTypeSignature) USER(io.trino.spi.security.PrincipalType.USER) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) HiveColumnStatistics.createDecimalColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDecimalColumnStatistics) TypeOperators(io.trino.spi.type.TypeOperators) ROLLBACK_AFTER_APPEND_PAGE(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_APPEND_PAGE) HiveTestUtils.rowType(io.trino.plugin.hive.HiveTestUtils.rowType) TrinoExceptionAssert.assertTrinoExceptionThrownBy(io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy) CharType.createCharType(io.trino.spi.type.CharType.createCharType) BigDecimal(java.math.BigDecimal) TypeId(io.trino.spi.type.TypeId) Sets.difference(com.google.common.collect.Sets.difference) PARQUET(io.trino.plugin.hive.HiveStorageFormat.PARQUET) Column(io.trino.plugin.hive.metastore.Column) Executors.newScheduledThreadPool(java.util.concurrent.Executors.newScheduledThreadPool) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) ViewColumn(io.trino.spi.connector.ConnectorViewDefinition.ViewColumn) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) HiveTestUtils.getDefaultHiveRecordCursorProviders(io.trino.plugin.hive.HiveTestUtils.getDefaultHiveRecordCursorProviders) ConnectorNodePartitioningProvider(io.trino.spi.connector.ConnectorNodePartitioningProvider) Table(io.trino.plugin.hive.metastore.Table) TestingNodeManager(io.trino.testing.TestingNodeManager) Range(io.trino.spi.predicate.Range) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) PARTITIONED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) RcFilePageSource(io.trino.plugin.hive.rcfile.RcFilePageSource) Set(java.util.Set) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) SchemaTableName(io.trino.spi.connector.SchemaTableName) SortingProperty(io.trino.spi.connector.SortingProperty) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HiveColumnStatistics.createBooleanColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBooleanColumnStatistics) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) Lists.reverse(com.google.common.collect.Lists.reverse) DATE(io.trino.spi.type.DateType.DATE) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) Slice(io.airlift.slice.Slice) NullableValue(io.trino.spi.predicate.NullableValue) Page(io.trino.spi.Page) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) MINUTES(java.util.concurrent.TimeUnit.MINUTES) JoinCompiler(io.trino.sql.gen.JoinCompiler) HiveUtil.columnExtraInfo(io.trino.plugin.hive.util.HiveUtil.columnExtraInfo) GroupByHashPageIndexerFactory(io.trino.operator.GroupByHashPageIndexerFactory) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) UNGROUPED_SCHEDULING(io.trino.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) ColumnHandle(io.trino.spi.connector.ColumnHandle) TEXTFILE(io.trino.plugin.hive.HiveStorageFormat.TEXTFILE) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) HIVE_INVALID_BUCKET_FILES(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) TestingMetastoreLocator(io.trino.plugin.hive.metastore.thrift.TestingMetastoreLocator) STORAGE_FORMAT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) GoogleGcsConfigurationInitializer(io.trino.plugin.hive.gcs.GoogleGcsConfigurationInitializer) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) RecordCursor(io.trino.spi.connector.RecordCursor) BlockTypeOperators(io.trino.type.BlockTypeOperators) LongStream(java.util.stream.LongStream) NO_REDIRECTIONS(io.trino.plugin.hive.HiveTableRedirectionsProvider.NO_REDIRECTIONS) DecimalType.createDecimalType(io.trino.spi.type.DecimalType.createDecimalType) HIVE_LONG(io.trino.plugin.hive.HiveType.HIVE_LONG) HiveTestUtils.getTypes(io.trino.plugin.hive.HiveTestUtils.getTypes) TRANSACTION_CONFLICT(io.trino.spi.StandardErrorCode.TRANSACTION_CONFLICT) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) ConnectorSession(io.trino.spi.connector.ConnectorSession) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) UTC(org.joda.time.DateTimeZone.UTC) SESSION(io.trino.plugin.hive.HiveTestUtils.SESSION) SqlVarbinary(io.trino.spi.type.SqlVarbinary) DiscretePredicates(io.trino.spi.connector.DiscretePredicates) CharType(io.trino.spi.type.CharType) TableType(org.apache.hadoop.hive.metastore.TableType) CachingHiveMetastore.cachingHiveMetastore(io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.cachingHiveMetastore) TINYINT(io.trino.spi.type.TinyintType.TINYINT) DateTimeTestingUtils.sqlTimestampOf(io.trino.testing.DateTimeTestingUtils.sqlTimestampOf) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) HiveColumnStatistics.createBinaryColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBinaryColumnStatistics) MoreCollectors.onlyElement(com.google.common.collect.MoreCollectors.onlyElement) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) QueryAssertions.assertEqualsIgnoreOrder(io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder) HiveGcsConfig(io.trino.plugin.hive.gcs.HiveGcsConfig) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) KILOBYTE(io.airlift.units.DataSize.Unit.KILOBYTE) TIMESTAMP_WITH_TIME_ZONE(io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_WITH_TIME_ZONE) AVRO(io.trino.plugin.hive.HiveStorageFormat.AVRO) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) RowType(io.trino.spi.type.RowType) HiveWriteUtils.getTableDefaultLocation(io.trino.plugin.hive.util.HiveWriteUtils.getTableDefaultLocation) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) ThriftHiveMetastore(io.trino.plugin.hive.metastore.thrift.ThriftHiveMetastore) HiveSessionProperties.getTemporaryStagingDirectoryPath(io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) MaterializedResult.materializeSourceDataStream(io.trino.testing.MaterializedResult.materializeSourceDataStream) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) ASC_NULLS_FIRST(io.trino.spi.connector.SortOrder.ASC_NULLS_FIRST) String.format(java.lang.String.format) SqlDate(io.trino.spi.type.SqlDate) Preconditions.checkState(com.google.common.base.Preconditions.checkState) SqlTimestampWithTimeZone(io.trino.spi.type.SqlTimestampWithTimeZone) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) DynamicFilter(io.trino.spi.connector.DynamicFilter) Assignment(io.trino.spi.connector.Assignment) Optional(java.util.Optional) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) HivePrivilegeInfo(io.trino.plugin.hive.metastore.HivePrivilegeInfo) SqlStandardAccessControlMetadata(io.trino.plugin.hive.security.SqlStandardAccessControlMetadata) Logger(io.airlift.log.Logger) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) CounterStat(io.airlift.stats.CounterStat) HashMap(java.util.HashMap) HiveBasicStatistics.createZeroStatistics(io.trino.plugin.hive.HiveBasicStatistics.createZeroStatistics) CSV(io.trino.plugin.hive.HiveStorageFormat.CSV) AtomicReference(java.util.concurrent.atomic.AtomicReference) VarcharType(io.trino.spi.type.VarcharType) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ROLLBACK_AFTER_DELETE(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_DELETE) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Verify.verify(com.google.common.base.Verify.verify) Assertions.assertLessThanOrEqual(io.airlift.testing.Assertions.assertLessThanOrEqual) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) SemiTransactionalHiveMetastore(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore) RecordPageSource(io.trino.spi.connector.RecordPageSource) Objects.requireNonNull(java.util.Objects.requireNonNull) RowFieldName(io.trino.spi.type.RowFieldName) JSON(io.trino.plugin.hive.HiveStorageFormat.JSON) RCBINARY(io.trino.plugin.hive.HiveStorageFormat.RCBINARY) NO_PRIVILEGES(io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES) DELTA_LAKE_PROVIDER(io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER) ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) FieldDereference(io.trino.spi.expression.FieldDereference) HiveTestUtils.getDefaultHiveFileWriterFactories(io.trino.plugin.hive.HiveTestUtils.getDefaultHiveFileWriterFactories) HiveTestUtils.getHiveSessionProperties(io.trino.plugin.hive.HiveTestUtils.getHiveSessionProperties) TupleDomain(io.trino.spi.predicate.TupleDomain) HiveWriteUtils.createDirectory(io.trino.plugin.hive.util.HiveWriteUtils.createDirectory) TestingConnectorSession(io.trino.testing.TestingConnectorSession) HiveS3Config(io.trino.plugin.hive.s3.HiveS3Config) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) BUCKET_COUNT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) SECONDS(java.util.concurrent.TimeUnit.SECONDS) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) Variable(io.trino.spi.expression.Variable) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) RowType(io.trino.spi.type.RowType) Assignment(io.trino.spi.connector.Assignment) FieldDereference(io.trino.spi.expression.FieldDereference) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) ColumnHandle(io.trino.spi.connector.ColumnHandle) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTableName(io.trino.spi.connector.SchemaTableName) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) MapType(io.trino.spi.type.MapType) HiveTestUtils.mapType(io.trino.plugin.hive.HiveTestUtils.mapType) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) HiveTestUtils.arrayType(io.trino.plugin.hive.HiveTestUtils.arrayType) HiveTestUtils.rowType(io.trino.plugin.hive.HiveTestUtils.rowType) CharType.createCharType(io.trino.spi.type.CharType.createCharType) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) DecimalType.createDecimalType(io.trino.spi.type.DecimalType.createDecimalType) CharType(io.trino.spi.type.CharType) TableType(org.apache.hadoop.hive.metastore.TableType) RowType(io.trino.spi.type.RowType) ArrayType(io.trino.spi.type.ArrayType) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) VarcharType(io.trino.spi.type.VarcharType) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) HashMap(java.util.HashMap) Test(org.testng.annotations.Test)

Example 2 with ORC

use of io.trino.plugin.hive.HiveStorageFormat.ORC in project trino by trinodb.

the class AbstractTestHive method createDummyPartitionedTable.

protected void createDummyPartitionedTable(SchemaTableName tableName, List<ColumnMetadata> columns) throws Exception {
    doCreateEmptyTable(tableName, ORC, columns);
    HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient());
    Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    List<String> firstPartitionValues = ImmutableList.of("2016-01-01");
    List<String> secondPartitionValues = ImmutableList.of("2016-01-02");
    String firstPartitionName = makePartName(ImmutableList.of("ds"), firstPartitionValues);
    String secondPartitionName = makePartName(ImmutableList.of("ds"), secondPartitionValues);
    List<PartitionWithStatistics> partitions = ImmutableList.of(firstPartitionName, secondPartitionName).stream().map(partitionName -> new PartitionWithStatistics(createDummyPartition(table, partitionName), partitionName, PartitionStatistics.empty())).collect(toImmutableList());
    metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions);
    metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), firstPartitionName, currentStatistics -> EMPTY_TABLE_STATISTICS);
    metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), secondPartitionName, currentStatistics -> EMPTY_TABLE_STATISTICS);
}
Also used : Assertions.assertInstanceOf(io.airlift.testing.Assertions.assertInstanceOf) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.testng.annotations.Test) FileStatus(org.apache.hadoop.fs.FileStatus) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) HiveColumnStatistics.createDateColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDateColumnStatistics) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Map(java.util.Map) PRESTO_QUERY_ID_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ViewNotFoundException(io.trino.spi.connector.ViewNotFoundException) MaterializedRow(io.trino.testing.MaterializedRow) ROLLBACK_AFTER_FINISH_INSERT(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_FINISH_INSERT) ENGLISH(java.util.Locale.ENGLISH) Assert.assertFalse(org.testng.Assert.assertFalse) HiveIdentity(io.trino.plugin.hive.authentication.HiveIdentity) Domain(io.trino.spi.predicate.Domain) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) ASCENDING(io.trino.plugin.hive.metastore.SortingColumn.Order.ASCENDING) ValueSet(io.trino.spi.predicate.ValueSet) MoreExecutors.directExecutor(com.google.common.util.concurrent.MoreExecutors.directExecutor) COMMIT(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.COMMIT) NOT_PARTITIONED(io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) DESCENDING(io.trino.plugin.hive.metastore.SortingColumn.Order.DESCENDING) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) TrinoS3ConfigurationInitializer(io.trino.plugin.hive.s3.TrinoS3ConfigurationInitializer) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) MetastoreLocator(io.trino.plugin.hive.metastore.thrift.MetastoreLocator) ROLLBACK_AFTER_SINK_FINISH(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_SINK_FINISH) TableScanRedirectApplicationResult(io.trino.spi.connector.TableScanRedirectApplicationResult) TableColumnsMetadata(io.trino.spi.connector.TableColumnsMetadata) REAL(io.trino.spi.type.RealType.REAL) Partition(io.trino.plugin.hive.metastore.Partition) BUCKETED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) TIMESTAMP_MILLIS(io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS) LocalDateTime(java.time.LocalDateTime) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) HiveBasicStatistics.createEmptyStatistics(io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics) Variable(io.trino.spi.expression.Variable) StorageFormat.fromHiveStorageFormat(io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat) OptionalLong(java.util.OptionalLong) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) OrcPageSource(io.trino.plugin.hive.orc.OrcPageSource) SEQUENCEFILE(io.trino.plugin.hive.HiveStorageFormat.SEQUENCEFILE) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) HIVE_INVALID_PARTITION_VALUE(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Assertions.assertGreaterThanOrEqual(io.airlift.testing.Assertions.assertGreaterThanOrEqual) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) HiveSessionProperties.isTemporaryStagingDirectoryEnabled(io.trino.plugin.hive.HiveSessionProperties.isTemporaryStagingDirectoryEnabled) AfterClass(org.testng.annotations.AfterClass) HiveAzureConfig(io.trino.plugin.hive.azure.HiveAzureConfig) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) MapType(io.trino.spi.type.MapType) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) TRANSACTIONAL(io.trino.plugin.hive.HiveTableProperties.TRANSACTIONAL) SPARK_TABLE_PROVIDER_KEY(io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) STAGE_AND_MOVE_TO_TARGET_DIRECTORY(io.trino.plugin.hive.LocationHandle.WriteMode.STAGE_AND_MOVE_TO_TARGET_DIRECTORY) ROLLBACK_AFTER_BEGIN_INSERT(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_BEGIN_INSERT) HostAndPort(com.google.common.net.HostAndPort) CatalogName(io.trino.plugin.base.CatalogName) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) HIVE_INT(io.trino.plugin.hive.HiveType.HIVE_INT) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) HiveTestUtils.mapType(io.trino.plugin.hive.HiveTestUtils.mapType) ParquetPageSource(io.trino.plugin.hive.parquet.ParquetPageSource) ThriftMetastoreConfig(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreConfig) RCTEXT(io.trino.plugin.hive.HiveStorageFormat.RCTEXT) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) WriteInfo(io.trino.plugin.hive.LocationService.WriteInfo) HivePrivilege(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege) PARTITION_KEY(io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) HiveColumnStatistics.createStringColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createStringColumnStatistics) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) MaterializedResult(io.trino.testing.MaterializedResult) HiveUtil.toPartitionValues(io.trino.plugin.hive.util.HiveUtil.toPartitionValues) NO_RETRIES(io.trino.spi.connector.RetryMode.NO_RETRIES) ConnectorMaterializedViewDefinition(io.trino.spi.connector.ConnectorMaterializedViewDefinition) ICEBERG_TABLE_TYPE_VALUE(io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_VALUE) Duration(io.airlift.units.Duration) BUCKETING_V1(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1) SqlTimestamp(io.trino.spi.type.SqlTimestamp) ICEBERG_TABLE_TYPE_NAME(io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_NAME) NOOP_METADATA_PROVIDER(io.trino.spi.connector.MetadataProvider.NOOP_METADATA_PROVIDER) HiveMetastoreFactory(io.trino.plugin.hive.metastore.HiveMetastoreFactory) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) HiveTestUtils.arrayType(io.trino.plugin.hive.HiveTestUtils.arrayType) Block(io.trino.spi.block.Block) HIVE_PARTITION_SCHEMA_MISMATCH(io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) ConnectorViewDefinition(io.trino.spi.connector.ConnectorViewDefinition) INTEGER(io.trino.spi.type.IntegerType.INTEGER) ROLLBACK_RIGHT_AWAY(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_RIGHT_AWAY) ImmutableSet(com.google.common.collect.ImmutableSet) BeforeClass(org.testng.annotations.BeforeClass) Collection(java.util.Collection) UUID(java.util.UUID) Assert.assertNotNull(org.testng.Assert.assertNotNull) TrinoAzureConfigurationInitializer(io.trino.plugin.hive.azure.TrinoAzureConfigurationInitializer) HiveColumnStatistics.createDoubleColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDoubleColumnStatistics) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) BIGINT(io.trino.spi.type.BigintType.BIGINT) SORTED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY) LocalDate(java.time.LocalDate) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) HiveTestUtils.getDefaultHivePageSourceFactories(io.trino.plugin.hive.HiveTestUtils.getDefaultHivePageSourceFactories) Constraint(io.trino.spi.connector.Constraint) Assert.assertNull(org.testng.Assert.assertNull) OptionalDouble(java.util.OptionalDouble) Assert.assertEquals(org.testng.Assert.assertEquals) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) HashSet(java.util.HashSet) HYPER_LOG_LOG(io.trino.spi.type.HyperLogLogType.HYPER_LOG_LOG) ImmutableList(com.google.common.collect.ImmutableList) BridgingHiveMetastore(io.trino.plugin.hive.metastore.thrift.BridgingHiveMetastore) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) Math.toIntExact(java.lang.Math.toIntExact) ExecutorService(java.util.concurrent.ExecutorService) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) DESC_NULLS_LAST(io.trino.spi.connector.SortOrder.DESC_NULLS_LAST) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) HiveColumnStatistics.createIntegerColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Assert.fail(org.testng.Assert.fail) ConnectorPageSourceProvider(io.trino.spi.connector.ConnectorPageSourceProvider) DateTime(org.joda.time.DateTime) PAGE_SORTER(io.trino.plugin.hive.HiveTestUtils.PAGE_SORTER) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) HIVE_STRING(io.trino.plugin.hive.HiveType.HIVE_STRING) Hashing.sha256(com.google.common.hash.Hashing.sha256) HiveTestUtils.getHiveSession(io.trino.plugin.hive.HiveTestUtils.getHiveSession) Collectors.toList(java.util.stream.Collectors.toList) PRESTO_VERSION_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_VERSION_NAME) Assert.assertTrue(org.testng.Assert.assertTrue) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) ConnectorPageSinkProvider(io.trino.spi.connector.ConnectorPageSinkProvider) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) Arrays(java.util.Arrays) NamedTypeSignature(io.trino.spi.type.NamedTypeSignature) USER(io.trino.spi.security.PrincipalType.USER) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) HiveColumnStatistics.createDecimalColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDecimalColumnStatistics) TypeOperators(io.trino.spi.type.TypeOperators) ROLLBACK_AFTER_APPEND_PAGE(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_APPEND_PAGE) HiveTestUtils.rowType(io.trino.plugin.hive.HiveTestUtils.rowType) TrinoExceptionAssert.assertTrinoExceptionThrownBy(io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy) CharType.createCharType(io.trino.spi.type.CharType.createCharType) BigDecimal(java.math.BigDecimal) TypeId(io.trino.spi.type.TypeId) Sets.difference(com.google.common.collect.Sets.difference) PARQUET(io.trino.plugin.hive.HiveStorageFormat.PARQUET) Column(io.trino.plugin.hive.metastore.Column) Executors.newScheduledThreadPool(java.util.concurrent.Executors.newScheduledThreadPool) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) ViewColumn(io.trino.spi.connector.ConnectorViewDefinition.ViewColumn) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) HiveTestUtils.getDefaultHiveRecordCursorProviders(io.trino.plugin.hive.HiveTestUtils.getDefaultHiveRecordCursorProviders) ConnectorNodePartitioningProvider(io.trino.spi.connector.ConnectorNodePartitioningProvider) Table(io.trino.plugin.hive.metastore.Table) TestingNodeManager(io.trino.testing.TestingNodeManager) Range(io.trino.spi.predicate.Range) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) PARTITIONED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) RcFilePageSource(io.trino.plugin.hive.rcfile.RcFilePageSource) Set(java.util.Set) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) SchemaTableName(io.trino.spi.connector.SchemaTableName) SortingProperty(io.trino.spi.connector.SortingProperty) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HiveColumnStatistics.createBooleanColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBooleanColumnStatistics) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) Lists.reverse(com.google.common.collect.Lists.reverse) DATE(io.trino.spi.type.DateType.DATE) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) Slice(io.airlift.slice.Slice) NullableValue(io.trino.spi.predicate.NullableValue) Page(io.trino.spi.Page) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) MINUTES(java.util.concurrent.TimeUnit.MINUTES) JoinCompiler(io.trino.sql.gen.JoinCompiler) HiveUtil.columnExtraInfo(io.trino.plugin.hive.util.HiveUtil.columnExtraInfo) GroupByHashPageIndexerFactory(io.trino.operator.GroupByHashPageIndexerFactory) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) UNGROUPED_SCHEDULING(io.trino.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) ColumnHandle(io.trino.spi.connector.ColumnHandle) TEXTFILE(io.trino.plugin.hive.HiveStorageFormat.TEXTFILE) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) HIVE_INVALID_BUCKET_FILES(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) TestingMetastoreLocator(io.trino.plugin.hive.metastore.thrift.TestingMetastoreLocator) STORAGE_FORMAT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) GoogleGcsConfigurationInitializer(io.trino.plugin.hive.gcs.GoogleGcsConfigurationInitializer) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) RecordCursor(io.trino.spi.connector.RecordCursor) BlockTypeOperators(io.trino.type.BlockTypeOperators) LongStream(java.util.stream.LongStream) NO_REDIRECTIONS(io.trino.plugin.hive.HiveTableRedirectionsProvider.NO_REDIRECTIONS) DecimalType.createDecimalType(io.trino.spi.type.DecimalType.createDecimalType) HIVE_LONG(io.trino.plugin.hive.HiveType.HIVE_LONG) HiveTestUtils.getTypes(io.trino.plugin.hive.HiveTestUtils.getTypes) TRANSACTION_CONFLICT(io.trino.spi.StandardErrorCode.TRANSACTION_CONFLICT) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) ConnectorSession(io.trino.spi.connector.ConnectorSession) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) UTC(org.joda.time.DateTimeZone.UTC) SESSION(io.trino.plugin.hive.HiveTestUtils.SESSION) SqlVarbinary(io.trino.spi.type.SqlVarbinary) DiscretePredicates(io.trino.spi.connector.DiscretePredicates) CharType(io.trino.spi.type.CharType) TableType(org.apache.hadoop.hive.metastore.TableType) CachingHiveMetastore.cachingHiveMetastore(io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.cachingHiveMetastore) TINYINT(io.trino.spi.type.TinyintType.TINYINT) DateTimeTestingUtils.sqlTimestampOf(io.trino.testing.DateTimeTestingUtils.sqlTimestampOf) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) HiveColumnStatistics.createBinaryColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBinaryColumnStatistics) MoreCollectors.onlyElement(com.google.common.collect.MoreCollectors.onlyElement) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) QueryAssertions.assertEqualsIgnoreOrder(io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder) HiveGcsConfig(io.trino.plugin.hive.gcs.HiveGcsConfig) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) KILOBYTE(io.airlift.units.DataSize.Unit.KILOBYTE) TIMESTAMP_WITH_TIME_ZONE(io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_WITH_TIME_ZONE) AVRO(io.trino.plugin.hive.HiveStorageFormat.AVRO) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) RowType(io.trino.spi.type.RowType) HiveWriteUtils.getTableDefaultLocation(io.trino.plugin.hive.util.HiveWriteUtils.getTableDefaultLocation) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) ThriftHiveMetastore(io.trino.plugin.hive.metastore.thrift.ThriftHiveMetastore) HiveSessionProperties.getTemporaryStagingDirectoryPath(io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) MaterializedResult.materializeSourceDataStream(io.trino.testing.MaterializedResult.materializeSourceDataStream) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) ASC_NULLS_FIRST(io.trino.spi.connector.SortOrder.ASC_NULLS_FIRST) String.format(java.lang.String.format) SqlDate(io.trino.spi.type.SqlDate) Preconditions.checkState(com.google.common.base.Preconditions.checkState) SqlTimestampWithTimeZone(io.trino.spi.type.SqlTimestampWithTimeZone) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) DynamicFilter(io.trino.spi.connector.DynamicFilter) Assignment(io.trino.spi.connector.Assignment) Optional(java.util.Optional) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) HivePrivilegeInfo(io.trino.plugin.hive.metastore.HivePrivilegeInfo) SqlStandardAccessControlMetadata(io.trino.plugin.hive.security.SqlStandardAccessControlMetadata) Logger(io.airlift.log.Logger) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) CounterStat(io.airlift.stats.CounterStat) HashMap(java.util.HashMap) HiveBasicStatistics.createZeroStatistics(io.trino.plugin.hive.HiveBasicStatistics.createZeroStatistics) CSV(io.trino.plugin.hive.HiveStorageFormat.CSV) AtomicReference(java.util.concurrent.atomic.AtomicReference) VarcharType(io.trino.spi.type.VarcharType) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ROLLBACK_AFTER_DELETE(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_DELETE) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Verify.verify(com.google.common.base.Verify.verify) Assertions.assertLessThanOrEqual(io.airlift.testing.Assertions.assertLessThanOrEqual) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) SemiTransactionalHiveMetastore(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore) RecordPageSource(io.trino.spi.connector.RecordPageSource) Objects.requireNonNull(java.util.Objects.requireNonNull) RowFieldName(io.trino.spi.type.RowFieldName) JSON(io.trino.plugin.hive.HiveStorageFormat.JSON) RCBINARY(io.trino.plugin.hive.HiveStorageFormat.RCBINARY) NO_PRIVILEGES(io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES) DELTA_LAKE_PROVIDER(io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER) ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) FieldDereference(io.trino.spi.expression.FieldDereference) HiveTestUtils.getDefaultHiveFileWriterFactories(io.trino.plugin.hive.HiveTestUtils.getDefaultHiveFileWriterFactories) HiveTestUtils.getHiveSessionProperties(io.trino.plugin.hive.HiveTestUtils.getHiveSessionProperties) TupleDomain(io.trino.spi.predicate.TupleDomain) HiveWriteUtils.createDirectory(io.trino.plugin.hive.util.HiveWriteUtils.createDirectory) TestingConnectorSession(io.trino.testing.TestingConnectorSession) HiveS3Config(io.trino.plugin.hive.s3.HiveS3Config) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) BUCKET_COUNT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) SECONDS(java.util.concurrent.TimeUnit.SECONDS) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Table(io.trino.plugin.hive.metastore.Table) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics)

Example 3 with ORC

use of io.trino.plugin.hive.HiveStorageFormat.ORC in project trino by trinodb.

the class TestHiveGlueMetastore method createDummyPartitionedTable.

private void createDummyPartitionedTable(SchemaTableName tableName, List<ColumnMetadata> columns, List<String> partitionColumnNames, List<PartitionValues> partitionValues) throws Exception {
    doCreateEmptyTable(tableName, ORC, columns, partitionColumnNames);
    HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient());
    Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
    List<PartitionWithStatistics> partitions = new ArrayList<>();
    List<String> partitionNames = new ArrayList<>();
    partitionValues.stream().map(partitionValue -> makePartName(partitionColumnNames, partitionValue.values)).forEach(partitionName -> {
        partitions.add(new PartitionWithStatistics(createDummyPartition(table, partitionName), partitionName, PartitionStatistics.empty()));
        partitionNames.add(partitionName);
    });
    metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), partitions);
    partitionNames.forEach(partitionName -> metastoreClient.updatePartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), partitionName, currentStatistics -> EMPTY_TABLE_STATISTICS));
}
Also used : Arrays(java.util.Arrays) DateType(io.trino.spi.type.DateType) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) NUMBER_OF_DISTINCT_VALUES(io.trino.spi.statistics.ColumnStatisticType.NUMBER_OF_DISTINCT_VALUES) ColumnStatisticMetadata(io.trino.spi.statistics.ColumnStatisticMetadata) DeleteTableRequest(com.amazonaws.services.glue.model.DeleteTableRequest) GetDatabasesResult(com.amazonaws.services.glue.model.GetDatabasesResult) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) BoundedExecutor(io.airlift.concurrent.BoundedExecutor) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) Map(java.util.Map) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) ENGLISH(java.util.Locale.ENGLISH) HiveIdentity(io.trino.plugin.hive.authentication.HiveIdentity) Table(io.trino.plugin.hive.metastore.Table) Range(io.trino.spi.predicate.Range) Domain(io.trino.spi.predicate.Domain) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) TableInput(com.amazonaws.services.glue.model.TableInput) UpdateTableRequest(com.amazonaws.services.glue.model.UpdateTableRequest) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) SchemaTableName(io.trino.spi.connector.SchemaTableName) EntityNotFoundException(com.amazonaws.services.glue.model.EntityNotFoundException) AWSGlueAsync(com.amazonaws.services.glue.AWSGlueAsync) PartitionStatistics(io.trino.plugin.hive.PartitionStatistics) Slice(io.airlift.slice.Slice) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) HiveBasicStatistics.createEmptyStatistics(io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics) ArrayList(java.util.ArrayList) HiveType(io.trino.plugin.hive.HiveType) OptionalLong(java.util.OptionalLong) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) CreateTableRequest(com.amazonaws.services.glue.model.CreateTableRequest) TEXTFILE(io.trino.plugin.hive.HiveStorageFormat.TEXTFILE) NUMBER_OF_NON_NULL_VALUES(io.trino.spi.statistics.ColumnStatisticType.NUMBER_OF_NON_NULL_VALUES) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) Executor(java.util.concurrent.Executor) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) SPARK_TABLE_PROVIDER_KEY(io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY) ConnectorSession(io.trino.spi.connector.ConnectorSession) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) File(java.io.File) HiveMetastoreClosure(io.trino.plugin.hive.HiveMetastoreClosure) AwsSdkUtil.getPaginatedResults(io.trino.plugin.hive.metastore.glue.AwsSdkUtil.getPaginatedResults) MAX_VALUE(io.trino.spi.statistics.ColumnStatisticType.MAX_VALUE) Database(com.amazonaws.services.glue.model.Database) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) MaterializedResult(io.trino.testing.MaterializedResult) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) EXTERNAL_TABLE(org.apache.hadoop.hive.metastore.TableType.EXTERNAL_TABLE) NO_RETRIES(io.trino.spi.connector.RetryMode.NO_RETRIES) ICEBERG_TABLE_TYPE_VALUE(io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_VALUE) ICEBERG_TABLE_TYPE_NAME(io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_NAME) GetDatabasesRequest(com.amazonaws.services.glue.model.GetDatabasesRequest) Block(io.trino.spi.block.Block) SmallintType(io.trino.spi.type.SmallintType) HiveBasicStatistics(io.trino.plugin.hive.HiveBasicStatistics) AWSGlueAsyncClientBuilder(com.amazonaws.services.glue.AWSGlueAsyncClientBuilder) ImmutableMap(com.google.common.collect.ImmutableMap) BeforeClass(org.testng.annotations.BeforeClass) Collection(java.util.Collection) ComputedStatistics(io.trino.spi.statistics.ComputedStatistics) TrinoException(io.trino.spi.TrinoException) String.format(java.lang.String.format) List(java.util.List) DECIMAL_TYPE(io.trino.plugin.hive.metastore.glue.PartitionFilterBuilder.DECIMAL_TYPE) BIGINT(io.trino.spi.type.BigintType.BIGINT) PartitionFilterBuilder.decimalOf(io.trino.plugin.hive.metastore.glue.PartitionFilterBuilder.decimalOf) Optional(java.util.Optional) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) TableStatisticType(io.trino.spi.statistics.TableStatisticType) System.currentTimeMillis(java.lang.System.currentTimeMillis) Logger(io.airlift.log.Logger) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) Assert.assertEquals(org.testng.Assert.assertEquals) BigintType(io.trino.spi.type.BigintType) VarcharType(io.trino.spi.type.VarcharType) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ImmutableList(com.google.common.collect.ImmutableList) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) DAYS(java.util.concurrent.TimeUnit.DAYS) GlueInputConverter(io.trino.plugin.hive.metastore.glue.converter.GlueInputConverter) DeleteDatabaseRequest(com.amazonaws.services.glue.model.DeleteDatabaseRequest) TinyintType(io.trino.spi.type.TinyintType) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) IntegerType(io.trino.spi.type.IntegerType) DELTA_LAKE_PROVIDER(io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) HiveColumnStatistics.createIntegerColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics) TupleDomain(io.trino.spi.predicate.TupleDomain) UUID.randomUUID(java.util.UUID.randomUUID) AbstractTestHiveLocal(io.trino.plugin.hive.AbstractTestHiveLocal) Assert.assertTrue(org.testng.Assert.assertTrue) Collections(java.util.Collections) MIN_VALUE(io.trino.spi.statistics.ColumnStatisticType.MIN_VALUE) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Table(io.trino.plugin.hive.metastore.Table) HiveUtil.isIcebergTable(io.trino.plugin.hive.util.HiveUtil.isIcebergTable) HiveUtil.isDeltaLakeTable(io.trino.plugin.hive.util.HiveUtil.isDeltaLakeTable) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) ArrayList(java.util.ArrayList) HiveMetastoreClosure(io.trino.plugin.hive.HiveMetastoreClosure)

Example 4 with ORC

use of io.trino.plugin.hive.HiveStorageFormat.ORC in project trino by trinodb.

the class AbstractTestHive method testStorePartitionWithStatistics.

protected void testStorePartitionWithStatistics(List<ColumnMetadata> columns, PartitionStatistics statsForAllColumns1, PartitionStatistics statsForAllColumns2, PartitionStatistics statsForSubsetOfColumns, PartitionStatistics emptyStatistics) throws Exception {
    SchemaTableName tableName = temporaryTable("store_partition_with_statistics");
    try {
        doCreateEmptyTable(tableName, ORC, columns);
        HiveMetastoreClosure metastoreClient = new HiveMetastoreClosure(getMetastoreClient());
        Table table = metastoreClient.getTable(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
        List<String> partitionValues = ImmutableList.of("2016-01-01");
        String partitionName = makePartName(ImmutableList.of("ds"), partitionValues);
        Partition partition = createDummyPartition(table, partitionName);
        // create partition with stats for all columns
        metastoreClient.addPartitions(tableName.getSchemaName(), tableName.getTableName(), ImmutableList.of(new PartitionWithStatistics(partition, partitionName, statsForAllColumns1)));
        assertEquals(metastoreClient.getPartition(tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), fromHiveStorageFormat(ORC));
        assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns1));
        // alter the partition into one with other stats
        Partition modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(fromHiveStorageFormat(RCBINARY)).setLocation(partitionTargetPath(tableName, partitionName))).build();
        metastoreClient.alterPartition(tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForAllColumns2));
        assertEquals(metastoreClient.getPartition(tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), fromHiveStorageFormat(RCBINARY));
        assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns2));
        // alter the partition into one with stats for only subset of columns
        modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(fromHiveStorageFormat(TEXTFILE)).setLocation(partitionTargetPath(tableName, partitionName))).build();
        metastoreClient.alterPartition(tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForSubsetOfColumns));
        assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForSubsetOfColumns));
        // alter the partition into one without stats
        modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(fromHiveStorageFormat(TEXTFILE)).setLocation(partitionTargetPath(tableName, partitionName))).build();
        metastoreClient.alterPartition(tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, emptyStatistics));
        assertThat(metastoreClient.getPartitionStatistics(tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, emptyStatistics));
    } finally {
        dropTable(tableName);
    }
}
Also used : Assertions.assertInstanceOf(io.airlift.testing.Assertions.assertInstanceOf) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.testng.annotations.Test) FileStatus(org.apache.hadoop.fs.FileStatus) NOT_SUPPORTED(io.trino.spi.StandardErrorCode.NOT_SUPPORTED) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) HiveColumnStatistics.createDateColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDateColumnStatistics) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Map(java.util.Map) PRESTO_QUERY_ID_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ViewNotFoundException(io.trino.spi.connector.ViewNotFoundException) MaterializedRow(io.trino.testing.MaterializedRow) ROLLBACK_AFTER_FINISH_INSERT(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_FINISH_INSERT) ENGLISH(java.util.Locale.ENGLISH) Assert.assertFalse(org.testng.Assert.assertFalse) HiveIdentity(io.trino.plugin.hive.authentication.HiveIdentity) Domain(io.trino.spi.predicate.Domain) MANAGED_TABLE(org.apache.hadoop.hive.metastore.TableType.MANAGED_TABLE) ASCENDING(io.trino.plugin.hive.metastore.SortingColumn.Order.ASCENDING) ValueSet(io.trino.spi.predicate.ValueSet) MoreExecutors.directExecutor(com.google.common.util.concurrent.MoreExecutors.directExecutor) COMMIT(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.COMMIT) NOT_PARTITIONED(io.trino.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) DESCENDING(io.trino.plugin.hive.metastore.SortingColumn.Order.DESCENDING) ConnectorPartitioningHandle(io.trino.spi.connector.ConnectorPartitioningHandle) TrinoS3ConfigurationInitializer(io.trino.plugin.hive.s3.TrinoS3ConfigurationInitializer) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) MetastoreLocator(io.trino.plugin.hive.metastore.thrift.MetastoreLocator) ROLLBACK_AFTER_SINK_FINISH(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_SINK_FINISH) TableScanRedirectApplicationResult(io.trino.spi.connector.TableScanRedirectApplicationResult) TableColumnsMetadata(io.trino.spi.connector.TableColumnsMetadata) REAL(io.trino.spi.type.RealType.REAL) Partition(io.trino.plugin.hive.metastore.Partition) BUCKETED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) ColumnMetadata(io.trino.spi.connector.ColumnMetadata) TIMESTAMP_MILLIS(io.trino.spi.type.TimestampType.TIMESTAMP_MILLIS) LocalDateTime(java.time.LocalDateTime) ConnectorTableMetadata(io.trino.spi.connector.ConnectorTableMetadata) HiveBasicStatistics.createEmptyStatistics(io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics) Variable(io.trino.spi.expression.Variable) StorageFormat.fromHiveStorageFormat(io.trino.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat) OptionalLong(java.util.OptionalLong) VARCHAR(io.trino.spi.type.VarcharType.VARCHAR) HiveMetastore(io.trino.plugin.hive.metastore.HiveMetastore) OrcPageSource(io.trino.plugin.hive.orc.OrcPageSource) SEQUENCEFILE(io.trino.plugin.hive.HiveStorageFormat.SEQUENCEFILE) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) HIVE_INVALID_PARTITION_VALUE(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_PARTITION_VALUE) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) Assertions.assertGreaterThanOrEqual(io.airlift.testing.Assertions.assertGreaterThanOrEqual) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) HiveSessionProperties.isTemporaryStagingDirectoryEnabled(io.trino.plugin.hive.HiveSessionProperties.isTemporaryStagingDirectoryEnabled) AfterClass(org.testng.annotations.AfterClass) HiveAzureConfig(io.trino.plugin.hive.azure.HiveAzureConfig) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) MapType(io.trino.spi.type.MapType) ConnectorSplit(io.trino.spi.connector.ConnectorSplit) TRANSACTIONAL(io.trino.plugin.hive.HiveTableProperties.TRANSACTIONAL) SPARK_TABLE_PROVIDER_KEY(io.trino.plugin.hive.util.HiveUtil.SPARK_TABLE_PROVIDER_KEY) ConnectorSplitSource(io.trino.spi.connector.ConnectorSplitSource) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) STAGE_AND_MOVE_TO_TARGET_DIRECTORY(io.trino.plugin.hive.LocationHandle.WriteMode.STAGE_AND_MOVE_TO_TARGET_DIRECTORY) ROLLBACK_AFTER_BEGIN_INSERT(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_BEGIN_INSERT) HostAndPort(com.google.common.net.HostAndPort) CatalogName(io.trino.plugin.base.CatalogName) DOUBLE(io.trino.spi.type.DoubleType.DOUBLE) HIVE_INT(io.trino.plugin.hive.HiveType.HIVE_INT) ConnectorTableProperties(io.trino.spi.connector.ConnectorTableProperties) ConnectorExpression(io.trino.spi.expression.ConnectorExpression) HiveTestUtils.mapType(io.trino.plugin.hive.HiveTestUtils.mapType) ParquetPageSource(io.trino.plugin.hive.parquet.ParquetPageSource) ThriftMetastoreConfig(io.trino.plugin.hive.metastore.thrift.ThriftMetastoreConfig) RCTEXT(io.trino.plugin.hive.HiveStorageFormat.RCTEXT) VarcharType.createVarcharType(io.trino.spi.type.VarcharType.createVarcharType) WriteInfo(io.trino.plugin.hive.LocationService.WriteInfo) HivePrivilege(io.trino.plugin.hive.metastore.HivePrivilegeInfo.HivePrivilege) PARTITION_KEY(io.trino.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) HiveColumnStatistics.createStringColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createStringColumnStatistics) MoreFiles.deleteRecursively(com.google.common.io.MoreFiles.deleteRecursively) MaterializedResult(io.trino.testing.MaterializedResult) HiveUtil.toPartitionValues(io.trino.plugin.hive.util.HiveUtil.toPartitionValues) NO_RETRIES(io.trino.spi.connector.RetryMode.NO_RETRIES) ConnectorMaterializedViewDefinition(io.trino.spi.connector.ConnectorMaterializedViewDefinition) ICEBERG_TABLE_TYPE_VALUE(io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_VALUE) Duration(io.airlift.units.Duration) BUCKETING_V1(io.trino.plugin.hive.util.HiveBucketing.BucketingVersion.BUCKETING_V1) SqlTimestamp(io.trino.spi.type.SqlTimestamp) ICEBERG_TABLE_TYPE_NAME(io.trino.plugin.hive.util.HiveUtil.ICEBERG_TABLE_TYPE_NAME) NOOP_METADATA_PROVIDER(io.trino.spi.connector.MetadataProvider.NOOP_METADATA_PROVIDER) HiveMetastoreFactory(io.trino.plugin.hive.metastore.HiveMetastoreFactory) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) HiveTestUtils.arrayType(io.trino.plugin.hive.HiveTestUtils.arrayType) Block(io.trino.spi.block.Block) HIVE_PARTITION_SCHEMA_MISMATCH(io.trino.plugin.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH) ConnectorViewDefinition(io.trino.spi.connector.ConnectorViewDefinition) INTEGER(io.trino.spi.type.IntegerType.INTEGER) ROLLBACK_RIGHT_AWAY(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_RIGHT_AWAY) ImmutableSet(com.google.common.collect.ImmutableSet) BeforeClass(org.testng.annotations.BeforeClass) Collection(java.util.Collection) UUID(java.util.UUID) Assert.assertNotNull(org.testng.Assert.assertNotNull) TrinoAzureConfigurationInitializer(io.trino.plugin.hive.azure.TrinoAzureConfigurationInitializer) HiveColumnStatistics.createDoubleColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDoubleColumnStatistics) BUCKET_COLUMN_NAME(io.trino.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) BIGINT(io.trino.spi.type.BigintType.BIGINT) SORTED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY) LocalDate(java.time.LocalDate) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) HiveTestUtils.getDefaultHivePageSourceFactories(io.trino.plugin.hive.HiveTestUtils.getDefaultHivePageSourceFactories) Constraint(io.trino.spi.connector.Constraint) Assert.assertNull(org.testng.Assert.assertNull) OptionalDouble(java.util.OptionalDouble) Assert.assertEquals(org.testng.Assert.assertEquals) OptionalInt(java.util.OptionalInt) Function(java.util.function.Function) HashSet(java.util.HashSet) HYPER_LOG_LOG(io.trino.spi.type.HyperLogLogType.HYPER_LOG_LOG) ImmutableList(com.google.common.collect.ImmutableList) BridgingHiveMetastore(io.trino.plugin.hive.metastore.thrift.BridgingHiveMetastore) TableStatistics(io.trino.spi.statistics.TableStatistics) HiveColumnHandle.createBaseColumn(io.trino.plugin.hive.HiveColumnHandle.createBaseColumn) Math.toIntExact(java.lang.Math.toIntExact) ExecutorService(java.util.concurrent.ExecutorService) ConnectorPageSink(io.trino.spi.connector.ConnectorPageSink) DESC_NULLS_LAST(io.trino.spi.connector.SortOrder.DESC_NULLS_LAST) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) HiveColumnStatistics.createIntegerColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics) UTF_8(java.nio.charset.StandardCharsets.UTF_8) Assert.fail(org.testng.Assert.fail) ConnectorPageSourceProvider(io.trino.spi.connector.ConnectorPageSourceProvider) DateTime(org.joda.time.DateTime) PAGE_SORTER(io.trino.plugin.hive.HiveTestUtils.PAGE_SORTER) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) HIVE_STRING(io.trino.plugin.hive.HiveType.HIVE_STRING) Hashing.sha256(com.google.common.hash.Hashing.sha256) HiveTestUtils.getHiveSession(io.trino.plugin.hive.HiveTestUtils.getHiveSession) Collectors.toList(java.util.stream.Collectors.toList) PRESTO_VERSION_NAME(io.trino.plugin.hive.HiveMetadata.PRESTO_VERSION_NAME) Assert.assertTrue(org.testng.Assert.assertTrue) PrincipalPrivileges(io.trino.plugin.hive.metastore.PrincipalPrivileges) ConnectorPageSinkProvider(io.trino.spi.connector.ConnectorPageSinkProvider) ConnectorTransactionHandle(io.trino.spi.connector.ConnectorTransactionHandle) ConnectorSplitManager(io.trino.spi.connector.ConnectorSplitManager) Arrays(java.util.Arrays) NamedTypeSignature(io.trino.spi.type.NamedTypeSignature) USER(io.trino.spi.security.PrincipalType.USER) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) HiveColumnStatistics.createDecimalColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createDecimalColumnStatistics) TypeOperators(io.trino.spi.type.TypeOperators) ROLLBACK_AFTER_APPEND_PAGE(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_APPEND_PAGE) HiveTestUtils.rowType(io.trino.plugin.hive.HiveTestUtils.rowType) TrinoExceptionAssert.assertTrinoExceptionThrownBy(io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy) CharType.createCharType(io.trino.spi.type.CharType.createCharType) BigDecimal(java.math.BigDecimal) TypeId(io.trino.spi.type.TypeId) Sets.difference(com.google.common.collect.Sets.difference) PARQUET(io.trino.plugin.hive.HiveStorageFormat.PARQUET) Column(io.trino.plugin.hive.metastore.Column) Executors.newScheduledThreadPool(java.util.concurrent.Executors.newScheduledThreadPool) ConnectorOutputTableHandle(io.trino.spi.connector.ConnectorOutputTableHandle) ConnectorTableHandle(io.trino.spi.connector.ConnectorTableHandle) ViewColumn(io.trino.spi.connector.ConnectorViewDefinition.ViewColumn) ProjectionApplicationResult(io.trino.spi.connector.ProjectionApplicationResult) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) SMALLINT(io.trino.spi.type.SmallintType.SMALLINT) HiveTestUtils.getDefaultHiveRecordCursorProviders(io.trino.plugin.hive.HiveTestUtils.getDefaultHiveRecordCursorProviders) ConnectorNodePartitioningProvider(io.trino.spi.connector.ConnectorNodePartitioningProvider) Table(io.trino.plugin.hive.metastore.Table) TestingNodeManager(io.trino.testing.TestingNodeManager) Range(io.trino.spi.predicate.Range) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) PARTITIONED_BY_PROPERTY(io.trino.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) RcFilePageSource(io.trino.plugin.hive.rcfile.RcFilePageSource) Set(java.util.Set) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) SchemaTableName(io.trino.spi.connector.SchemaTableName) SortingProperty(io.trino.spi.connector.SortingProperty) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HiveColumnStatistics.createBooleanColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBooleanColumnStatistics) SchemaTablePrefix(io.trino.spi.connector.SchemaTablePrefix) Lists.reverse(com.google.common.collect.Lists.reverse) DATE(io.trino.spi.type.DateType.DATE) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) HiveColumnHandle.bucketColumnHandle(io.trino.plugin.hive.HiveColumnHandle.bucketColumnHandle) HivePrincipal(io.trino.plugin.hive.metastore.HivePrincipal) ConnectorTableLayout(io.trino.spi.connector.ConnectorTableLayout) ConnectorInsertTableHandle(io.trino.spi.connector.ConnectorInsertTableHandle) Slice(io.airlift.slice.Slice) NullableValue(io.trino.spi.predicate.NullableValue) Page(io.trino.spi.Page) BOOLEAN(io.trino.spi.type.BooleanType.BOOLEAN) MINUTES(java.util.concurrent.TimeUnit.MINUTES) JoinCompiler(io.trino.sql.gen.JoinCompiler) HiveUtil.columnExtraInfo(io.trino.plugin.hive.util.HiveUtil.columnExtraInfo) GroupByHashPageIndexerFactory(io.trino.operator.GroupByHashPageIndexerFactory) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) ALLOW_INSECURE(com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE) UNGROUPED_SCHEDULING(io.trino.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) ColumnHandle(io.trino.spi.connector.ColumnHandle) TEXTFILE(io.trino.plugin.hive.HiveStorageFormat.TEXTFILE) VARBINARY(io.trino.spi.type.VarbinaryType.VARBINARY) HIVE_INVALID_BUCKET_FILES(io.trino.plugin.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES) HiveType.toHiveType(io.trino.plugin.hive.HiveType.toHiveType) TestingMetastoreLocator(io.trino.plugin.hive.metastore.thrift.TestingMetastoreLocator) STORAGE_FORMAT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) GoogleGcsConfigurationInitializer(io.trino.plugin.hive.gcs.GoogleGcsConfigurationInitializer) ConstraintApplicationResult(io.trino.spi.connector.ConstraintApplicationResult) RecordCursor(io.trino.spi.connector.RecordCursor) BlockTypeOperators(io.trino.type.BlockTypeOperators) LongStream(java.util.stream.LongStream) NO_REDIRECTIONS(io.trino.plugin.hive.HiveTableRedirectionsProvider.NO_REDIRECTIONS) DecimalType.createDecimalType(io.trino.spi.type.DecimalType.createDecimalType) HIVE_LONG(io.trino.plugin.hive.HiveType.HIVE_LONG) HiveTestUtils.getTypes(io.trino.plugin.hive.HiveTestUtils.getTypes) TRANSACTION_CONFLICT(io.trino.spi.StandardErrorCode.TRANSACTION_CONFLICT) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) ConnectorSession(io.trino.spi.connector.ConnectorSession) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) UTC(org.joda.time.DateTimeZone.UTC) SESSION(io.trino.plugin.hive.HiveTestUtils.SESSION) SqlVarbinary(io.trino.spi.type.SqlVarbinary) DiscretePredicates(io.trino.spi.connector.DiscretePredicates) CharType(io.trino.spi.type.CharType) TableType(org.apache.hadoop.hive.metastore.TableType) CachingHiveMetastore.cachingHiveMetastore(io.trino.plugin.hive.metastore.cache.CachingHiveMetastore.cachingHiveMetastore) TINYINT(io.trino.spi.type.TinyintType.TINYINT) DateTimeTestingUtils.sqlTimestampOf(io.trino.testing.DateTimeTestingUtils.sqlTimestampOf) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) HiveColumnStatistics.createBinaryColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics.createBinaryColumnStatistics) MoreCollectors.onlyElement(com.google.common.collect.MoreCollectors.onlyElement) NoHdfsAuthentication(io.trino.plugin.hive.authentication.NoHdfsAuthentication) QueryAssertions.assertEqualsIgnoreOrder(io.trino.testing.QueryAssertions.assertEqualsIgnoreOrder) HiveGcsConfig(io.trino.plugin.hive.gcs.HiveGcsConfig) Iterables.concat(com.google.common.collect.Iterables.concat) Path(org.apache.hadoop.fs.Path) KILOBYTE(io.airlift.units.DataSize.Unit.KILOBYTE) TIMESTAMP_WITH_TIME_ZONE(io.trino.spi.type.TimestampWithTimeZoneType.TIMESTAMP_WITH_TIME_ZONE) AVRO(io.trino.plugin.hive.HiveStorageFormat.AVRO) StorageFormat(io.trino.plugin.hive.metastore.StorageFormat) RowType(io.trino.spi.type.RowType) HiveWriteUtils.getTableDefaultLocation(io.trino.plugin.hive.util.HiveWriteUtils.getTableDefaultLocation) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) ThriftHiveMetastore(io.trino.plugin.hive.metastore.thrift.ThriftHiveMetastore) HiveSessionProperties.getTemporaryStagingDirectoryPath(io.trino.plugin.hive.HiveSessionProperties.getTemporaryStagingDirectoryPath) TrinoException(io.trino.spi.TrinoException) ArrayType(io.trino.spi.type.ArrayType) MaterializedResult.materializeSourceDataStream(io.trino.testing.MaterializedResult.materializeSourceDataStream) ConnectorBucketNodeMap(io.trino.spi.connector.ConnectorBucketNodeMap) ASC_NULLS_FIRST(io.trino.spi.connector.SortOrder.ASC_NULLS_FIRST) String.format(java.lang.String.format) SqlDate(io.trino.spi.type.SqlDate) Preconditions.checkState(com.google.common.base.Preconditions.checkState) SqlTimestampWithTimeZone(io.trino.spi.type.SqlTimestampWithTimeZone) DataSize(io.airlift.units.DataSize) HdfsContext(io.trino.plugin.hive.HdfsEnvironment.HdfsContext) List(java.util.List) DynamicFilter(io.trino.spi.connector.DynamicFilter) Assignment(io.trino.spi.connector.Assignment) Optional(java.util.Optional) ConnectorMetadata(io.trino.spi.connector.ConnectorMetadata) HivePrivilegeInfo(io.trino.plugin.hive.metastore.HivePrivilegeInfo) SqlStandardAccessControlMetadata(io.trino.plugin.hive.security.SqlStandardAccessControlMetadata) Logger(io.airlift.log.Logger) MetastoreConfig(io.trino.plugin.hive.metastore.MetastoreConfig) Type(io.trino.spi.type.Type) VarcharType.createUnboundedVarcharType(io.trino.spi.type.VarcharType.createUnboundedVarcharType) CounterStat(io.airlift.stats.CounterStat) HashMap(java.util.HashMap) HiveBasicStatistics.createZeroStatistics(io.trino.plugin.hive.HiveBasicStatistics.createZeroStatistics) CSV(io.trino.plugin.hive.HiveStorageFormat.CSV) AtomicReference(java.util.concurrent.atomic.AtomicReference) VarcharType(io.trino.spi.type.VarcharType) HiveColumnStatistics(io.trino.plugin.hive.metastore.HiveColumnStatistics) ROLLBACK_AFTER_DELETE(io.trino.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_DELETE) Assertions.assertThatThrownBy(org.assertj.core.api.Assertions.assertThatThrownBy) Verify.verify(com.google.common.base.Verify.verify) Assertions.assertLessThanOrEqual(io.airlift.testing.Assertions.assertLessThanOrEqual) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) SemiTransactionalHiveMetastore(io.trino.plugin.hive.metastore.SemiTransactionalHiveMetastore) RecordPageSource(io.trino.spi.connector.RecordPageSource) Objects.requireNonNull(java.util.Objects.requireNonNull) RowFieldName(io.trino.spi.type.RowFieldName) JSON(io.trino.plugin.hive.HiveStorageFormat.JSON) RCBINARY(io.trino.plugin.hive.HiveStorageFormat.RCBINARY) NO_PRIVILEGES(io.trino.plugin.hive.metastore.PrincipalPrivileges.NO_PRIVILEGES) DELTA_LAKE_PROVIDER(io.trino.plugin.hive.util.HiveUtil.DELTA_LAKE_PROVIDER) ColumnStatistics(io.trino.spi.statistics.ColumnStatistics) FieldDereference(io.trino.spi.expression.FieldDereference) HiveTestUtils.getDefaultHiveFileWriterFactories(io.trino.plugin.hive.HiveTestUtils.getDefaultHiveFileWriterFactories) HiveTestUtils.getHiveSessionProperties(io.trino.plugin.hive.HiveTestUtils.getHiveSessionProperties) TupleDomain(io.trino.spi.predicate.TupleDomain) HiveWriteUtils.createDirectory(io.trino.plugin.hive.util.HiveWriteUtils.createDirectory) TestingConnectorSession(io.trino.testing.TestingConnectorSession) HiveS3Config(io.trino.plugin.hive.s3.HiveS3Config) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) BUCKET_COUNT_PROPERTY(io.trino.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) SortingColumn(io.trino.plugin.hive.metastore.SortingColumn) SECONDS(java.util.concurrent.TimeUnit.SECONDS) REGULAR(io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) TableNotFoundException(io.trino.spi.connector.TableNotFoundException) Partition(io.trino.plugin.hive.metastore.Partition) Table(io.trino.plugin.hive.metastore.Table) PartitionWithStatistics(io.trino.plugin.hive.metastore.PartitionWithStatistics) CatalogSchemaTableName(io.trino.spi.connector.CatalogSchemaTableName) SchemaTableName(io.trino.spi.connector.SchemaTableName)

Example 5 with ORC

use of io.trino.plugin.hive.HiveStorageFormat.ORC in project trino by trinodb.

the class TestHiveFileFormats method testOrcUseColumnNameLowerCaseConversion.

@Test(dataProvider = "rowCount")
public void testOrcUseColumnNameLowerCaseConversion(int rowCount) throws Exception {
    List<TestColumn> testColumnsUpperCase = TEST_COLUMNS.stream().map(testColumn -> new TestColumn(testColumn.getName().toUpperCase(Locale.ENGLISH), testColumn.getObjectInspector(), testColumn.getWriteValue(), testColumn.getExpectedValue(), testColumn.isPartitionKey())).collect(toList());
    ConnectorSession session = getHiveSession(new HiveConfig(), new OrcReaderConfig().setUseColumnNames(true));
    assertThatFileFormat(ORC).withWriteColumns(testColumnsUpperCase).withRowsCount(rowCount).withReadColumns(TEST_COLUMNS).withSession(session).isReadableByPageSource(new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC));
}
Also used : OrcFileWriterFactory(io.trino.plugin.hive.orc.OrcFileWriterFactory) ParquetFileWriterFactory(io.trino.plugin.hive.parquet.ParquetFileWriterFactory) Test(org.testng.annotations.Test) NO_ACID_TRANSACTION(io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION) HiveTestUtils.createGenericHiveRecordCursorProvider(io.trino.plugin.hive.HiveTestUtils.createGenericHiveRecordCursorProvider) TrinoExceptionAssert.assertTrinoExceptionThrownBy(io.trino.testing.assertions.TrinoExceptionAssert.assertTrinoExceptionThrownBy) PARQUET(io.trino.plugin.hive.HiveStorageFormat.PARQUET) FileSplit(org.apache.hadoop.mapred.FileSplit) Locale(java.util.Locale) Configuration(org.apache.hadoop.conf.Configuration) StructuralTestUtil.rowBlockOf(io.trino.testing.StructuralTestUtil.rowBlockOf) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ConnectorPageSource(io.trino.spi.connector.ConnectorPageSource) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) AVRO(io.trino.plugin.hive.HiveStorageFormat.AVRO) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) LzoCodec(io.airlift.compress.lzo.LzoCodec) ImmutableSet(com.google.common.collect.ImmutableSet) TimeZone(java.util.TimeZone) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) BeforeClass(org.testng.annotations.BeforeClass) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) HDFS_ENVIRONMENT(io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) List(java.util.List) ColumnMapping.buildColumnMappings(io.trino.plugin.hive.HivePageSourceProvider.ColumnMapping.buildColumnMappings) OrcReaderConfig(io.trino.plugin.hive.orc.OrcReaderConfig) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) Optional(java.util.Optional) ParquetReaderConfig(io.trino.plugin.hive.parquet.ParquetReaderConfig) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) RcFilePageSourceFactory(io.trino.plugin.hive.rcfile.RcFilePageSourceFactory) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) DataProvider(org.testng.annotations.DataProvider) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) Type(io.trino.spi.type.Type) Assert.assertEquals(org.testng.Assert.assertEquals) CSV(io.trino.plugin.hive.HiveStorageFormat.CSV) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) SymlinkTextInputFormat(org.apache.hadoop.hive.ql.io.SymlinkTextInputFormat) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) ParquetPageSourceFactory(io.trino.plugin.hive.parquet.ParquetPageSourceFactory) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) ParquetWriterConfig(io.trino.plugin.hive.parquet.ParquetWriterConfig) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) SEQUENCEFILE(io.trino.plugin.hive.HiveStorageFormat.SEQUENCEFILE) OrcReaderOptions(io.trino.orc.OrcReaderOptions) OrcPageSourceFactory(io.trino.plugin.hive.orc.OrcPageSourceFactory) RecordPageSource(io.trino.spi.connector.RecordPageSource) Objects.requireNonNull(java.util.Objects.requireNonNull) TEXTFILE(io.trino.plugin.hive.HiveStorageFormat.TEXTFILE) JSON(io.trino.plugin.hive.HiveStorageFormat.JSON) OrcWriterConfig(io.trino.plugin.hive.orc.OrcWriterConfig) RCBINARY(io.trino.plugin.hive.HiveStorageFormat.RCBINARY) RecordCursor(io.trino.spi.connector.RecordCursor) Properties(java.util.Properties) ORC(io.trino.plugin.hive.HiveStorageFormat.ORC) HiveTestUtils.getTypes(io.trino.plugin.hive.HiveTestUtils.getTypes) TESTING_TYPE_MANAGER(io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER) IOException(java.io.IOException) ConnectorSession(io.trino.spi.connector.ConnectorSession) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) TupleDomain(io.trino.spi.predicate.TupleDomain) UTC(org.joda.time.DateTimeZone.UTC) File(java.io.File) TestingConnectorSession(io.trino.testing.TestingConnectorSession) SESSION(io.trino.plugin.hive.HiveTestUtils.SESSION) HiveTestUtils.getHiveSession(io.trino.plugin.hive.HiveTestUtils.getHiveSession) Collectors.toList(java.util.stream.Collectors.toList) OrcWriterOptions(io.trino.orc.OrcWriterOptions) RCTEXT(io.trino.plugin.hive.HiveStorageFormat.RCTEXT) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) OrcReaderConfig(io.trino.plugin.hive.orc.OrcReaderConfig) OrcReaderOptions(io.trino.orc.OrcReaderOptions) ConnectorSession(io.trino.spi.connector.ConnectorSession) TestingConnectorSession(io.trino.testing.TestingConnectorSession) OrcPageSourceFactory(io.trino.plugin.hive.orc.OrcPageSourceFactory) Test(org.testng.annotations.Test)

Aggregations

ImmutableList (com.google.common.collect.ImmutableList)5 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)5 Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice)5 ORC (io.trino.plugin.hive.HiveStorageFormat.ORC)5 TEXTFILE (io.trino.plugin.hive.HiveStorageFormat.TEXTFILE)5 NO_ACID_TRANSACTION (io.trino.plugin.hive.acid.AcidTransaction.NO_ACID_TRANSACTION)5 ConnectorSession (io.trino.spi.connector.ConnectorSession)5 TupleDomain (io.trino.spi.predicate.TupleDomain)5 ImmutableMap (com.google.common.collect.ImmutableMap)3 MoreFutures.getFutureValue (io.airlift.concurrent.MoreFutures.getFutureValue)3 Logger (io.airlift.log.Logger)3 Slice (io.airlift.slice.Slice)3 HiveBasicStatistics.createEmptyStatistics (io.trino.plugin.hive.HiveBasicStatistics.createEmptyStatistics)3 HiveIdentity (io.trino.plugin.hive.authentication.HiveIdentity)3 HiveColumnStatistics (io.trino.plugin.hive.metastore.HiveColumnStatistics)3 HiveColumnStatistics.createIntegerColumnStatistics (io.trino.plugin.hive.metastore.HiveColumnStatistics.createIntegerColumnStatistics)3 HiveMetastore (io.trino.plugin.hive.metastore.HiveMetastore)3 MetastoreConfig (io.trino.plugin.hive.metastore.MetastoreConfig)3 PartitionWithStatistics (io.trino.plugin.hive.metastore.PartitionWithStatistics)3 Table (io.trino.plugin.hive.metastore.Table)3