Search in sources :

Example 1 with TEXTFILE

use of io.prestosql.plugin.hive.HiveStorageFormat.TEXTFILE in project hetu-core by openlookeng.

the class AbstractTestHive method testStorePartitionWithStatistics.

protected void testStorePartitionWithStatistics(List<ColumnMetadata> columns, PartitionStatistics statsForAllColumns1, PartitionStatistics statsForAllColumns2, PartitionStatistics statsForSubsetOfColumns, PartitionStatistics emptyStatistics) throws Exception {
    SchemaTableName tableName = temporaryTable("store_partition_with_statistics");
    try {
        doCreateEmptyTable(tableName, ORC, columns);
        HiveMetastoreClosure hiveMetastoreClient = new HiveMetastoreClosure(getMetastoreClient());
        HiveIdentity identity = new HiveIdentity(SESSION);
        Table table = hiveMetastoreClient.getTable(identity, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
        List<String> partitionValues = ImmutableList.of("2016-01-01");
        String partitionName = makePartName(ImmutableList.of("ds"), partitionValues);
        Partition partition = createDummyPartition(table, partitionName);
        // create partition with stats for all columns
        hiveMetastoreClient.addPartitions(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableList.of(new PartitionWithStatistics(partition, partitionName, statsForAllColumns1)));
        assertEquals(hiveMetastoreClient.getPartition(identity, tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), StorageFormat.fromHiveStorageFormat(ORC));
        assertThat(hiveMetastoreClient.getPartitionStatistics(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns1));
        // alter the partition into one with other stats
        Partition modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(StorageFormat.fromHiveStorageFormat(RCBINARY)).setLocation(partitionTargetPath(tableName, partitionName))).build();
        hiveMetastoreClient.alterPartition(identity, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForAllColumns2));
        assertEquals(hiveMetastoreClient.getPartition(identity, tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), StorageFormat.fromHiveStorageFormat(RCBINARY));
        assertThat(hiveMetastoreClient.getPartitionStatistics(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns2));
        // alter the partition into one with stats for only subset of columns
        modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(StorageFormat.fromHiveStorageFormat(TEXTFILE)).setLocation(partitionTargetPath(tableName, partitionName))).build();
        hiveMetastoreClient.alterPartition(identity, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForSubsetOfColumns));
        assertThat(hiveMetastoreClient.getPartitionStatistics(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForSubsetOfColumns));
        // alter the partition into one without stats
        modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(StorageFormat.fromHiveStorageFormat(TEXTFILE)).setLocation(partitionTargetPath(tableName, partitionName))).build();
        hiveMetastoreClient.alterPartition(identity, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, emptyStatistics));
        assertThat(hiveMetastoreClient.getPartitionStatistics(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, emptyStatistics));
    } finally {
        dropTable(tableName);
    }
}
Also used : ROLLBACK_AFTER_FINISH_INSERT(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_FINISH_INSERT) HiveType.toHiveType(io.prestosql.plugin.hive.HiveType.toHiveType) TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) Assertions.assertInstanceOf(io.airlift.testing.Assertions.assertInstanceOf) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.testng.annotations.Test) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) FileStatus(org.apache.hadoop.fs.FileStatus) TEXTFILE(io.prestosql.plugin.hive.HiveStorageFormat.TEXTFILE) TypeSignature.parseTypeSignature(io.prestosql.spi.type.TypeSignature.parseTypeSignature) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Map(java.util.Map) RowType(io.prestosql.spi.type.RowType) ENGLISH(java.util.Locale.ENGLISH) Assert.assertFalse(org.testng.Assert.assertFalse) Chars.isCharType(io.prestosql.spi.type.Chars.isCharType) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) MoreExecutors.directExecutor(com.google.common.util.concurrent.MoreExecutors.directExecutor) RCTEXT(io.prestosql.plugin.hive.HiveStorageFormat.RCTEXT) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) Table(io.prestosql.plugin.hive.metastore.Table) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) HiveBasicStatistics.createZeroStatistics(io.prestosql.plugin.hive.HiveBasicStatistics.createZeroStatistics) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) TYPE_MANAGER(io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER) MetastoreLocator(io.prestosql.plugin.hive.metastore.thrift.MetastoreLocator) LocalDateTime(java.time.LocalDateTime) PRESTO_QUERY_ID_NAME(io.prestosql.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME) ThriftHiveMetastoreConfig(io.prestosql.plugin.hive.metastore.thrift.ThriftHiveMetastoreConfig) OptionalLong(java.util.OptionalLong) REGULAR(io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) PARTITION_KEY(io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) ThriftHiveMetastore(io.prestosql.plugin.hive.metastore.thrift.ThriftHiveMetastore) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) Assertions.assertGreaterThanOrEqual(io.airlift.testing.Assertions.assertGreaterThanOrEqual) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) PARQUET(io.prestosql.plugin.hive.HiveStorageFormat.PARQUET) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) AfterClass(org.testng.annotations.AfterClass) HiveTestUtils.mapType(io.prestosql.plugin.hive.HiveTestUtils.mapType) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) ROLLBACK_RIGHT_AWAY(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_RIGHT_AWAY) HostAndPort(com.google.common.net.HostAndPort) USER(io.prestosql.spi.security.PrincipalType.USER) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) VARBINARY(io.prestosql.spi.type.VarbinaryType.VARBINARY) HiveTestUtils.getDefaultOrcFileWriterFactory(io.prestosql.plugin.hive.HiveTestUtils.getDefaultOrcFileWriterFactory) ConnectorPageSourceProvider(io.prestosql.spi.connector.ConnectorPageSourceProvider) ROLLBACK_AFTER_APPEND_PAGE(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_APPEND_PAGE) Varchars.isVarcharType(io.prestosql.spi.type.Varchars.isVarcharType) ConnectorSplitManager(io.prestosql.spi.connector.ConnectorSplitManager) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) MaterializedResult.materializeSourceDataStream(io.prestosql.testing.MaterializedResult.materializeSourceDataStream) MaterializedResult(io.prestosql.testing.MaterializedResult) Duration(io.airlift.units.Duration) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) BOOLEAN(io.prestosql.spi.type.BooleanType.BOOLEAN) Type(io.prestosql.spi.type.Type) RcFilePageSource(io.prestosql.plugin.hive.rcfile.RcFilePageSource) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) DecimalType.createDecimalType(io.prestosql.spi.type.DecimalType.createDecimalType) PrestoException(io.prestosql.spi.PrestoException) HiveBasicStatistics.createEmptyStatistics(io.prestosql.plugin.hive.HiveBasicStatistics.createEmptyStatistics) ImmutableSet(com.google.common.collect.ImmutableSet) CachingHiveMetastore(io.prestosql.plugin.hive.metastore.CachingHiveMetastore) MetadataManager.createTestMetadataManager(io.prestosql.metadata.MetadataManager.createTestMetadataManager) ROLLBACK_AFTER_DELETE(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_DELETE) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) BeforeClass(org.testng.annotations.BeforeClass) Collection(java.util.Collection) UUID(java.util.UUID) TINYINT(io.prestosql.spi.type.TinyintType.TINYINT) Assert.assertNotNull(org.testng.Assert.assertNotNull) HYPER_LOG_LOG(io.prestosql.spi.type.HyperLogLogType.HYPER_LOG_LOG) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) NOT_PARTITIONED(io.prestosql.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) SqlTimestamp(io.prestosql.spi.type.SqlTimestamp) BUCKET_COLUMN_NAME(io.prestosql.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) Assert.assertNull(org.testng.Assert.assertNull) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) SqlDate(io.prestosql.spi.type.SqlDate) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) OptionalDouble(java.util.OptionalDouble) Assert.assertEquals(org.testng.Assert.assertEquals) BUCKETED_BY_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) OptionalInt(java.util.OptionalInt) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) ViewColumn(io.prestosql.spi.connector.ConnectorViewDefinition.ViewColumn) HiveColumnHandle.bucketColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle.bucketColumnHandle) DATE(io.prestosql.spi.type.DateType.DATE) Math.toIntExact(java.lang.Math.toIntExact) STORAGE_FORMAT_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) Block(io.prestosql.spi.block.Block) ExecutorService(java.util.concurrent.ExecutorService) Collections.emptyMap(java.util.Collections.emptyMap) ParquetPageSource(io.prestosql.plugin.hive.parquet.ParquetPageSource) UTF_8(java.nio.charset.StandardCharsets.UTF_8) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) TupleDomain(io.prestosql.spi.predicate.TupleDomain) Assert.fail(org.testng.Assert.fail) DateTime(org.joda.time.DateTime) PartitionWithStatistics(io.prestosql.plugin.hive.metastore.PartitionWithStatistics) Page(io.prestosql.spi.Page) HiveTestUtils.getDefaultHiveDataStreamFactories(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveDataStreamFactories) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) Hashing.sha256(com.google.common.hash.Hashing.sha256) BUCKETING_V1(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion.BUCKETING_V1) Assertions.assertEqualsIgnoreOrder(io.airlift.testing.Assertions.assertEqualsIgnoreOrder) PARTITIONED_BY_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) JoinCompiler(io.prestosql.sql.gen.JoinCompiler) Assert.assertTrue(org.testng.Assert.assertTrue) RecordPageSource(io.prestosql.spi.connector.RecordPageSource) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) OrcConcatPageSource(io.prestosql.plugin.hive.orc.OrcConcatPageSource) ROLLBACK_AFTER_BEGIN_INSERT(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_BEGIN_INSERT) Arrays(java.util.Arrays) RCBINARY(io.prestosql.plugin.hive.HiveStorageFormat.RCBINARY) NoHdfsAuthentication(io.prestosql.plugin.hive.authentication.NoHdfsAuthentication) ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink) ValueSet(io.prestosql.spi.predicate.ValueSet) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) BigDecimal(java.math.BigDecimal) Sets.difference(com.google.common.collect.Sets.difference) Executors.newScheduledThreadPool(java.util.concurrent.Executors.newScheduledThreadPool) HIVE_STRING(io.prestosql.plugin.hive.HiveType.HIVE_STRING) RowFieldName(io.prestosql.spi.type.RowFieldName) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ConnectorPageSinkProvider(io.prestosql.spi.connector.ConnectorPageSinkProvider) JSON(io.prestosql.plugin.hive.HiveStorageFormat.JSON) HIVE_INT(io.prestosql.plugin.hive.HiveType.HIVE_INT) HIVE_LONG(io.prestosql.plugin.hive.HiveType.HIVE_LONG) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) UNGROUPED_SCHEDULING(io.prestosql.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) SqlStandardAccessControlMetadata(io.prestosql.plugin.hive.security.SqlStandardAccessControlMetadata) TIMESTAMP(io.prestosql.spi.type.TimestampType.TIMESTAMP) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) VarcharType.createVarcharType(io.prestosql.spi.type.VarcharType.createVarcharType) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Domain(io.prestosql.spi.predicate.Domain) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) TestingNodeManager(io.prestosql.testing.TestingNodeManager) Lists.reverse(com.google.common.collect.Lists.reverse) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) StandardTypes(io.prestosql.spi.type.StandardTypes) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) BUCKET_COUNT_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) MapType(io.prestosql.spi.type.MapType) GroupByHashPageIndexerFactory(io.prestosql.GroupByHashPageIndexerFactory) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) VARCHAR(io.prestosql.spi.type.VarcharType.VARCHAR) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) AVRO(io.prestosql.plugin.hive.HiveStorageFormat.AVRO) HiveTestUtils.rowType(io.prestosql.plugin.hive.HiveTestUtils.rowType) RecordCursor(io.prestosql.spi.connector.RecordCursor) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) SESSION(io.prestosql.plugin.hive.HiveTestUtils.SESSION) HiveMetastore(io.prestosql.plugin.hive.metastore.HiveMetastore) LongStream(java.util.stream.LongStream) MULTIDELIMIT(io.prestosql.plugin.hive.HiveStorageFormat.MULTIDELIMIT) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) PAGE_SORTER(io.prestosql.plugin.hive.HiveTestUtils.PAGE_SORTER) UTC(org.joda.time.DateTimeZone.UTC) MaterializedRow(io.prestosql.testing.MaterializedRow) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) DateTimeTestingUtils.sqlTimestampOf(io.prestosql.testing.DateTimeTestingUtils.sqlTimestampOf) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) STAGE_AND_MOVE_TO_TARGET_DIRECTORY(io.prestosql.plugin.hive.LocationHandle.WriteMode.STAGE_AND_MOVE_TO_TARGET_DIRECTORY) TableType(org.apache.hadoop.hive.metastore.TableType) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) OrcPageSource(io.prestosql.plugin.hive.orc.OrcPageSource) HiveTestUtils.getDefaultHiveSelectiveFactories(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveSelectiveFactories) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) MoreCollectors.onlyElement(com.google.common.collect.MoreCollectors.onlyElement) Iterables.concat(com.google.common.collect.Iterables.concat) HiveWriteUtils.createDirectory(io.prestosql.plugin.hive.HiveWriteUtils.createDirectory) Path(org.apache.hadoop.fs.Path) KILOBYTE(io.airlift.units.DataSize.Unit.KILOBYTE) Constraint(io.prestosql.spi.connector.Constraint) ImmutableMap(com.google.common.collect.ImmutableMap) ArrayType(io.prestosql.spi.type.ArrayType) CharType.createCharType(io.prestosql.spi.type.CharType.createCharType) ConnectorSplitSource(io.prestosql.spi.connector.ConnectorSplitSource) HiveTestUtils.getDefaultHiveFileWriterFactories(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveFileWriterFactories) PRESTO_VERSION_NAME(io.prestosql.plugin.hive.HiveMetadata.PRESTO_VERSION_NAME) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) DataSize(io.airlift.units.DataSize) List(java.util.List) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) HiveTestUtils.getTypes(io.prestosql.plugin.hive.HiveTestUtils.getTypes) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) SORTED_BY_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY) Logger(io.airlift.log.Logger) CounterStat(io.airlift.stats.CounterStat) HashMap(java.util.HashMap) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) AtomicReference(java.util.concurrent.atomic.AtomicReference) SqlVarbinary(io.prestosql.spi.type.SqlVarbinary) BridgingHiveMetastore(io.prestosql.plugin.hive.metastore.thrift.BridgingHiveMetastore) NamedTypeSignature(io.prestosql.spi.type.NamedTypeSignature) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) COMMIT(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.COMMIT) TestingMetastoreLocator(io.prestosql.plugin.hive.metastore.thrift.TestingMetastoreLocator) Verify.verify(com.google.common.base.Verify.verify) Assertions.assertLessThanOrEqual(io.airlift.testing.Assertions.assertLessThanOrEqual) Range(io.prestosql.spi.predicate.Range) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) HivePrivilegeInfo(io.prestosql.plugin.hive.metastore.HivePrivilegeInfo) Objects.requireNonNull(java.util.Objects.requireNonNull) SEQUENCEFILE(io.prestosql.plugin.hive.HiveStorageFormat.SEQUENCEFILE) REAL(io.prestosql.spi.type.RealType.REAL) HiveMetadata.convertToPredicate(io.prestosql.plugin.hive.HiveMetadata.convertToPredicate) ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) HiveTestUtils.getNoOpIndexCache(io.prestosql.plugin.hive.HiveTestUtils.getNoOpIndexCache) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) TRANSACTION_CONFLICT(io.prestosql.spi.StandardErrorCode.TRANSACTION_CONFLICT) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) CSV(io.prestosql.plugin.hive.HiveStorageFormat.CSV) HiveTestUtils.getDefaultHiveRecordCursorProvider(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveRecordCursorProvider) HiveTestUtils.arrayType(io.prestosql.plugin.hive.HiveTestUtils.arrayType) SMALLINT(io.prestosql.spi.type.SmallintType.SMALLINT) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) ROLLBACK_AFTER_SINK_FINISH(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_SINK_FINISH) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) Partition(io.prestosql.plugin.hive.metastore.Partition) Table(io.prestosql.plugin.hive.metastore.Table) PartitionWithStatistics(io.prestosql.plugin.hive.metastore.PartitionWithStatistics) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity)

Example 2 with TEXTFILE

use of io.prestosql.plugin.hive.HiveStorageFormat.TEXTFILE in project hetu-core by openlookeng.

the class TestHiveFileFormats method testTextFile.

@Test(dataProvider = "rowCount")
public void testTextFile(int rowCount) throws Exception {
    List<TestColumn> testColumns = TEST_COLUMNS.stream().filter(column -> !column.getName().equals("t_map_null_key_complex_key_value")).collect(toList());
    assertThatFileFormat(TEXTFILE).withColumns(testColumns).withRowsCount(rowCount).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
}
Also used : Iterables.transform(com.google.common.collect.Iterables.transform) RCBINARY(io.prestosql.plugin.hive.HiveStorageFormat.RCBINARY) Test(org.testng.annotations.Test) TEXTFILE(io.prestosql.plugin.hive.HiveStorageFormat.TEXTFILE) FileSplit(org.apache.hadoop.mapred.FileSplit) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Predicates.not(com.google.common.base.Predicates.not) Locale(java.util.Locale) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Duration(java.time.Duration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) JSON(io.prestosql.plugin.hive.HiveStorageFormat.JSON) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) LzoCodec(io.airlift.compress.lzo.LzoCodec) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) BeforeClass(org.testng.annotations.BeforeClass) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) RCTEXT(io.prestosql.plugin.hive.HiveStorageFormat.RCTEXT) List(java.util.List) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) ParquetPageSourceFactory(io.prestosql.plugin.hive.parquet.ParquetPageSourceFactory) HDFS_ENVIRONMENT(io.prestosql.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) Optional(java.util.Optional) Iterables.filter(com.google.common.collect.Iterables.filter) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) Joiner(com.google.common.base.Joiner) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) DataProvider(org.testng.annotations.DataProvider) Logger(io.airlift.log.Logger) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) TYPE_MANAGER(io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER) Assert.assertEquals(org.testng.Assert.assertEquals) RcFilePageSourceFactory(io.prestosql.plugin.hive.rcfile.RcFilePageSourceFactory) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcPageSourceFactory(io.prestosql.plugin.hive.orc.OrcPageSourceFactory) AVRO(io.prestosql.plugin.hive.HiveStorageFormat.AVRO) SEQUENCEFILE(io.prestosql.plugin.hive.HiveStorageFormat.SEQUENCEFILE) RecordCursor(io.prestosql.spi.connector.RecordCursor) PARQUET(io.prestosql.plugin.hive.HiveStorageFormat.PARQUET) Properties(java.util.Properties) TupleDomain(io.prestosql.spi.predicate.TupleDomain) CSV(io.prestosql.plugin.hive.HiveStorageFormat.CSV) Assert.fail(org.testng.Assert.fail) IOException(java.io.IOException) File(java.io.File) OrcWriterOptions(io.prestosql.orc.OrcWriterOptions) HiveTestUtils.createGenericHiveRecordCursorProvider(io.prestosql.plugin.hive.HiveTestUtils.createGenericHiveRecordCursorProvider) Collectors.toList(java.util.stream.Collectors.toList) OrcCacheStore(io.prestosql.orc.OrcCacheStore) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) RecordPageSource(io.prestosql.spi.connector.RecordPageSource) HiveTestUtils.createGenericHiveRecordCursorProvider(io.prestosql.plugin.hive.HiveTestUtils.createGenericHiveRecordCursorProvider) Test(org.testng.annotations.Test)

Example 3 with TEXTFILE

use of io.prestosql.plugin.hive.HiveStorageFormat.TEXTFILE in project boostkit-bigdata by kunpengcompute.

the class AbstractTestHive method testStorePartitionWithStatistics.

protected void testStorePartitionWithStatistics(List<ColumnMetadata> columns, PartitionStatistics statsForAllColumns1, PartitionStatistics statsForAllColumns2, PartitionStatistics statsForSubsetOfColumns, PartitionStatistics emptyStatistics) throws Exception {
    SchemaTableName tableName = temporaryTable("store_partition_with_statistics");
    try {
        doCreateEmptyTable(tableName, ORC, columns);
        HiveMetastoreClosure hiveMetastoreClient = new HiveMetastoreClosure(getMetastoreClient());
        HiveIdentity identity = new HiveIdentity(SESSION);
        Table table = hiveMetastoreClient.getTable(identity, tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new TableNotFoundException(tableName));
        List<String> partitionValues = ImmutableList.of("2016-01-01");
        String partitionName = makePartName(ImmutableList.of("ds"), partitionValues);
        Partition partition = createDummyPartition(table, partitionName);
        // create partition with stats for all columns
        hiveMetastoreClient.addPartitions(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableList.of(new PartitionWithStatistics(partition, partitionName, statsForAllColumns1)));
        assertEquals(hiveMetastoreClient.getPartition(identity, tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), StorageFormat.fromHiveStorageFormat(ORC));
        assertThat(hiveMetastoreClient.getPartitionStatistics(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns1));
        // alter the partition into one with other stats
        Partition modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(StorageFormat.fromHiveStorageFormat(RCBINARY)).setLocation(partitionTargetPath(tableName, partitionName))).build();
        hiveMetastoreClient.alterPartition(identity, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForAllColumns2));
        assertEquals(hiveMetastoreClient.getPartition(identity, tableName.getSchemaName(), tableName.getTableName(), partitionValues).get().getStorage().getStorageFormat(), StorageFormat.fromHiveStorageFormat(RCBINARY));
        assertThat(hiveMetastoreClient.getPartitionStatistics(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForAllColumns2));
        // alter the partition into one with stats for only subset of columns
        modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(StorageFormat.fromHiveStorageFormat(TEXTFILE)).setLocation(partitionTargetPath(tableName, partitionName))).build();
        hiveMetastoreClient.alterPartition(identity, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, statsForSubsetOfColumns));
        assertThat(hiveMetastoreClient.getPartitionStatistics(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, statsForSubsetOfColumns));
        // alter the partition into one without stats
        modifiedPartition = Partition.builder(partition).withStorage(storage -> storage.setStorageFormat(StorageFormat.fromHiveStorageFormat(TEXTFILE)).setLocation(partitionTargetPath(tableName, partitionName))).build();
        hiveMetastoreClient.alterPartition(identity, tableName.getSchemaName(), tableName.getTableName(), new PartitionWithStatistics(modifiedPartition, partitionName, emptyStatistics));
        assertThat(hiveMetastoreClient.getPartitionStatistics(identity, tableName.getSchemaName(), tableName.getTableName(), ImmutableSet.of(partitionName))).isEqualTo(ImmutableMap.of(partitionName, emptyStatistics));
    } finally {
        dropTable(tableName);
    }
}
Also used : ROLLBACK_AFTER_FINISH_INSERT(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_FINISH_INSERT) HiveType.toHiveType(io.prestosql.plugin.hive.HiveType.toHiveType) TableStatistics(io.prestosql.spi.statistics.TableStatistics) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) Assertions.assertInstanceOf(io.airlift.testing.Assertions.assertInstanceOf) FileSystem(org.apache.hadoop.fs.FileSystem) Test(org.testng.annotations.Test) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) NullableValue(io.prestosql.spi.predicate.NullableValue) FileStatus(org.apache.hadoop.fs.FileStatus) TEXTFILE(io.prestosql.plugin.hive.HiveStorageFormat.TEXTFILE) TypeSignature.parseTypeSignature(io.prestosql.spi.type.TypeSignature.parseTypeSignature) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) Files.createTempDirectory(java.nio.file.Files.createTempDirectory) Map(java.util.Map) RowType(io.prestosql.spi.type.RowType) ENGLISH(java.util.Locale.ENGLISH) Assert.assertFalse(org.testng.Assert.assertFalse) Chars.isCharType(io.prestosql.spi.type.Chars.isCharType) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) MoreExecutors.directExecutor(com.google.common.util.concurrent.MoreExecutors.directExecutor) RCTEXT(io.prestosql.plugin.hive.HiveStorageFormat.RCTEXT) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) Table(io.prestosql.plugin.hive.metastore.Table) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) SchemaTablePrefix(io.prestosql.spi.connector.SchemaTablePrefix) HiveBasicStatistics.createZeroStatistics(io.prestosql.plugin.hive.HiveBasicStatistics.createZeroStatistics) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) TYPE_MANAGER(io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER) MetastoreLocator(io.prestosql.plugin.hive.metastore.thrift.MetastoreLocator) LocalDateTime(java.time.LocalDateTime) PRESTO_QUERY_ID_NAME(io.prestosql.plugin.hive.HiveMetadata.PRESTO_QUERY_ID_NAME) ThriftHiveMetastoreConfig(io.prestosql.plugin.hive.metastore.thrift.ThriftHiveMetastoreConfig) OptionalLong(java.util.OptionalLong) REGULAR(io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.REGULAR) PARTITION_KEY(io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.PARTITION_KEY) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ImmutableSet.toImmutableSet(com.google.common.collect.ImmutableSet.toImmutableSet) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) ThriftHiveMetastore(io.prestosql.plugin.hive.metastore.thrift.ThriftHiveMetastore) DiscretePredicates(io.prestosql.spi.connector.DiscretePredicates) Assertions.assertGreaterThanOrEqual(io.airlift.testing.Assertions.assertGreaterThanOrEqual) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) PARQUET(io.prestosql.plugin.hive.HiveStorageFormat.PARQUET) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) AfterClass(org.testng.annotations.AfterClass) HiveTestUtils.mapType(io.prestosql.plugin.hive.HiveTestUtils.mapType) FileUtils.makePartName(org.apache.hadoop.hive.common.FileUtils.makePartName) IOException(java.io.IOException) Iterables.getOnlyElement(com.google.common.collect.Iterables.getOnlyElement) ROLLBACK_RIGHT_AWAY(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_RIGHT_AWAY) HostAndPort(com.google.common.net.HostAndPort) USER(io.prestosql.spi.security.PrincipalType.USER) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) VARBINARY(io.prestosql.spi.type.VarbinaryType.VARBINARY) HiveTestUtils.getDefaultOrcFileWriterFactory(io.prestosql.plugin.hive.HiveTestUtils.getDefaultOrcFileWriterFactory) ConnectorPageSourceProvider(io.prestosql.spi.connector.ConnectorPageSourceProvider) ROLLBACK_AFTER_APPEND_PAGE(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_APPEND_PAGE) Varchars.isVarcharType(io.prestosql.spi.type.Varchars.isVarcharType) ConnectorSplitManager(io.prestosql.spi.connector.ConnectorSplitManager) ViewNotFoundException(io.prestosql.spi.connector.ViewNotFoundException) MaterializedResult.materializeSourceDataStream(io.prestosql.testing.MaterializedResult.materializeSourceDataStream) MaterializedResult(io.prestosql.testing.MaterializedResult) Duration(io.airlift.units.Duration) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) ConnectorTableProperties(io.prestosql.spi.connector.ConnectorTableProperties) BOOLEAN(io.prestosql.spi.type.BooleanType.BOOLEAN) Type(io.prestosql.spi.type.Type) RcFilePageSource(io.prestosql.plugin.hive.rcfile.RcFilePageSource) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) DecimalType.createDecimalType(io.prestosql.spi.type.DecimalType.createDecimalType) PrestoException(io.prestosql.spi.PrestoException) HiveBasicStatistics.createEmptyStatistics(io.prestosql.plugin.hive.HiveBasicStatistics.createEmptyStatistics) ImmutableSet(com.google.common.collect.ImmutableSet) CachingHiveMetastore(io.prestosql.plugin.hive.metastore.CachingHiveMetastore) MetadataManager.createTestMetadataManager(io.prestosql.metadata.MetadataManager.createTestMetadataManager) ROLLBACK_AFTER_DELETE(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_DELETE) HiveUtil.columnExtraInfo(io.prestosql.plugin.hive.HiveUtil.columnExtraInfo) BeforeClass(org.testng.annotations.BeforeClass) Collection(java.util.Collection) UUID(java.util.UUID) TINYINT(io.prestosql.spi.type.TinyintType.TINYINT) Assert.assertNotNull(org.testng.Assert.assertNotNull) HYPER_LOG_LOG(io.prestosql.spi.type.HyperLogLogType.HYPER_LOG_LOG) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) JsonCodec(io.airlift.json.JsonCodec) IntStream(java.util.stream.IntStream) NOT_PARTITIONED(io.prestosql.spi.connector.NotPartitionedPartitionHandle.NOT_PARTITIONED) SqlTimestamp(io.prestosql.spi.type.SqlTimestamp) BUCKET_COLUMN_NAME(io.prestosql.plugin.hive.HiveColumnHandle.BUCKET_COLUMN_NAME) Assert.assertNull(org.testng.Assert.assertNull) ConnectorViewDefinition(io.prestosql.spi.connector.ConnectorViewDefinition) SqlDate(io.prestosql.spi.type.SqlDate) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) OptionalDouble(java.util.OptionalDouble) Assert.assertEquals(org.testng.Assert.assertEquals) BUCKETED_BY_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.BUCKETED_BY_PROPERTY) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) OptionalInt(java.util.OptionalInt) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) ViewColumn(io.prestosql.spi.connector.ConnectorViewDefinition.ViewColumn) HiveColumnHandle.bucketColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle.bucketColumnHandle) DATE(io.prestosql.spi.type.DateType.DATE) Math.toIntExact(java.lang.Math.toIntExact) STORAGE_FORMAT_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.STORAGE_FORMAT_PROPERTY) Block(io.prestosql.spi.block.Block) ExecutorService(java.util.concurrent.ExecutorService) Collections.emptyMap(java.util.Collections.emptyMap) ParquetPageSource(io.prestosql.plugin.hive.parquet.ParquetPageSource) UTF_8(java.nio.charset.StandardCharsets.UTF_8) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) TupleDomain(io.prestosql.spi.predicate.TupleDomain) Assert.fail(org.testng.Assert.fail) DateTime(org.joda.time.DateTime) PartitionWithStatistics(io.prestosql.plugin.hive.metastore.PartitionWithStatistics) Page(io.prestosql.spi.Page) HiveTestUtils.getDefaultHiveDataStreamFactories(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveDataStreamFactories) Executors.newFixedThreadPool(java.util.concurrent.Executors.newFixedThreadPool) Hashing.sha256(com.google.common.hash.Hashing.sha256) BUCKETING_V1(io.prestosql.plugin.hive.HiveBucketing.BucketingVersion.BUCKETING_V1) Assertions.assertEqualsIgnoreOrder(io.airlift.testing.Assertions.assertEqualsIgnoreOrder) PARTITIONED_BY_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.PARTITIONED_BY_PROPERTY) Collectors.toList(java.util.stream.Collectors.toList) Column(io.prestosql.plugin.hive.metastore.Column) JoinCompiler(io.prestosql.sql.gen.JoinCompiler) Assert.assertTrue(org.testng.Assert.assertTrue) RecordPageSource(io.prestosql.spi.connector.RecordPageSource) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) OrcConcatPageSource(io.prestosql.plugin.hive.orc.OrcConcatPageSource) ROLLBACK_AFTER_BEGIN_INSERT(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_BEGIN_INSERT) Arrays(java.util.Arrays) RCBINARY(io.prestosql.plugin.hive.HiveStorageFormat.RCBINARY) NoHdfsAuthentication(io.prestosql.plugin.hive.authentication.NoHdfsAuthentication) ConnectorPageSink(io.prestosql.spi.connector.ConnectorPageSink) ValueSet(io.prestosql.spi.predicate.ValueSet) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) BigDecimal(java.math.BigDecimal) Sets.difference(com.google.common.collect.Sets.difference) Executors.newScheduledThreadPool(java.util.concurrent.Executors.newScheduledThreadPool) HIVE_STRING(io.prestosql.plugin.hive.HiveType.HIVE_STRING) RowFieldName(io.prestosql.spi.type.RowFieldName) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ConnectorPageSinkProvider(io.prestosql.spi.connector.ConnectorPageSinkProvider) JSON(io.prestosql.plugin.hive.HiveStorageFormat.JSON) HIVE_INT(io.prestosql.plugin.hive.HiveType.HIVE_INT) HIVE_LONG(io.prestosql.plugin.hive.HiveType.HIVE_LONG) ConstraintApplicationResult(io.prestosql.spi.connector.ConstraintApplicationResult) UNGROUPED_SCHEDULING(io.prestosql.spi.connector.ConnectorSplitManager.SplitSchedulingStrategy.UNGROUPED_SCHEDULING) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) SqlStandardAccessControlMetadata(io.prestosql.plugin.hive.security.SqlStandardAccessControlMetadata) TIMESTAMP(io.prestosql.spi.type.TimestampType.TIMESTAMP) MILLISECONDS(java.util.concurrent.TimeUnit.MILLISECONDS) VarcharType.createVarcharType(io.prestosql.spi.type.VarcharType.createVarcharType) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Domain(io.prestosql.spi.predicate.Domain) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) TestingNodeManager(io.prestosql.testing.TestingNodeManager) Lists.reverse(com.google.common.collect.Lists.reverse) MoreObjects.toStringHelper(com.google.common.base.MoreObjects.toStringHelper) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) StandardTypes(io.prestosql.spi.type.StandardTypes) ConnectorSplit(io.prestosql.spi.connector.ConnectorSplit) BUCKET_COUNT_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.BUCKET_COUNT_PROPERTY) MapType(io.prestosql.spi.type.MapType) GroupByHashPageIndexerFactory(io.prestosql.GroupByHashPageIndexerFactory) Float.floatToRawIntBits(java.lang.Float.floatToRawIntBits) VARCHAR(io.prestosql.spi.type.VarcharType.VARCHAR) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) AVRO(io.prestosql.plugin.hive.HiveStorageFormat.AVRO) HiveTestUtils.rowType(io.prestosql.plugin.hive.HiveTestUtils.rowType) RecordCursor(io.prestosql.spi.connector.RecordCursor) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) SESSION(io.prestosql.plugin.hive.HiveTestUtils.SESSION) HiveMetastore(io.prestosql.plugin.hive.metastore.HiveMetastore) LongStream(java.util.stream.LongStream) MULTIDELIMIT(io.prestosql.plugin.hive.HiveStorageFormat.MULTIDELIMIT) MoreFutures.getFutureValue(io.airlift.concurrent.MoreFutures.getFutureValue) PAGE_SORTER(io.prestosql.plugin.hive.HiveTestUtils.PAGE_SORTER) UTC(org.joda.time.DateTimeZone.UTC) MaterializedRow(io.prestosql.testing.MaterializedRow) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) IS_EXTERNAL_TABLE(io.prestosql.plugin.hive.HiveTableProperties.IS_EXTERNAL_TABLE) HiveColumnStatistics(io.prestosql.plugin.hive.metastore.HiveColumnStatistics) DateTimeTestingUtils.sqlTimestampOf(io.prestosql.testing.DateTimeTestingUtils.sqlTimestampOf) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) STAGE_AND_MOVE_TO_TARGET_DIRECTORY(io.prestosql.plugin.hive.LocationHandle.WriteMode.STAGE_AND_MOVE_TO_TARGET_DIRECTORY) TableType(org.apache.hadoop.hive.metastore.TableType) ConnectorMetadata(io.prestosql.spi.connector.ConnectorMetadata) OrcPageSource(io.prestosql.plugin.hive.orc.OrcPageSource) HiveTestUtils.getDefaultHiveSelectiveFactories(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveSelectiveFactories) HdfsContext(io.prestosql.plugin.hive.HdfsEnvironment.HdfsContext) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) Assertions.assertGreaterThan(io.airlift.testing.Assertions.assertGreaterThan) MoreCollectors.onlyElement(com.google.common.collect.MoreCollectors.onlyElement) Iterables.concat(com.google.common.collect.Iterables.concat) HiveWriteUtils.createDirectory(io.prestosql.plugin.hive.HiveWriteUtils.createDirectory) Path(org.apache.hadoop.fs.Path) KILOBYTE(io.airlift.units.DataSize.Unit.KILOBYTE) Constraint(io.prestosql.spi.connector.Constraint) ImmutableMap(com.google.common.collect.ImmutableMap) ArrayType(io.prestosql.spi.type.ArrayType) CharType.createCharType(io.prestosql.spi.type.CharType.createCharType) ConnectorSplitSource(io.prestosql.spi.connector.ConnectorSplitSource) HiveTestUtils.getDefaultHiveFileWriterFactories(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveFileWriterFactories) PRESTO_VERSION_NAME(io.prestosql.plugin.hive.HiveMetadata.PRESTO_VERSION_NAME) String.format(java.lang.String.format) Preconditions.checkState(com.google.common.base.Preconditions.checkState) DataSize(io.airlift.units.DataSize) List(java.util.List) VarcharType.createUnboundedVarcharType(io.prestosql.spi.type.VarcharType.createUnboundedVarcharType) HiveTestUtils.getTypes(io.prestosql.plugin.hive.HiveTestUtils.getTypes) ConnectorTransactionHandle(io.prestosql.spi.connector.ConnectorTransactionHandle) Optional(java.util.Optional) SORTED_BY_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.SORTED_BY_PROPERTY) Logger(io.airlift.log.Logger) CounterStat(io.airlift.stats.CounterStat) HashMap(java.util.HashMap) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) HivePrincipal(io.prestosql.plugin.hive.metastore.HivePrincipal) AtomicReference(java.util.concurrent.atomic.AtomicReference) SqlVarbinary(io.prestosql.spi.type.SqlVarbinary) BridgingHiveMetastore(io.prestosql.plugin.hive.metastore.thrift.BridgingHiveMetastore) NamedTypeSignature(io.prestosql.spi.type.NamedTypeSignature) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) COMMIT(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.COMMIT) TestingMetastoreLocator(io.prestosql.plugin.hive.metastore.thrift.TestingMetastoreLocator) Verify.verify(com.google.common.base.Verify.verify) Assertions.assertLessThanOrEqual(io.airlift.testing.Assertions.assertLessThanOrEqual) Range(io.prestosql.spi.predicate.Range) Threads.daemonThreadsNamed(io.airlift.concurrent.Threads.daemonThreadsNamed) HivePrivilegeInfo(io.prestosql.plugin.hive.metastore.HivePrivilegeInfo) Objects.requireNonNull(java.util.Objects.requireNonNull) SEQUENCEFILE(io.prestosql.plugin.hive.HiveStorageFormat.SEQUENCEFILE) REAL(io.prestosql.spi.type.RealType.REAL) HiveMetadata.convertToPredicate(io.prestosql.plugin.hive.HiveMetadata.convertToPredicate) ColumnStatistics(io.prestosql.spi.statistics.ColumnStatistics) HiveTestUtils.getNoOpIndexCache(io.prestosql.plugin.hive.HiveTestUtils.getNoOpIndexCache) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) TRANSACTION_CONFLICT(io.prestosql.spi.StandardErrorCode.TRANSACTION_CONFLICT) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) CSV(io.prestosql.plugin.hive.HiveStorageFormat.CSV) HiveTestUtils.getDefaultHiveRecordCursorProvider(io.prestosql.plugin.hive.HiveTestUtils.getDefaultHiveRecordCursorProvider) HiveTestUtils.arrayType(io.prestosql.plugin.hive.HiveTestUtils.arrayType) SMALLINT(io.prestosql.spi.type.SmallintType.SMALLINT) Executors.newCachedThreadPool(java.util.concurrent.Executors.newCachedThreadPool) ROLLBACK_AFTER_SINK_FINISH(io.prestosql.plugin.hive.AbstractTestHive.TransactionDeleteInsertTestTag.ROLLBACK_AFTER_SINK_FINISH) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) Partition(io.prestosql.plugin.hive.metastore.Partition) Table(io.prestosql.plugin.hive.metastore.Table) PartitionWithStatistics(io.prestosql.plugin.hive.metastore.PartitionWithStatistics) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity)

Example 4 with TEXTFILE

use of io.prestosql.plugin.hive.HiveStorageFormat.TEXTFILE in project boostkit-bigdata by kunpengcompute.

the class TestHiveFileFormats method testTextFile.

@Test(dataProvider = "rowCount")
public void testTextFile(int rowCount) throws Exception {
    List<TestColumn> testColumns = TEST_COLUMNS.stream().filter(column -> !column.getName().equals("t_map_null_key_complex_key_value")).collect(toList());
    assertThatFileFormat(TEXTFILE).withColumns(testColumns).withRowsCount(rowCount).isReadableByRecordCursor(new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT));
}
Also used : Iterables.transform(com.google.common.collect.Iterables.transform) RCBINARY(io.prestosql.plugin.hive.HiveStorageFormat.RCBINARY) Test(org.testng.annotations.Test) TEXTFILE(io.prestosql.plugin.hive.HiveStorageFormat.TEXTFILE) FileSplit(org.apache.hadoop.mapred.FileSplit) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Predicates.not(com.google.common.base.Predicates.not) Locale(java.util.Locale) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Duration(java.time.Duration) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) JSON(io.prestosql.plugin.hive.HiveStorageFormat.JSON) SERIALIZATION_LIB(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB) LzoCodec(io.airlift.compress.lzo.LzoCodec) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) BeforeClass(org.testng.annotations.BeforeClass) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Assert.assertNotNull(org.testng.Assert.assertNotNull) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) RCTEXT(io.prestosql.plugin.hive.HiveStorageFormat.RCTEXT) List(java.util.List) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) ParquetPageSourceFactory(io.prestosql.plugin.hive.parquet.ParquetPageSourceFactory) HDFS_ENVIRONMENT(io.prestosql.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT) Optional(java.util.Optional) Iterables.filter(com.google.common.collect.Iterables.filter) ORC(io.prestosql.plugin.hive.HiveStorageFormat.ORC) Joiner(com.google.common.base.Joiner) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) DataProvider(org.testng.annotations.DataProvider) Logger(io.airlift.log.Logger) PrimitiveCategory(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory) TYPE_MANAGER(io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER) Assert.assertEquals(org.testng.Assert.assertEquals) RcFilePageSourceFactory(io.prestosql.plugin.hive.rcfile.RcFilePageSourceFactory) OptionalInt(java.util.OptionalInt) PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) LzopCodec(io.airlift.compress.lzo.LzopCodec) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Objects.requireNonNull(java.util.Objects.requireNonNull) OrcPageSourceFactory(io.prestosql.plugin.hive.orc.OrcPageSourceFactory) AVRO(io.prestosql.plugin.hive.HiveStorageFormat.AVRO) SEQUENCEFILE(io.prestosql.plugin.hive.HiveStorageFormat.SEQUENCEFILE) RecordCursor(io.prestosql.spi.connector.RecordCursor) PARQUET(io.prestosql.plugin.hive.HiveStorageFormat.PARQUET) Properties(java.util.Properties) TupleDomain(io.prestosql.spi.predicate.TupleDomain) CSV(io.prestosql.plugin.hive.HiveStorageFormat.CSV) Assert.fail(org.testng.Assert.fail) IOException(java.io.IOException) File(java.io.File) OrcWriterOptions(io.prestosql.orc.OrcWriterOptions) HiveTestUtils.createGenericHiveRecordCursorProvider(io.prestosql.plugin.hive.HiveTestUtils.createGenericHiveRecordCursorProvider) Collectors.toList(java.util.stream.Collectors.toList) OrcCacheStore(io.prestosql.orc.OrcCacheStore) FILE_INPUT_FORMAT(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT) Assert.assertTrue(org.testng.Assert.assertTrue) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) RecordPageSource(io.prestosql.spi.connector.RecordPageSource) HiveTestUtils.createGenericHiveRecordCursorProvider(io.prestosql.plugin.hive.HiveTestUtils.createGenericHiveRecordCursorProvider) Test(org.testng.annotations.Test)

Aggregations

ImmutableList (com.google.common.collect.ImmutableList)4 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)4 ImmutableMap (com.google.common.collect.ImmutableMap)4 ImmutableSet (com.google.common.collect.ImmutableSet)4 Logger (io.airlift.log.Logger)4 Slices.utf8Slice (io.airlift.slice.Slices.utf8Slice)4 AVRO (io.prestosql.plugin.hive.HiveStorageFormat.AVRO)3 CSV (io.prestosql.plugin.hive.HiveStorageFormat.CSV)3 JSON (io.prestosql.plugin.hive.HiveStorageFormat.JSON)3 ORC (io.prestosql.plugin.hive.HiveStorageFormat.ORC)3 PARQUET (io.prestosql.plugin.hive.HiveStorageFormat.PARQUET)3 RCBINARY (io.prestosql.plugin.hive.HiveStorageFormat.RCBINARY)3 RCTEXT (io.prestosql.plugin.hive.HiveStorageFormat.RCTEXT)3 SEQUENCEFILE (io.prestosql.plugin.hive.HiveStorageFormat.SEQUENCEFILE)3 TEXTFILE (io.prestosql.plugin.hive.HiveStorageFormat.TEXTFILE)3 TYPE_MANAGER (io.prestosql.plugin.hive.HiveTestUtils.TYPE_MANAGER)3 PrestoException (io.prestosql.spi.PrestoException)3 ConnectorPageSource (io.prestosql.spi.connector.ConnectorPageSource)3 ConnectorSession (io.prestosql.spi.connector.ConnectorSession)3 RecordCursor (io.prestosql.spi.connector.RecordCursor)3