Search in sources:

Example 6 with OrcFileWriterConfig

use of io.prestosql.plugin.hive.OrcFileWriterConfig in project boostkit-bigdata by kunpengcompute.

In class TestMetastoreHiveStatisticsProvider, method testGetTableStatisticsValidationFailure:

/**
 * Checks how the provider reacts to corrupted partition statistics depending on the
 * {@code ignoreCorruptedStatistics} setting of the session's {@link HiveConfig}:
 * with the flag disabled a {@link PrestoException} with error code
 * {@code HIVE_CORRUPTED_COLUMN_STATISTICS} is expected; with the flag enabled the result
 * must equal {@link TableStatistics#empty()}.
 */
@Test
public void testGetTableStatisticsValidationFailure() {
    String partitionName = "p1=string1/p2=1234";

    // Basic statistics whose first value is -1; the test treats these as corrupted.
    HiveBasicStatistics invalidBasicStatistics = new HiveBasicStatistics(-1, 0, 0, 0);
    PartitionStatistics corruptedStatistics = PartitionStatistics.builder()
            .setBasicStatistics(invalidBasicStatistics)
            .build();

    // The statistics source always answers with the corrupted entry for the single partition.
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (connectorSession, tableName, partitions, hiveTable) -> ImmutableMap.of(partitionName, corruptedStatistics));

    // Case 1: corrupted statistics are not ignored, so the lookup has to fail.
    HiveSessionProperties strictProperties = new HiveSessionProperties(
            new HiveConfig().setIgnoreCorruptedStatistics(false),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(strictProperties.getSessionProperties());
    assertThatThrownBy(() -> statisticsProvider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of(),
            ImmutableMap.of(),
            ImmutableList.of(partition(partitionName)),
            true,
            table))
            .isInstanceOf(PrestoException.class)
            .hasFieldOrPropertyWithValue("errorCode", HiveErrorCode.HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode());

    // Case 2: corrupted statistics are ignored, so empty table statistics come back instead.
    HiveSessionProperties lenientProperties = new HiveSessionProperties(
            new HiveConfig().setIgnoreCorruptedStatistics(true),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig());
    TestingConnectorSession ignoreSession = new TestingConnectorSession(lenientProperties.getSessionProperties());
    TableStatistics actual = statisticsProvider.getTableStatistics(
            ignoreSession,
            TABLE,
            ImmutableMap.of(),
            ImmutableMap.of(),
            ImmutableList.of(partition(partitionName)),
            true,
            table);
    assertEquals(actual, TableStatistics.empty());
}
Also used : MetastoreHiveStatisticsProvider.validatePartitionStatistics(io.prestosql.plugin.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) OrcFileWriterConfig(io.prestosql.plugin.hive.OrcFileWriterConfig) PrestoException(io.prestosql.spi.PrestoException) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) ParquetFileWriterConfig(io.prestosql.plugin.hive.ParquetFileWriterConfig) HiveConfig(io.prestosql.plugin.hive.HiveConfig) Test(org.testng.annotations.Test)

Example 7 with OrcFileWriterConfig

use of io.prestosql.plugin.hive.OrcFileWriterConfig in project boostkit-bigdata by kunpengcompute.

In class TestMetastoreHiveStatisticsProvider, method testGetTableStatisticsUnpartitioned:

/**
 * Checks that statistics registered under {@code UNPARTITIONED_ID} are converted into the
 * expected {@link TableStatistics}: a row count of 1000 and, for {@code COLUMN}, a range of
 * [-100, 100], a nulls fraction of 0.5 and a distinct-values count of 300.
 */
@Test
public void testGetTableStatisticsUnpartitioned() {
    // Only the second basic statistic (1000) is populated; the remaining ones are left empty.
    HiveBasicStatistics basicStatistics = new HiveBasicStatistics(
            OptionalLong.empty(),
            OptionalLong.of(1000),
            OptionalLong.empty(),
            OptionalLong.empty());
    PartitionStatistics statistics = PartitionStatistics.builder()
            .setBasicStatistics(basicStatistics)
            .setColumnStatistics(ImmutableMap.of(
                    COLUMN,
                    HiveColumnStatistics.createIntegerColumnStatistics(
                            OptionalLong.of(-100),
                            OptionalLong.of(100),
                            OptionalLong.of(500),
                            OptionalLong.of(300))))
            .build();

    // The statistics source hands back the single entry above for every request.
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (connectorSession, tableName, partitions, hiveTable) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));

    HiveSessionProperties defaultProperties = new HiveSessionProperties(
            new HiveConfig(),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(defaultProperties.getSessionProperties());

    HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty());

    ColumnStatistics expectedColumnStatistics = ColumnStatistics.builder()
            .setRange(new DoubleRange(-100, 100))
            .setNullsFraction(Estimate.of(0.5))
            .setDistinctValuesCount(Estimate.of(300))
            .build();
    TableStatistics expected = TableStatistics.builder()
            .setRowCount(Estimate.of(1000))
            .setColumnStatistics(columnHandle, expectedColumnStatistics)
            .build();

    TableStatistics actual = statisticsProvider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of(COLUMN, columnHandle),
            ImmutableMap.of(COLUMN, BIGINT),
            ImmutableList.of(new HivePartition(TABLE)),
            true,
            table);
    assertEquals(actual, expected);
}
Also used : DoubleRange(io.prestosql.spi.statistics.DoubleRange) MetastoreHiveStatisticsProvider.validatePartitionStatistics(io.prestosql.plugin.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) OrcFileWriterConfig(io.prestosql.plugin.hive.OrcFileWriterConfig) TableStatistics(io.prestosql.spi.statistics.TableStatistics) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) ParquetFileWriterConfig(io.prestosql.plugin.hive.ParquetFileWriterConfig) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) HiveConfig(io.prestosql.plugin.hive.HiveConfig) HivePartition(io.prestosql.plugin.hive.HivePartition) Test(org.testng.annotations.Test)

Example 8 with OrcFileWriterConfig

use of io.prestosql.plugin.hive.OrcFileWriterConfig in project boostkit-bigdata by kunpengcompute.

In class TestMetastoreHiveStatisticsProvider, method testGetTableStatisticsSampling:

/**
 * Checks that with a partition statistics sample size of 1 the statistics source is asked
 * about exactly one partition, even though two partitions are passed to the provider.
 */
@Test
public void testGetTableStatisticsSampling() {
    // Session whose Hive configuration limits the partition statistics sample to one entry.
    HiveSessionProperties sampleOfOneProperties = new HiveSessionProperties(
            new HiveConfig().setPartitionStatisticsSampleSize(1),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(sampleOfOneProperties.getSessionProperties());

    // The assertions live inside the statistics source: they inspect what the provider requests.
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (connectorSession, requestedTable, requestedPartitions, hiveTable) -> {
                assertEquals(requestedTable, TABLE);
                assertEquals(requestedPartitions.size(), 1);
                return ImmutableMap.of();
            });

    // The returned statistics are deliberately not inspected.
    statisticsProvider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of(),
            ImmutableMap.of(),
            ImmutableList.of(
                    partition("p1=string1/p2=1234"),
                    partition("p1=string1/p2=1235")),
            true,
            table);
}
Also used : TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) OrcFileWriterConfig(io.prestosql.plugin.hive.OrcFileWriterConfig) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) ParquetFileWriterConfig(io.prestosql.plugin.hive.ParquetFileWriterConfig) HiveConfig(io.prestosql.plugin.hive.HiveConfig) Test(org.testng.annotations.Test)

Example 9 with OrcFileWriterConfig

use of io.prestosql.plugin.hive.OrcFileWriterConfig in project hetu-core by openlookeng.

In class ParquetTester, method assertMaxReadBytes:

/**
 * Writes {@code writeValues} to a temporary, uncompressed Parquet file, reads the file back
 * through the file-format reader using a session whose Parquet max read block size is
 * {@code maxReadBlockSize}, and verifies the pages against {@code readValues}.
 *
 * @param objectInspectors inspectors used to build the table properties and the struct inspector for the writer
 * @param writeValues per-column values written to the file
 * @param readValues per-column values expected when reading the file back
 * @param columnNames column names, used for both writing and reading
 * @param columnTypes column types passed to the reader and to the page assertions
 * @param parquetSchema optional schema handed to the writer
 * @param maxReadBlockSize value configured as the Parquet max read block size of the session
 * @throws Exception if writing, reading or closing the temporary file fails
 */
static void assertMaxReadBytes(List<ObjectInspector> objectInspectors, Iterable<?>[] writeValues, Iterable<?>[] readValues, List<String> columnNames, List<Type> columnTypes, Optional<MessageType> parquetSchema, DataSize maxReadBlockSize) throws Exception {
    CompressionCodecName compressionCodecName = UNCOMPRESSED;
    HiveConfig config = new HiveConfig().setHiveStorageFormat(HiveStorageFormat.PARQUET).setUseParquetColumnNames(false).setParquetMaxReadBlockSize(maxReadBlockSize);
    ConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    try (TempFile tempFile = new TempFile("test", "parquet")) {
        JobConf jobConf = new JobConf();
        jobConf.setEnum(COMPRESSION, compressionCodecName);
        jobConf.setBoolean(ENABLE_DICTIONARY, true);
        jobConf.setEnum(WRITER_VERSION, PARQUET_1_0);
        writeParquetColumn(jobConf, tempFile.getFile(), compressionCodecName, createTableProperties(columnNames, objectInspectors), getStandardStructObjectInspector(columnNames, objectInspectors), getIterators(writeValues), parquetSchema, false);
        Iterator<?>[] expectedValues = getIterators(readValues);
        try (ConnectorPageSource pageSource = getFileFormat().createFileFormatReader(session, HDFS_ENVIRONMENT, tempFile.getFile(), columnNames, columnTypes)) {
            assertPageSource(columnTypes, expectedValues, pageSource, Optional.of(getParquetMaxReadBlockSize(session).toBytes()));
            // No expected iterator may have values left over. The previous allMatch check only
            // failed when every column still had values, so a short read in a subset of columns
            // slipped through, and an empty column array failed spuriously.
            assertFalse(stream(expectedValues).anyMatch(Iterator::hasNext));
        }
    }
}
Also used : CompressionCodecName(org.apache.parquet.hadoop.metadata.CompressionCodecName) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) OrcFileWriterConfig(io.prestosql.plugin.hive.OrcFileWriterConfig) Iterator(java.util.Iterator) AbstractIterator(com.google.common.collect.AbstractIterator) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) JobConf(org.apache.hadoop.mapred.JobConf) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) ParquetFileWriterConfig(io.prestosql.plugin.hive.ParquetFileWriterConfig) HiveConfig(io.prestosql.plugin.hive.HiveConfig)

Example 10 with OrcFileWriterConfig

use of io.prestosql.plugin.hive.OrcFileWriterConfig in project hetu-core by openlookeng.

In class TestMetastoreHiveStatisticsProvider, method testGetTableStatisticsSampling:

/**
 * Passes two partitions to the provider while the session's partition statistics sample size
 * is 1, and asserts inside the statistics source that only a single partition is requested
 * for {@code TABLE}.
 */
@Test
public void testGetTableStatisticsSampling() {
    // The statistics source doubles as the verifier of what the provider asks for.
    MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider(
            (connectorSession, requestedTable, sampledPartitions, hiveTable) -> {
                assertEquals(requestedTable, TABLE);
                assertEquals(sampledPartitions.size(), 1);
                return ImmutableMap.of();
            });

    // Hive configuration with the partition statistics sample size set to 1.
    HiveSessionProperties sessionProperties = new HiveSessionProperties(
            new HiveConfig().setPartitionStatisticsSampleSize(1),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());

    // The result is intentionally discarded; the checks run in the lambda above.
    statisticsProvider.getTableStatistics(
            session,
            TABLE,
            ImmutableMap.of(),
            ImmutableMap.of(),
            ImmutableList.of(
                    partition("p1=string1/p2=1234"),
                    partition("p1=string1/p2=1235")),
            true,
            table);
}
Also used : TestingConnectorSession(io.prestosql.testing.TestingConnectorSession) OrcFileWriterConfig(io.prestosql.plugin.hive.OrcFileWriterConfig) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) ParquetFileWriterConfig(io.prestosql.plugin.hive.ParquetFileWriterConfig) HiveConfig(io.prestosql.plugin.hive.HiveConfig) Test(org.testng.annotations.Test)

Aggregations

HiveConfig (io.prestosql.plugin.hive.HiveConfig)12 HiveSessionProperties (io.prestosql.plugin.hive.HiveSessionProperties)12 OrcFileWriterConfig (io.prestosql.plugin.hive.OrcFileWriterConfig)12 ParquetFileWriterConfig (io.prestosql.plugin.hive.ParquetFileWriterConfig)12 TestingConnectorSession (io.prestosql.testing.TestingConnectorSession)12 Test (org.testng.annotations.Test)10 HiveBasicStatistics (io.prestosql.plugin.hive.HiveBasicStatistics)6 PartitionStatistics (io.prestosql.plugin.hive.PartitionStatistics)6 MetastoreHiveStatisticsProvider.validatePartitionStatistics (io.prestosql.plugin.hive.statistics.MetastoreHiveStatisticsProvider.validatePartitionStatistics)6 HiveColumnHandle (io.prestosql.plugin.hive.HiveColumnHandle)4 DoubleRange (io.prestosql.spi.statistics.DoubleRange)4 TableStatistics (io.prestosql.spi.statistics.TableStatistics)4 AbstractIterator (com.google.common.collect.AbstractIterator)2 HivePartition (io.prestosql.plugin.hive.HivePartition)2 PrestoException (io.prestosql.spi.PrestoException)2 ConnectorPageSource (io.prestosql.spi.connector.ConnectorPageSource)2 ConnectorSession (io.prestosql.spi.connector.ConnectorSession)2 Iterator (java.util.Iterator)2 JobConf (org.apache.hadoop.mapred.JobConf)2 CompressionCodecName (org.apache.parquet.hadoop.metadata.CompressionCodecName)2