use of com.facebook.presto.hive.ParquetFileWriterConfig in project presto by prestodb.
the class ParquetTester method assertMaxReadBytes.
void assertMaxReadBytes(List<ObjectInspector> objectInspectors, Iterable<?>[] writeValues, Iterable<?>[] readValues, List<String> columnNames, List<Type> columnTypes, Optional<MessageType> parquetSchema, DataSize maxReadBlockSize) throws Exception {
WriterVersion version = PARQUET_1_0;
CompressionCodecName compressionCodecName = UNCOMPRESSED;
HiveClientConfig config = new HiveClientConfig().setHiveStorageFormat(HiveStorageFormat.PARQUET).setUseParquetColumnNames(false).setParquetMaxReadBlockSize(maxReadBlockSize);
ConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
try (TempFile tempFile = new TempFile("test", "parquet")) {
JobConf jobConf = new JobConf();
jobConf.setEnum(COMPRESSION, compressionCodecName);
jobConf.setBoolean(ENABLE_DICTIONARY, true);
jobConf.setEnum(WRITER_VERSION, version);
writeParquetColumn(jobConf, tempFile.getFile(), compressionCodecName, createTableProperties(columnNames, objectInspectors), getStandardStructObjectInspector(columnNames, objectInspectors), getIterators(writeValues), parquetSchema, false);
Iterator<?>[] expectedValues = getIterators(readValues);
try (ConnectorPageSource pageSource = getFileFormat().createFileFormatReader(session, HDFS_ENVIRONMENT, tempFile.getFile(), columnNames, columnTypes)) {
assertPageSource(columnTypes, expectedValues, pageSource, Optional.of(getParquetMaxReadBlockSize(session).toBytes()));
assertFalse(stream(expectedValues).allMatch(Iterator::hasNext));
}
}
}
use of com.facebook.presto.hive.ParquetFileWriterConfig in project presto by prestodb.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatistics.
@Test
public void testGetTableStatistics() {
String partitionName = "p1=string1/p2=1234";
PartitionStatistics statistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.of(5000), OptionalLong.empty())).setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300)))).build();
MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, table, hivePartitions) -> ImmutableMap.of(partitionName, statistics));
TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty(), Optional.empty());
TableStatistics expected = TableStatistics.builder().setRowCount(Estimate.of(1000)).setTotalSize(Estimate.of(5000)).setColumnStatistics(PARTITION_COLUMN_1, ColumnStatistics.builder().setDataSize(Estimate.of(7000)).setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(1)).build()).setColumnStatistics(PARTITION_COLUMN_2, ColumnStatistics.builder().setRange(new DoubleRange(1234, 1234)).setNullsFraction(Estimate.of(0)).setDistinctValuesCount(Estimate.of(1)).build()).setColumnStatistics(columnHandle, ColumnStatistics.builder().setRange(new DoubleRange(-100, 100)).setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(300)).build()).build();
assertEquals(statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of("p1", PARTITION_COLUMN_1, "p2", PARTITION_COLUMN_2, COLUMN, columnHandle), ImmutableMap.of("p1", VARCHAR, "p2", BIGINT, COLUMN, BIGINT), ImmutableList.of(partition(partitionName))), expected);
}
use of com.facebook.presto.hive.ParquetFileWriterConfig in project presto by prestodb.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatisticsUnpartitioned.
@Test
public void testGetTableStatisticsUnpartitioned() {
PartitionStatistics statistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(OptionalLong.empty(), OptionalLong.of(1000), OptionalLong.of(5000), OptionalLong.empty())).setColumnStatistics(ImmutableMap.of(COLUMN, createIntegerColumnStatistics(OptionalLong.of(-100), OptionalLong.of(100), OptionalLong.of(500), OptionalLong.of(300)))).build();
MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, table, hivePartitions) -> ImmutableMap.of(UNPARTITIONED_ID, statistics));
TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig(), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
HiveColumnHandle columnHandle = new HiveColumnHandle(COLUMN, HIVE_LONG, BIGINT.getTypeSignature(), 2, REGULAR, Optional.empty(), Optional.empty());
TableStatistics expected = TableStatistics.builder().setRowCount(Estimate.of(1000)).setTotalSize(Estimate.of(5000)).setColumnStatistics(columnHandle, ColumnStatistics.builder().setRange(new DoubleRange(-100, 100)).setNullsFraction(Estimate.of(0.5)).setDistinctValuesCount(Estimate.of(300)).build()).build();
assertEquals(statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(COLUMN, columnHandle), ImmutableMap.of(COLUMN, BIGINT), ImmutableList.of(new HivePartition(TABLE))), expected);
}
use of com.facebook.presto.hive.ParquetFileWriterConfig in project presto by prestodb.
the class TestMetastoreHiveStatisticsProvider method testGetTableStatisticsValidationFailure.
@Test
public void testGetTableStatisticsValidationFailure() {
PartitionStatistics corruptedStatistics = PartitionStatistics.builder().setBasicStatistics(new HiveBasicStatistics(-1, 0, 0, 0)).build();
String partitionName = "p1=string1/p2=1234";
MetastoreHiveStatisticsProvider statisticsProvider = new MetastoreHiveStatisticsProvider((session, table, hivePartitions) -> ImmutableMap.of(partitionName, corruptedStatistics));
TestingConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setIgnoreCorruptedStatistics(false), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
assertThatThrownBy(() -> statisticsProvider.getTableStatistics(session, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName)))).isInstanceOf(PrestoException.class).hasFieldOrPropertyWithValue("errorCode", HIVE_CORRUPTED_COLUMN_STATISTICS.toErrorCode());
TestingConnectorSession ignoreSession = new TestingConnectorSession(new HiveSessionProperties(new HiveClientConfig().setIgnoreCorruptedStatistics(true), new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
assertEquals(statisticsProvider.getTableStatistics(ignoreSession, TABLE, ImmutableMap.of(), ImmutableMap.of(), ImmutableList.of(partition(partitionName))), TableStatistics.empty());
}
use of com.facebook.presto.hive.ParquetFileWriterConfig in project presto by prestodb.
the class ParquetTester method testSingleRead.
public static void testSingleRead(Iterable<?>[] readValues, List<String> columnNames, List<Type> columnTypes, ParquetMetadataSource parquetMetadataSource, File dataFile) {
HiveClientConfig config = new HiveClientConfig().setHiveStorageFormat(HiveStorageFormat.PARQUET).setUseParquetColumnNames(false).setParquetMaxReadBlockSize(new DataSize(1_000, DataSize.Unit.BYTE));
ConnectorSession session = new TestingConnectorSession(new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());
HiveBatchPageSourceFactory pageSourceFactory = new ParquetPageSourceFactory(FUNCTION_AND_TYPE_MANAGER, FUNCTION_RESOLUTION, HDFS_ENVIRONMENT, new FileFormatDataSourceStats(), parquetMetadataSource);
ConnectorPageSource connectorPageSource = createPageSource(pageSourceFactory, session, dataFile, columnNames, columnTypes, HiveStorageFormat.PARQUET);
Iterator<?>[] expectedValues = stream(readValues).map(Iterable::iterator).toArray(size -> new Iterator<?>[size]);
if (connectorPageSource instanceof RecordPageSource) {
assertRecordCursor(columnTypes, expectedValues, ((RecordPageSource) connectorPageSource).getCursor());
} else {
assertPageSource(columnTypes, expectedValues, connectorPageSource);
}
assertFalse(stream(expectedValues).allMatch(Iterator::hasNext));
}
Aggregations