Use of io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT in project trino by trinodb.
From the class TestHiveFileFormats, method testRCBinaryProjectedColumns.
@Test(dataProvider = "rowCount")
public void testRCBinaryProjectedColumns(int rowCount) throws Exception {
    // RCBinary does not support complex types as map keys and interprets empty VARCHAR as null
    List<TestColumn> supportedColumns = TEST_COLUMNS.stream()
            .filter(testColumn -> {
                String name = testColumn.getName();
                return !name.equals("t_map_null_key_complex_key_value") && !name.equals("t_empty_varchar");
            })
            .collect(toList());

    List<TestColumn> regularColumns = getRegularColumns(supportedColumns);
    List<TestColumn> partitionColumns = getPartitionColumns(supportedColumns);

    // Create projected columns for all supported regular columns
    ImmutableList.Builder<TestColumn> writeColumnsBuilder = ImmutableList.builder();
    ImmutableList.Builder<TestColumn> readColumnsBuilder = ImmutableList.builder();
    generateProjectedColumns(regularColumns, writeColumnsBuilder, readColumnsBuilder);

    List<TestColumn> writeColumns = writeColumnsBuilder.addAll(partitionColumns).build();
    List<TestColumn> readColumns = readColumnsBuilder.addAll(partitionColumns).build();

    assertThatFileFormat(RCBINARY)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withRowsCount(rowCount)
            .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS))
            .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()));
}
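The getRegularColumns and getPartitionColumns helpers are not shown on this page. A minimal sketch of what they plausibly do, assuming they simply split the supported columns on TestColumn.isPartitionKey() (the same accessor used in the TestOrcPredicates example further down); this is an illustration, not the actual Trino test code:

// Hypothetical sketch: separate regular columns from partition columns.
private static List<TestColumn> getRegularColumns(List<TestColumn> columns) {
    return columns.stream()
            .filter(column -> !column.isPartitionKey())
            .collect(toImmutableList());
}

private static List<TestColumn> getPartitionColumns(List<TestColumn> columns) {
    return columns.stream()
            .filter(TestColumn::isPartitionKey)
            .collect(toImmutableList());
}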
Use of io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT in project trino by trinodb.
From the class TestBackgroundHiveSplitLoader, method testFullAcidTableWithOriginalFiles.
@Test
public void testFullAcidTableWithOriginalFiles() throws Exception {
    java.nio.file.Path tablePath = Files.createTempDirectory("TestBackgroundHiveSplitLoader");
    Table table = table(tablePath.toString(), ImmutableList.of(), Optional.empty(), ImmutableMap.of("transactional", "true"));

    String originalFile = tablePath + "/000000_1";
    List<String> filePaths = ImmutableList.of(
            tablePath + "/delta_0000002_0000002_0000/_orc_acid_version",
            tablePath + "/delta_0000002_0000002_0000/bucket_00000");

    for (String path : filePaths) {
        File file = new File(path);
        assertTrue(file.getParentFile().exists() || file.getParentFile().mkdirs(), "Failed creating directory " + file.getParentFile());
        createOrcAcidFile(file);
    }
    Files.write(Paths.get(originalFile), "test".getBytes(UTF_8));

    // The write id list string has the format <currentTxn>$<schema>.<table>:<highWatermark>:<minOpenWriteId>::<abortedTxns>
    // This write id list has a high watermark of 3
    ValidReaderWriteIdList validWriteIdsList = new ValidReaderWriteIdList(format("4$%s.%s:3:9223372036854775807::", table.getDatabaseName(), table.getTableName()));

    BackgroundHiveSplitLoader backgroundHiveSplitLoader = backgroundHiveSplitLoader(
            HDFS_ENVIRONMENT, TupleDomain.all(), Optional.empty(), table, Optional.empty(), Optional.of(validWriteIdsList));
    HiveSplitSource hiveSplitSource = hiveSplitSource(backgroundHiveSplitLoader);
    backgroundHiveSplitLoader.start(hiveSplitSource);
    List<String> splits = drain(hiveSplitSource);

    assertTrue(splits.stream().anyMatch(p -> p.contains(originalFile)), format("%s not found in splits %s", originalFile, splits));
    assertTrue(splits.stream().anyMatch(p -> p.contains(filePaths.get(1))), format("%s not found in splits %s", filePaths.get(1), splits));
}
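As a side note on the write id list used above (my own illustration, not part of the test): Hive's ValidReaderWriteIdList can also be built directly from the table-level form <schema>.<table>:<highWatermark>:<minOpenWriteId>:<openWriteIds>:<abortedWriteIds>, without the "<currentTxn>$" prefix the test prepends. A minimal sketch, assuming the standard org.apache.hadoop.hive.common.ValidReaderWriteIdList API from hive-common:

// Sketch: parse a write id list with high watermark 3 and no open or aborted write ids.
ValidReaderWriteIdList writeIds = new ValidReaderWriteIdList("default.test:3:9223372036854775807::");

System.out.println(writeIds.getHighWatermark()); // 3
System.out.println(writeIds.isWriteIdValid(2));  // true: at or below the high watermark, not open or aborted
System.out.println(writeIds.isWriteIdValid(5));  // false: above the high watermark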
Use of io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT in project trino by trinodb.
From the class TestHivePageSink, method createPageSource.
private static ConnectorPageSource createPageSource(HiveTransactionHandle transaction, HiveConfig config, File outputFile) {
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, config.getHiveStorageFormat().getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, config.getHiveStorageFormat().getSerde());
    splitProperties.setProperty("columns", Joiner.on(',').join(getColumnHandles().stream()
            .map(HiveColumnHandle::getName)
            .collect(toImmutableList())));
    splitProperties.setProperty("columns.types", Joiner.on(',').join(getColumnHandles().stream()
            .map(HiveColumnHandle::getHiveType)
            .map(hiveType -> hiveType.getHiveTypeName().toString())
            .collect(toImmutableList())));

    HiveSplit split = new HiveSplit(
            SCHEMA_NAME, TABLE_NAME, "", "file:///" + outputFile.getAbsolutePath(),
            0, outputFile.length(), outputFile.length(), outputFile.lastModified(),
            splitProperties, ImmutableList.of(), ImmutableList.of(), OptionalInt.empty(),
            0, false, TableToPartitionMapping.empty(), Optional.empty(), Optional.empty(),
            false, Optional.empty(), 0, SplitWeight.standard());

    ConnectorTableHandle table = new HiveTableHandle(SCHEMA_NAME, TABLE_NAME, ImmutableMap.of(), ImmutableList.of(), ImmutableList.of(), Optional.empty());

    HivePageSourceProvider provider = new HivePageSourceProvider(
            TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, config,
            getDefaultHivePageSourceFactories(HDFS_ENVIRONMENT, config),
            getDefaultHiveRecordCursorProviders(config, HDFS_ENVIRONMENT),
            new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT, config),
            Optional.empty());

    return provider.createPageSource(transaction, getHiveSession(config), split, table, ImmutableList.copyOf(getColumnHandles()), DynamicFilter.EMPTY);
}
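For context (my addition, not taken from the test), a ConnectorPageSource returned by this helper would typically be drained in a loop. A minimal sketch using only the standard io.trino.spi.connector.ConnectorPageSource and io.trino.spi.Page SPI:

// Sketch: read all pages from a page source and count the rows, closing it when done.
static long countRows(ConnectorPageSource pageSource) throws IOException {
    long rows = 0;
    try (pageSource) {
        while (!pageSource.isFinished()) {
            Page page = pageSource.getNextPage();
            if (page != null) {
                rows += page.getPositionCount();
            }
        }
    }
    return rows;
}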
Use of io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT in project trino by trinodb.
From the class TestOrcPredicates, method createPageSource.
private ConnectorPageSource createPageSource(TupleDomain<TestColumn> effectivePredicate, List<TestColumn> columnsToRead, ConnectorSession session, FileSplit split) {
    OrcPageSourceFactory readerFactory = new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC);

    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, ORC.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, ORC.getSerde());

    // Use full columns in split properties
    ImmutableList.Builder<String> splitPropertiesColumnNames = ImmutableList.builder();
    ImmutableList.Builder<String> splitPropertiesColumnTypes = ImmutableList.builder();
    Set<String> baseColumnNames = new HashSet<>();
    for (TestColumn columnToRead : columnsToRead) {
        String name = columnToRead.getBaseName();
        if (!baseColumnNames.contains(name) && !columnToRead.isPartitionKey()) {
            baseColumnNames.add(name);
            splitPropertiesColumnNames.add(name);
            splitPropertiesColumnTypes.add(columnToRead.getBaseObjectInspector().getTypeName());
        }
    }
    splitProperties.setProperty("columns", splitPropertiesColumnNames.build().stream().collect(Collectors.joining(",")));
    splitProperties.setProperty("columns.types", splitPropertiesColumnTypes.build().stream().collect(Collectors.joining(",")));

    List<HivePartitionKey> partitionKeys = columnsToRead.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), (String) input.getWriteValue()))
            .collect(toList());
    String partitionName = String.join("/", partitionKeys.stream()
            .map(partitionKey -> format("%s=%s", partitionKey.getName(), partitionKey.getValue()))
            .collect(toImmutableList()));

    List<HiveColumnHandle> columnHandles = getColumnHandles(columnsToRead);

    TupleDomain<HiveColumnHandle> predicate = effectivePredicate.transformKeys(testColumn -> {
        Optional<HiveColumnHandle> handle = columnHandles.stream()
                .filter(column -> testColumn.getName().equals(column.getName()))
                .findFirst();
        checkState(handle.isPresent(), "Predicate on invalid column");
        return handle.get();
    });

    List<HivePageSourceProvider.ColumnMapping> columnMappings = buildColumnMappings(
            partitionName, partitionKeys, columnHandles, ImmutableList.of(), TableToPartitionMapping.empty(),
            split.getPath(), OptionalInt.empty(), split.getLength(), Instant.now().toEpochMilli());

    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(readerFactory), ImmutableSet.of(), new Configuration(false), session,
            split.getPath(), OptionalInt.empty(), split.getStart(), split.getLength(), split.getLength(),
            splitProperties, predicate, columnHandles, TESTING_TYPE_MANAGER,
            Optional.empty(), Optional.empty(), false, Optional.empty(), false,
            NO_ACID_TRANSACTION, columnMappings);

    assertTrue(pageSource.isPresent());
    return pageSource.get();
}
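To illustrate how a caller might build the effectivePredicate argument (my own sketch, not from TestOrcPredicates), a TupleDomain over a test column can be constructed with the standard io.trino.spi.predicate API; bigintColumn below is a placeholder TestColumn:

// Sketch: constrain a hypothetical BIGINT test column to a single value.
// Domain.singleValue and TupleDomain.withColumnDomains are the standard Trino SPI calls.
TupleDomain<TestColumn> effectivePredicate = TupleDomain.withColumnDomains(
        ImmutableMap.of(bigintColumn, Domain.singleValue(BIGINT, 123L)));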
Use of io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT in project trino by trinodb.
From the class TestHiveFileFormats, method testRCBinaryProjectedColumnsPageSource.
@Test(dataProvider = "rowCount")
public void testRCBinaryProjectedColumnsPageSource(int rowCount) throws Exception {
    // RCBinary interprets empty VARCHAR as null
    List<TestColumn> supportedColumns = TEST_COLUMNS.stream()
            .filter(testColumn -> !testColumn.getName().equals("t_empty_varchar"))
            .collect(toList());

    List<TestColumn> regularColumns = getRegularColumns(supportedColumns);
    List<TestColumn> partitionColumns = getPartitionColumns(supportedColumns);

    // Create projected columns for all supported regular columns
    ImmutableList.Builder<TestColumn> writeColumnsBuilder = ImmutableList.builder();
    ImmutableList.Builder<TestColumn> readColumnsBuilder = ImmutableList.builder();
    generateProjectedColumns(regularColumns, writeColumnsBuilder, readColumnsBuilder);

    List<TestColumn> writeColumns = writeColumnsBuilder.addAll(partitionColumns).build();
    List<TestColumn> readColumns = readColumnsBuilder.addAll(partitionColumns).build();

    assertThatFileFormat(RCBINARY)
            .withWriteColumns(writeColumns)
            .withReadColumns(readColumns)
            .withRowsCount(rowCount)
            .withFileWriterFactory(new RcFileFileWriterFactory(HDFS_ENVIRONMENT, TESTING_TYPE_MANAGER, new NodeVersion("test"), HIVE_STORAGE_TIME_ZONE, STATS))
            .isReadableByPageSource(new RcFilePageSourceFactory(TESTING_TYPE_MANAGER, HDFS_ENVIRONMENT, STATS, new HiveConfig()));
}