Use of io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER in project trino by trinodb.
From the class TestHiveFileFormats, method testPageSourceFactory.
private void testPageSourceFactory(
        HivePageSourceFactory sourceFactory,
        FileSplit split,
        HiveStorageFormat storageFormat,
        List<TestColumn> testReadColumns,
        ConnectorSession session,
        long fileSize,
        int rowCount)
        throws IOException
{
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, storageFormat.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, storageFormat.getSerde());

    // Use full columns in split properties
    ImmutableList.Builder<String> splitPropertiesColumnNames = ImmutableList.builder();
    ImmutableList.Builder<String> splitPropertiesColumnTypes = ImmutableList.builder();
    Set<String> baseColumnNames = new HashSet<>();
    for (TestColumn testReadColumn : testReadColumns) {
        String name = testReadColumn.getBaseName();
        if (!baseColumnNames.contains(name) && !testReadColumn.isPartitionKey()) {
            baseColumnNames.add(name);
            splitPropertiesColumnNames.add(name);
            splitPropertiesColumnTypes.add(testReadColumn.getBaseObjectInspector().getTypeName());
        }
    }
    splitProperties.setProperty("columns", splitPropertiesColumnNames.build().stream()
            .collect(Collectors.joining(",")));
    splitProperties.setProperty("columns.types", splitPropertiesColumnTypes.build().stream()
            .collect(Collectors.joining(",")));

    List<HivePartitionKey> partitionKeys = testReadColumns.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), (String) input.getWriteValue()))
            .collect(toList());
    String partitionName = String.join("/", partitionKeys.stream()
            .map(partitionKey -> format("%s=%s", partitionKey.getName(), partitionKey.getValue()))
            .collect(toImmutableList()));

    List<HiveColumnHandle> columnHandles = getColumnHandles(testReadColumns);
    List<HivePageSourceProvider.ColumnMapping> columnMappings = buildColumnMappings(
            partitionName,
            partitionKeys,
            columnHandles,
            ImmutableList.of(),
            TableToPartitionMapping.empty(),
            split.getPath(),
            OptionalInt.empty(),
            fileSize,
            Instant.now().toEpochMilli());

    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(sourceFactory),
            ImmutableSet.of(),
            new Configuration(false),
            session,
            split.getPath(),
            OptionalInt.empty(),
            split.getStart(),
            split.getLength(),
            fileSize,
            splitProperties,
            TupleDomain.all(),
            columnHandles,
            TESTING_TYPE_MANAGER,
            Optional.empty(),
            Optional.empty(),
            false,
            Optional.empty(),
            false,
            NO_ACID_TRANSACTION,
            columnMappings);

    assertTrue(pageSource.isPresent());
    checkPageSource(pageSource.get(), testReadColumns, getTypes(columnHandles), rowCount);
}
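For orientation, here is a minimal sketch (not from the Trino tests; the column names and Hive type names are illustrative assumptions) of the split properties the loop above produces. "columns" and "columns.types" are parallel comma-separated lists that are paired up positionally.

    // Hypothetical result for a table with two non-partition columns.
    Properties example = new Properties();
    example.setProperty("columns", "id,name");
    example.setProperty("columns.types", "bigint,string");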
Use of io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER in project trino by trinodb.
From the class TestHivePageSink, method createPageSource.
private static ConnectorPageSource createPageSource(HiveTransactionHandle transaction, HiveConfig config, File outputFile)
{
    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, config.getHiveStorageFormat().getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, config.getHiveStorageFormat().getSerde());
    splitProperties.setProperty("columns", Joiner.on(',').join(getColumnHandles().stream()
            .map(HiveColumnHandle::getName)
            .collect(toImmutableList())));
    splitProperties.setProperty("columns.types", Joiner.on(',').join(getColumnHandles().stream()
            .map(HiveColumnHandle::getHiveType)
            .map(hiveType -> hiveType.getHiveTypeName().toString())
            .collect(toImmutableList())));

    HiveSplit split = new HiveSplit(
            SCHEMA_NAME,
            TABLE_NAME,
            "",
            "file:///" + outputFile.getAbsolutePath(),
            0,
            outputFile.length(),
            outputFile.length(),
            outputFile.lastModified(),
            splitProperties,
            ImmutableList.of(),
            ImmutableList.of(),
            OptionalInt.empty(),
            0,
            false,
            TableToPartitionMapping.empty(),
            Optional.empty(),
            Optional.empty(),
            false,
            Optional.empty(),
            0,
            SplitWeight.standard());

    ConnectorTableHandle table = new HiveTableHandle(SCHEMA_NAME, TABLE_NAME, ImmutableMap.of(), ImmutableList.of(), ImmutableList.of(), Optional.empty());
    HivePageSourceProvider provider = new HivePageSourceProvider(
            TESTING_TYPE_MANAGER,
            HDFS_ENVIRONMENT,
            config,
            getDefaultHivePageSourceFactories(HDFS_ENVIRONMENT, config),
            getDefaultHiveRecordCursorProviders(config, HDFS_ENVIRONMENT),
            new GenericHiveRecordCursorProvider(HDFS_ENVIRONMENT, config),
            Optional.empty());
    return provider.createPageSource(transaction, getHiveSession(config), split, table, ImmutableList.copyOf(getColumnHandles()), DynamicFilter.EMPTY);
}
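A minimal consumption sketch for the helper above, assuming transaction, config, and outputFile are in scope and the surrounding code can propagate IOException. It drains the page source and counts rows; getNextPage() may return null before the source reports isFinished(), so the null check is required.

    long rows = 0;
    try (ConnectorPageSource pageSource = createPageSource(transaction, config, outputFile)) {
        while (!pageSource.isFinished()) {
            Page page = pageSource.getNextPage();
            if (page != null) {
                rows += page.getPositionCount(); // rows in this page
            }
        }
    }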
Use of io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER in project trino by trinodb.
From the class TestRowParametricType, method testTypeSignatureRoundTrip.
@Test
public void testTypeSignatureRoundTrip()
{
    TypeSignature typeSignature = new TypeSignature(
            ROW,
            TypeSignatureParameter.namedTypeParameter(new NamedTypeSignature(Optional.of(new RowFieldName("col1")), BIGINT.getTypeSignature())),
            TypeSignatureParameter.namedTypeParameter(new NamedTypeSignature(Optional.of(new RowFieldName("col2")), DOUBLE.getTypeSignature())));
    List<TypeParameter> parameters = typeSignature.getParameters().stream()
            .map(parameter -> TypeParameter.of(parameter, TESTING_TYPE_MANAGER))
            .collect(Collectors.toList());
    Type rowType = RowParametricType.ROW.createType(TESTING_TYPE_MANAGER, parameters);
    assertEquals(rowType.getTypeSignature(), typeSignature);
}
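The same round trip can also be checked through the type manager itself; a short sketch, not part of the original test, using the TypeManager.getType overload that accepts a TypeSignature:

    // Resolve the signature directly and round-trip it back.
    Type resolved = TESTING_TYPE_MANAGER.getType(typeSignature);
    assertEquals(resolved.getTypeSignature(), typeSignature);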
Use of io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER in project trino by trinodb.
From the class TestTransactionLogAccess, method testSnapshotsAreConsistent.
@Test
public void testSnapshotsAreConsistent()
        throws Exception
{
    String tableName = "person";
    File tempDir = Files.createTempDir();
    File tableDir = new File(tempDir, tableName);
    File transactionLogDir = new File(tableDir, TRANSACTION_LOG_DIRECTORY);
    transactionLogDir.mkdirs();

    File resourceDir = new File(getClass().getClassLoader().getResource("databricks/person/_delta_log").toURI());
    copyTransactionLogEntry(0, 12, resourceDir, transactionLogDir);
    Files.copy(new File(resourceDir, LAST_CHECKPOINT_FILENAME), new File(transactionLogDir, LAST_CHECKPOINT_FILENAME));

    setupTransactionLogAccess(tableName, new Path(tableDir.toURI()));
    List<AddFileEntry> expectedDataFiles = transactionLogAccess.getActiveFiles(tableSnapshot, SESSION);

    copyTransactionLogEntry(12, 14, resourceDir, transactionLogDir);
    Set<String> newDataFiles = ImmutableSet.of(
            "age=28/part-00000-40dd1707-1d42-4328-a59a-21f5c945fe60.c000.snappy.parquet",
            "age=29/part-00000-3794c463-cb0c-4beb-8d07-7cc1e3b5920f.c000.snappy.parquet");
    TableSnapshot updatedTableSnapshot = transactionLogAccess.loadSnapshot(new SchemaTableName("schema", tableName), new Path(tableDir.toURI()), SESSION);
    List<AddFileEntry> allDataFiles = transactionLogAccess.getActiveFiles(updatedTableSnapshot, SESSION);
    List<AddFileEntry> dataFilesWithFixedVersion = transactionLogAccess.getActiveFiles(tableSnapshot, SESSION);
    for (String newFilePath : newDataFiles) {
        assertTrue(allDataFiles.stream().anyMatch(entry -> entry.getPath().equals(newFilePath)));
        assertTrue(dataFilesWithFixedVersion.stream().noneMatch(entry -> entry.getPath().equals(newFilePath)));
    }

    assertEquals(expectedDataFiles.size(), dataFilesWithFixedVersion.size());
    List<ColumnMetadata> columns = extractSchema(transactionLogAccess.getMetadataEntry(tableSnapshot, SESSION).get(), TESTING_TYPE_MANAGER);
    for (int i = 0; i < expectedDataFiles.size(); i++) {
        AddFileEntry expected = expectedDataFiles.get(i);
        AddFileEntry actual = dataFilesWithFixedVersion.get(i);
        assertEquals(expected.getPath(), actual.getPath());
        assertEquals(expected.getPartitionValues(), actual.getPartitionValues());
        assertEquals(expected.getSize(), actual.getSize());
        assertEquals(expected.getModificationTime(), actual.getModificationTime());
        assertEquals(expected.isDataChange(), actual.isDataChange());
        assertEquals(expected.getTags(), actual.getTags());

        assertTrue(expected.getStats().isPresent());
        assertTrue(actual.getStats().isPresent());

        for (ColumnMetadata column : columns) {
            DeltaLakeColumnHandle columnHandle = new DeltaLakeColumnHandle(column.getName(), column.getType(), REGULAR);
            assertEquals(expected.getStats().get().getMinColumnValue(columnHandle), actual.getStats().get().getMinColumnValue(columnHandle));
            assertEquals(expected.getStats().get().getMaxColumnValue(columnHandle), actual.getStats().get().getMaxColumnValue(columnHandle));
            assertEquals(expected.getStats().get().getNullCount(columnHandle.getName()), actual.getStats().get().getNullCount(columnHandle.getName()));
            assertEquals(expected.getStats().get().getNumRecords(), actual.getStats().get().getNumRecords());
        }
    }
}
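The isolation property under test can be stated more compactly; a sketch (reusing dataFilesWithFixedVersion and newDataFiles from the test body above) asserting that the pinned snapshot's file paths are disjoint from the files added by the later commits:

    // Paths seen through the pinned snapshot must not include any newly added file.
    Set<String> pinnedPaths = dataFilesWithFixedVersion.stream()
            .map(AddFileEntry::getPath)
            .collect(toImmutableSet());
    assertTrue(Collections.disjoint(pinnedPaths, newDataFiles));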
Use of io.trino.type.InternalTypeManager.TESTING_TYPE_MANAGER in project trino by trinodb.
From the class TestOrcPredicates, method createPageSource.
private ConnectorPageSource createPageSource(TupleDomain<TestColumn> effectivePredicate, List<TestColumn> columnsToRead, ConnectorSession session, FileSplit split)
{
    OrcPageSourceFactory readerFactory = new OrcPageSourceFactory(new OrcReaderOptions(), HDFS_ENVIRONMENT, STATS, UTC);

    Properties splitProperties = new Properties();
    splitProperties.setProperty(FILE_INPUT_FORMAT, ORC.getInputFormat());
    splitProperties.setProperty(SERIALIZATION_LIB, ORC.getSerde());

    // Use full columns in split properties
    ImmutableList.Builder<String> splitPropertiesColumnNames = ImmutableList.builder();
    ImmutableList.Builder<String> splitPropertiesColumnTypes = ImmutableList.builder();
    Set<String> baseColumnNames = new HashSet<>();
    for (TestColumn columnToRead : columnsToRead) {
        String name = columnToRead.getBaseName();
        if (!baseColumnNames.contains(name) && !columnToRead.isPartitionKey()) {
            baseColumnNames.add(name);
            splitPropertiesColumnNames.add(name);
            splitPropertiesColumnTypes.add(columnToRead.getBaseObjectInspector().getTypeName());
        }
    }
    splitProperties.setProperty("columns", splitPropertiesColumnNames.build().stream()
            .collect(Collectors.joining(",")));
    splitProperties.setProperty("columns.types", splitPropertiesColumnTypes.build().stream()
            .collect(Collectors.joining(",")));

    List<HivePartitionKey> partitionKeys = columnsToRead.stream()
            .filter(TestColumn::isPartitionKey)
            .map(input -> new HivePartitionKey(input.getName(), (String) input.getWriteValue()))
            .collect(toList());
    String partitionName = String.join("/", partitionKeys.stream()
            .map(partitionKey -> format("%s=%s", partitionKey.getName(), partitionKey.getValue()))
            .collect(toImmutableList()));

    List<HiveColumnHandle> columnHandles = getColumnHandles(columnsToRead);
    TupleDomain<HiveColumnHandle> predicate = effectivePredicate.transformKeys(testColumn -> {
        Optional<HiveColumnHandle> handle = columnHandles.stream()
                .filter(column -> testColumn.getName().equals(column.getName()))
                .findFirst();
        checkState(handle.isPresent(), "Predicate on invalid column");
        return handle.get();
    });

    List<HivePageSourceProvider.ColumnMapping> columnMappings = buildColumnMappings(
            partitionName,
            partitionKeys,
            columnHandles,
            ImmutableList.of(),
            TableToPartitionMapping.empty(),
            split.getPath(),
            OptionalInt.empty(),
            split.getLength(),
            Instant.now().toEpochMilli());

    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(readerFactory),
            ImmutableSet.of(),
            new Configuration(false),
            session,
            split.getPath(),
            OptionalInt.empty(),
            split.getStart(),
            split.getLength(),
            split.getLength(),
            splitProperties,
            predicate,
            columnHandles,
            TESTING_TYPE_MANAGER,
            Optional.empty(),
            Optional.empty(),
            false,
            Optional.empty(),
            false,
            NO_ACID_TRANSACTION,
            columnMappings);

    assertTrue(pageSource.isPresent());
    return pageSource.get();
}
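A sketch of building the effectivePredicate this helper expects, assuming a hypothetical TestColumn named ageColumn of Trino type BIGINT; TupleDomain.withColumnDomains and Domain.singleValue are standard Trino SPI calls:

    // Restrict the hypothetical "age" column to a single value.
    TupleDomain<TestColumn> effectivePredicate = TupleDomain.withColumnDomains(
            ImmutableMap.of(ageColumn, Domain.singleValue(BIGINT, 30L)));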