Use of io.trino.spi.type.Type in project trino by trinodb.
From the class HiveTypeTranslator, method toTypeSignature:
public static TypeSignature toTypeSignature(TypeInfo typeInfo, HiveTimestampPrecision timestampPrecision) {
    switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            Type primitiveType = fromPrimitiveType((PrimitiveTypeInfo) typeInfo, timestampPrecision);
            if (primitiveType == null) {
                break;
            }
            return primitiveType.getTypeSignature();
        case MAP:
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            return mapType(
                    toTypeSignature(mapTypeInfo.getMapKeyTypeInfo(), timestampPrecision),
                    toTypeSignature(mapTypeInfo.getMapValueTypeInfo(), timestampPrecision));
        case LIST:
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            TypeSignature elementType = toTypeSignature(listTypeInfo.getListElementTypeInfo(), timestampPrecision);
            return arrayType(typeParameter(elementType));
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<TypeInfo> fieldTypes = structTypeInfo.getAllStructFieldTypeInfos();
            List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
            if (fieldTypes.size() != fieldNames.size()) {
                throw new TrinoException(HiveErrorCode.HIVE_INVALID_METADATA, format("Invalid Hive struct type: %s", typeInfo));
            }
            // TODO: This is a hack. Trino engine should be able to handle identifiers in a case insensitive way where necessary.
            return rowType(Streams.zip(
                    fieldNames.stream().map(s -> s.toLowerCase(Locale.US)),
                    fieldTypes.stream().map(type -> toTypeSignature(type, timestampPrecision)),
                    TypeSignatureParameter::namedField)
                    .collect(Collectors.toList()));
        case UNION:
            // Use a row type to represent a union type in Hive for reading
            UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
            List<TypeInfo> unionObjectTypes = unionTypeInfo.getAllUnionObjectTypeInfos();
            ImmutableList.Builder<TypeSignatureParameter> typeSignatures = ImmutableList.builder();
            typeSignatures.add(namedField("tag", TINYINT.getTypeSignature()));
            for (int i = 0; i < unionObjectTypes.size(); i++) {
                TypeInfo unionObjectType = unionObjectTypes.get(i);
                typeSignatures.add(namedField("field" + i, toTypeSignature(unionObjectType, timestampPrecision)));
            }
            return rowType(typeSignatures.build());
    }
    throw new TrinoException(NOT_SUPPORTED, format("Unsupported Hive type: %s", typeInfo));
}
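A minimal usage sketch (not part of the Trino sources): parse a Hive type string with the Hive serde TypeInfoUtils and translate it into a Trino TypeSignature. The type string and the choice of HiveTimestampPrecision.MILLISECONDS are illustrative assumptions.

// Hypothetical caller, for illustration only
TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("map<string,array<int>>");
// Translate the Hive TypeInfo tree into a Trino TypeSignature, using millisecond timestamps
TypeSignature signature = HiveTypeTranslator.toTypeSignature(typeInfo, HiveTimestampPrecision.MILLISECONDS);
System.out.println(signature);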
Use of io.trino.spi.type.Type in project trino by trinodb.
From the class HiveBlockEncodingSerde, method readType:
@Override
public Type readType(SliceInput sliceInput) {
    requireNonNull(sliceInput, "sliceInput is null");
    TypeId id = TypeId.of(readLengthPrefixedString(sliceInput));
    for (Type type : TYPES) {
        if (type.getTypeId().equals(id)) {
            return type;
        }
    }
    throw new IllegalArgumentException("Type not found: " + id);
}
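For context, a hedged sketch of what the complementary write side of this serde could look like, assuming a writeLengthPrefixedString helper that mirrors readLengthPrefixedString; this is illustrative, not the actual HiveBlockEncodingSerde code.

@Override
public void writeType(SliceOutput sliceOutput, Type type) {
    requireNonNull(sliceOutput, "sliceOutput is null");
    requireNonNull(type, "type is null");
    // Persist the type id so readType can find the same entry in the fixed TYPES list
    writeLengthPrefixedString(sliceOutput, type.getTypeId().getId());
}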
Use of io.trino.spi.type.Type in project trino by trinodb.
From the class MetastoreHiveStatisticsProvider, method createZeroStatistics:
private TableStatistics createZeroStatistics(Map<String, ColumnHandle> columns, Map<String, Type> columnTypes) {
    TableStatistics.Builder result = TableStatistics.builder();
    result.setRowCount(Estimate.of(0));
    columns.forEach((columnName, columnHandle) -> {
        Type columnType = columnTypes.get(columnName);
        verifyNotNull(columnType, "columnType is missing for column: %s", columnName);
        ColumnStatistics.Builder columnStatistics = ColumnStatistics.builder();
        columnStatistics.setNullsFraction(Estimate.of(0));
        columnStatistics.setDistinctValuesCount(Estimate.of(0));
        if (hasDataSize(columnType)) {
            columnStatistics.setDataSize(Estimate.of(0));
        }
        result.setColumnStatistics(columnHandle, columnStatistics.build());
    });
    return result.build();
}
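The hasDataSize check referenced above is not shown in this excerpt. A plausible sketch (an assumption, not necessarily the exact predicate used by MetastoreHiveStatisticsProvider) would report data-size estimates only for character types:

// Hedged guess: data size is tracked only for variable-width character columns
private static boolean hasDataSize(Type type) {
    return type instanceof VarcharType || type instanceof CharType;
}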
Use of io.trino.spi.type.Type in project trino by trinodb.
From the class AbstractTestHive, method setup:
protected final void setup(String databaseName, HiveConfig hiveConfig, HiveMetastore hiveMetastore, HdfsEnvironment hdfsConfiguration) {
    setupHive(databaseName);
    metastoreClient = hiveMetastore;
    hdfsEnvironment = hdfsConfiguration;
    HivePartitionManager partitionManager = new HivePartitionManager(hiveConfig);
    locationService = new HiveLocationService(hdfsEnvironment);
    JsonCodec<PartitionUpdate> partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class);
    metadataFactory = new HiveMetadataFactory(
            new CatalogName("hive"), HiveMetastoreFactory.ofInstance(metastoreClient), hdfsEnvironment, partitionManager,
            10, 10, 10, false, false, false, true, true, false, 1000, Optional.empty(), true,
            TESTING_TYPE_MANAGER, NOOP_METADATA_PROVIDER, locationService, partitionUpdateCodec,
            newFixedThreadPool(2), heartbeatService, TEST_SERVER_VERSION,
            (session, tableHandle) -> {
                if (!tableHandle.getTableName().contains("apply_redirection_tester")) {
                    return Optional.empty();
                }
                return Optional.of(new TableScanRedirectApplicationResult(new CatalogSchemaTableName("hive", databaseName, "mock_redirection_target"), ImmutableMap.of(), TupleDomain.all()));
            },
            ImmutableSet.of(new PartitionsSystemTableProvider(partitionManager, TESTING_TYPE_MANAGER), new PropertiesSystemTableProvider()),
            metastore -> new NoneHiveMaterializedViewMetadata() {
                @Override
                public Optional<ConnectorMaterializedViewDefinition> getMaterializedView(ConnectorSession session, SchemaTableName viewName) {
                    if (!viewName.getTableName().contains("materialized_view_tester")) {
                        return Optional.empty();
                    }
                    return Optional.of(new ConnectorMaterializedViewDefinition("dummy_view_sql", Optional.empty(), Optional.empty(), Optional.empty(), ImmutableList.of(new ConnectorMaterializedViewDefinition.Column("abc", TypeId.of("type"))), Optional.empty(), Optional.of("alice"), ImmutableMap.of()));
                }
            },
            SqlStandardAccessControlMetadata::new, NO_REDIRECTIONS, TableInvalidationCallback.NOOP);
    transactionManager = new HiveTransactionManager(metadataFactory);
    splitManager = new HiveSplitManager(transactionManager, partitionManager, new NamenodeStats(), hdfsEnvironment, new CachingDirectoryLister(hiveConfig), directExecutor(), new CounterStat(), 100, hiveConfig.getMaxOutstandingSplitsSize(), hiveConfig.getMinPartitionBatchSize(), hiveConfig.getMaxPartitionBatchSize(), hiveConfig.getMaxInitialSplits(), hiveConfig.getSplitLoaderConcurrency(), hiveConfig.getMaxSplitsPerSecond(), false, TESTING_TYPE_MANAGER);
    pageSinkProvider = new HivePageSinkProvider(getDefaultHiveFileWriterFactories(hiveConfig, hdfsEnvironment), hdfsEnvironment, PAGE_SORTER, HiveMetastoreFactory.ofInstance(metastoreClient), new GroupByHashPageIndexerFactory(JOIN_COMPILER, BLOCK_TYPE_OPERATORS), TESTING_TYPE_MANAGER, getHiveConfig(), locationService, partitionUpdateCodec, new TestingNodeManager("fake-environment"), new HiveEventClient(), getHiveSessionProperties(hiveConfig), new HiveWriterStats());
    pageSourceProvider = new HivePageSourceProvider(TESTING_TYPE_MANAGER, hdfsEnvironment, hiveConfig, getDefaultHivePageSourceFactories(hdfsEnvironment, hiveConfig), getDefaultHiveRecordCursorProviders(hiveConfig, hdfsEnvironment), new GenericHiveRecordCursorProvider(hdfsEnvironment, hiveConfig), Optional.empty());
    nodePartitioningProvider = new HiveNodePartitioningProvider(new TestingNodeManager("fake-environment"), TESTING_TYPE_MANAGER);
}
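A rough illustration of how the components wired above are exercised by the tests in this class; newTransaction() and newSession() are the helpers already used elsewhere in AbstractTestHive, and the schema name here is only a placeholder.

try (Transaction transaction = newTransaction()) {
    ConnectorMetadata metadata = transaction.getMetadata();
    ConnectorSession session = newSession();
    // List tables through the metadata produced by metadataFactory (schema name is illustrative)
    metadata.listTables(session, Optional.of("test_schema"));
}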
Use of io.trino.spi.type.Type in project trino by trinodb.
From the class AbstractTestHive, method doTestBucketSortedTables:
private void doTestBucketSortedTables(SchemaTableName table) throws IOException {
    int bucketCount = 3;
    int expectedRowCount = 0;
    try (Transaction transaction = newTransaction()) {
        ConnectorSession session = newSession();
        ConnectorMetadata metadata = transaction.getMetadata();
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(
                table,
                ImmutableList.<ColumnMetadata>builder()
                        .add(new ColumnMetadata("id", VARCHAR))
                        .add(new ColumnMetadata("value_asc", VARCHAR))
                        .add(new ColumnMetadata("value_desc", BIGINT))
                        .add(new ColumnMetadata("ds", VARCHAR))
                        .build(),
                ImmutableMap.<String, Object>builder()
                        .put(STORAGE_FORMAT_PROPERTY, RCBINARY)
                        .put(PARTITIONED_BY_PROPERTY, ImmutableList.of("ds"))
                        .put(BUCKETED_BY_PROPERTY, ImmutableList.of("id"))
                        .put(BUCKET_COUNT_PROPERTY, bucketCount)
                        .put(SORTED_BY_PROPERTY, ImmutableList.builder()
                                .add(new SortingColumn("value_asc", ASCENDING))
                                .add(new SortingColumn("value_desc", DESCENDING))
                                .build())
                        .buildOrThrow());
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty(), NO_RETRIES);
        // write the data
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        List<Type> types = tableMetadata.getColumns().stream().map(ColumnMetadata::getType).collect(toList());
        ThreadLocalRandom random = ThreadLocalRandom.current();
        for (int i = 0; i < 50; i++) {
            MaterializedResult.Builder builder = MaterializedResult.resultBuilder(session, types);
            for (int j = 0; j < 1000; j++) {
                builder.row(sha256().hashLong(random.nextLong()).toString(), "test" + random.nextInt(100), random.nextLong(100_000), "2018-04-01");
                expectedRowCount++;
            }
            sink.appendPage(builder.build().toPage());
        }
        HdfsContext context = new HdfsContext(session);
        // verify we have enough temporary files per bucket to require multiple passes
        Path stagingPathRoot;
        if (isTemporaryStagingDirectoryEnabled(session)) {
            stagingPathRoot = new Path(getTemporaryStagingDirectoryPath(session).replace("${USER}", context.getIdentity().getUser()));
        }
        else {
            stagingPathRoot = getStagingPathRoot(outputHandle);
        }
        assertThat(listAllDataFiles(context, stagingPathRoot))
                .filteredOn(file -> file.contains(".tmp-sort."))
                .size().isGreaterThan(bucketCount * getHiveConfig().getMaxOpenSortFiles() * 2);
        // finish the write
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // verify there are no temporary files
        for (String file : listAllDataFiles(context, stagingPathRoot)) {
            assertThat(file).doesNotContain(".tmp-sort.");
        }
        // finish creating table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
    }
    // verify that bucket files are sorted
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        metadata.beginQuery(session);
        ConnectorTableHandle tableHandle = getTableHandle(metadata, table);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
        // verify local sorting property
        ConnectorTableProperties properties = metadata.getTableProperties(
                newSession(ImmutableMap.of("propagate_table_scan_sorting_properties", true, "bucket_execution_enabled", false)),
                tableHandle);
        Map<String, Integer> columnIndex = indexColumns(columnHandles);
        assertEquals(properties.getLocalProperties(), ImmutableList.of(
                new SortingProperty<>(columnHandles.get(columnIndex.get("value_asc")), ASC_NULLS_FIRST),
                new SortingProperty<>(columnHandles.get(columnIndex.get("value_desc")), DESC_NULLS_LAST)));
        assertThat(metadata.getTableProperties(newSession(), tableHandle).getLocalProperties()).isEmpty();
        List<ConnectorSplit> splits = getAllSplits(tableHandle, transaction, session);
        assertThat(splits).hasSize(bucketCount);
        int actualRowCount = 0;
        for (ConnectorSplit split : splits) {
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles, DynamicFilter.EMPTY)) {
                String lastValueAsc = null;
                long lastValueDesc = -1;
                while (!pageSource.isFinished()) {
                    Page page = pageSource.getNextPage();
                    if (page == null) {
                        continue;
                    }
                    for (int i = 0; i < page.getPositionCount(); i++) {
                        Block blockAsc = page.getBlock(1);
                        Block blockDesc = page.getBlock(2);
                        assertFalse(blockAsc.isNull(i));
                        assertFalse(blockDesc.isNull(i));
                        String valueAsc = VARCHAR.getSlice(blockAsc, i).toStringUtf8();
                        if (lastValueAsc != null) {
                            assertGreaterThanOrEqual(valueAsc, lastValueAsc);
                            if (valueAsc.equals(lastValueAsc)) {
                                long valueDesc = BIGINT.getLong(blockDesc, i);
                                if (lastValueDesc != -1) {
                                    assertLessThanOrEqual(valueDesc, lastValueDesc);
                                }
                                lastValueDesc = valueDesc;
                            }
                            else {
                                lastValueDesc = -1;
                            }
                        }
                        lastValueAsc = valueAsc;
                        actualRowCount++;
                    }
                }
            }
        }
        assertThat(actualRowCount).isEqualTo(expectedRowCount);
    }
}
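The indexColumns helper used above is not part of this excerpt. A hedged sketch of what it likely does (map each column name to its position so the test can locate value_asc and value_desc); the actual AbstractTestHive helper may differ in detail:

private static Map<String, Integer> indexColumns(List<ColumnHandle> columnHandles) {
    ImmutableMap.Builder<String, Integer> index = ImmutableMap.builder();
    for (int i = 0; i < columnHandles.size(); i++) {
        // Assumes the handles are HiveColumnHandle instances exposing getName()
        index.put(((HiveColumnHandle) columnHandles.get(i)).getName(), i);
    }
    return index.buildOrThrow();
}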