Use of io.prestosql.spi.connector.ColumnMetadata in project hetu-core by openlookeng.
The class HBaseConnection, method getColumnHandles.
private static List<HBaseColumnHandle> getColumnHandles(ConnectorTableMetadata meta, String rowIdColumn)
{
    // Get the column mappings from the table property or auto-generate columns if not defined
    Map<String, Pair<String, String>> mapping = HBaseTableProperties.getColumnMapping(meta.getProperties())
            .orElse(autoGenerateMapping(meta.getColumns(), HBaseTableProperties.getLocalityGroups(meta.getProperties())));
    // The list of indexed columns
    Optional<List<String>> indexedColumns = HBaseTableProperties.getIndexColumns(meta.getProperties());
    // And now we parse the configured columns and create handles for the metadata manager
    List<HBaseColumnHandle> columns = new ArrayList<>();
    for (int ordinal = 0; ordinal < meta.getColumns().size(); ++ordinal) {
        ColumnMetadata cm = meta.getColumns().get(ordinal);
        // Special case if this column is the row ID
        if (cm.getName().equalsIgnoreCase(rowIdColumn)) {
            columns.add(new HBaseColumnHandle(rowIdColumn, Optional.empty(), Optional.empty(), cm.getType(), ordinal, "HBase row ID", false));
        }
        else {
            if (!mapping.containsKey(cm.getName())) {
                LOG.error("Misconfigured mapping for HBase column %s", cm.getName());
                throw new InvalidParameterException(format("Misconfigured mapping for HBase column %s", cm.getName()));
            }
            // Get the mapping for this column
            Pair<String, String> familyQualifier = mapping.get(cm.getName());
            boolean indexed = indexedColumns.isPresent() && indexedColumns.get().contains(cm.getName().toLowerCase(Locale.ENGLISH));
            String comment = format("HBase column %s:%s. Indexed: %b", familyQualifier.getLeft(), familyQualifier.getRight(), indexed);
            // Create a new HBaseColumnHandle object
            columns.add(new HBaseColumnHandle(cm.getName(), Optional.of(familyQualifier.getLeft()), Optional.of(familyQualifier.getRight()), cm.getType(), ordinal, comment, indexed));
        }
    }
    return columns;
}
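For orientation, here is a minimal sketch of the table metadata a caller might feed into getColumnHandles. The schema and table names, the column_mapping property key, and its name:family:qualifier syntax are illustrative assumptions, not values confirmed against HBaseTableProperties.

// Hypothetical input: three columns, with "id" designated as the HBase row ID
// and the remaining two mapped into column family "f1".
List<ColumnMetadata> columns = ImmutableList.of(
        new ColumnMetadata("id", VARCHAR),
        new ColumnMetadata("name", VARCHAR),
        new ColumnMetadata("age", BIGINT));
Map<String, Object> properties = new HashMap<>();
properties.put("column_mapping", "name:f1:name,age:f1:age"); // assumed property key and syntax
ConnectorTableMetadata meta = new ConnectorTableMetadata(new SchemaTableName("default", "users"), columns, properties);
// getColumnHandles(meta, "id") would yield the row-ID handle at ordinal 0 plus
// two handles carrying their family/qualifier pair and indexed flag.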
Use of io.prestosql.spi.connector.ColumnMetadata in project hetu-core by openlookeng.
The class AbstractTestHive, method indexColumns.
protected static ImmutableMap<String, Integer> indexColumns(ConnectorTableMetadata tableMetadata)
{
    ImmutableMap.Builder<String, Integer> index = ImmutableMap.builder();
    int i = 0;
    for (ColumnMetadata columnMetadata : tableMetadata.getColumns()) {
        index.put(columnMetadata.getName(), i);
        i++;
    }
    return index.build();
}
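As a usage sketch, the returned name-to-ordinal map lets assertions look up columns by name instead of hard-coding positions. The result variable and the "id" column below are assumptions for illustration.

// Hypothetical usage: resolve a column's position by name.
ImmutableMap<String, Integer> index = indexColumns(tableMetadata);
MaterializedRow row = result.getMaterializedRows().get(0); // 'result' assumed to exist in the test
Object id = row.getField(index.get("id")); // read the "id" field via its ordinal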
Use of io.prestosql.spi.connector.ColumnMetadata in project hetu-core by openlookeng.
The class AbstractTestHiveFileSystem, method createTable.
private void createTable(SchemaTableName tableName, HiveStorageFormat storageFormat)
        throws Exception
{
    List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder()
            .add(new ColumnMetadata("id", BIGINT))
            .build();
    MaterializedResult data = MaterializedResult.resultBuilder(newSession(), BIGINT)
            .row(1L)
            .row(3L)
            .row(2L)
            .build();
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat));
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty());
        // write the records
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // commit the table
        metadata.finishCreateTable(session, outputHandle, fragments, ImmutableList.of());
        transaction.commit();
        // Hack to work around the metastore not being configured for S3 or other file systems.
        // The metastore tries to validate the location when creating the table, which fails
        // without explicit file-system configuration, so we create the table with a dummy
        // location and update it here to the correct one.
        metastoreClient.updateTableLocation(database, tableName.getTableName(), locationService.getTableWriteInfo(((HiveOutputTableHandle) outputHandle).getLocationHandle(), false).getTargetPath().toString());
    }
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // load the new table
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the metadata
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
        assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), columns);
        // verify the data
        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, tableHandle, UNGROUPED_SCHEDULING);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));
        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle, columnHandles)) {
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows());
        }
    }
}
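The metadata assertion above can compare the round-tripped List<ColumnMetadata> against the original instances directly because ColumnMetadata behaves as a value object: equality is field-based rather than identity-based. A minimal illustration, assuming the SPI's equals semantics:

ColumnMetadata expected = new ColumnMetadata("id", BIGINT);
ColumnMetadata roundTripped = new ColumnMetadata("id", BIGINT); // e.g. rebuilt from the metastore
assertEquals(roundTripped, expected); // passes: same name and type, different instances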
Use of io.prestosql.spi.connector.ColumnMetadata in project hetu-core by openlookeng.
The class TestColumnTypeCacheable, method testBigintCacheable.
@Test
public void testBigintCacheable()
{
    ConnectorSession session = new TestingConnectorSession(
            new HiveSessionProperties(new HiveConfig().setDynamicFilterPartitionFilteringEnabled(false), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    ColumnMetadata ptdMetadata = new ColumnMetadata("pt_d", BIGINT);
    Set<TupleDomain<ColumnMetadata>> cachePredicates = ImmutableSet.of(
            TupleDomain.withColumnDomains(ImmutableMap.of(ptdMetadata, Domain.singleValue(BIGINT, 20200522L))),
            TupleDomain.withColumnDomains(ImmutableMap.of(ptdMetadata, Domain.singleValue(BIGINT, 20200521L))));
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(
            session, "database", "table", 10, 10000, new DataSize(10, MEGABYTE), Integer.MAX_VALUE,
            new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5), new CounterStat(), null,
            cachePredicates, null, new HiveConfig(), HiveStorageFormat.ORC);
    int[] idPrefix = new int[] {1};
    ImmutableMap.of("__HIVE_DEFAULT_PARTITION__", 1, "20200520", 2, "20200521", 3, "20200522", 2).forEach((ptdValue, splitCount) -> {
        for (int i = 1; i <= splitCount; i++) {
            hiveSplitSource.addToQueue(new TestPartitionSplit(idPrefix[0] * 10 + i, ImmutableList.of(new HivePartitionKey("pt_d", ptdValue)), "pt_d=" + ptdValue));
        }
        idPrefix[0] = idPrefix[0] + 1;
    });
    List<ConnectorSplit> splits = getSplits(hiveSplitSource, 10);
    // 8 splits queued in total; only the 20200521 (3 splits) and 20200522 (2 splits) partitions match a cache predicate
    assertEquals(splits.size(), 8);
    assertEquals(splits.stream().filter(ConnectorSplit::isCacheable).count(), 5);
    assertEquals(splits.stream().filter(ConnectorSplit::isCacheable).map(HiveSplitWrapper::getOnlyHiveSplit)
            .filter(hiveSplit -> hiveSplit.getPartitionKeys().contains(new HivePartitionKey("pt_d", "20200521"))).count(), 3);
    assertEquals(splits.stream().filter(ConnectorSplit::isCacheable).map(HiveSplitWrapper::getOnlyHiveSplit)
            .filter(hiveSplit -> hiveSplit.getPartitionKeys().contains(new HivePartitionKey("pt_d", "20200522"))).count(), 2);
    assertEquals(splits.stream().filter(ConnectorSplit::isCacheable).map(HiveSplitWrapper::getOnlyHiveSplit)
            .filter(hiveSplit -> hiveSplit.getPartitionKeys().contains(new HivePartitionKey("pt_d", "20200520"))).count(), 0);
}
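Isolated from the test scaffolding, the ColumnMetadata usage here is the construction of a cache predicate: a TupleDomain keyed by ColumnMetadata that pins the bigint partition column to a single value. A minimal sketch using the same names as the test:

// One cache predicate: splits from partition pt_d = 20200522 become cacheable.
ColumnMetadata partitionColumn = new ColumnMetadata("pt_d", BIGINT);
TupleDomain<ColumnMetadata> predicate = TupleDomain.withColumnDomains(
        ImmutableMap.of(partitionColumn, Domain.singleValue(BIGINT, 20200522L)));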
Use of io.prestosql.spi.connector.ColumnMetadata in project hetu-core by openlookeng.
The class TestColumnTypeCacheable, method testDoubleTypeCacheable.
@Test
public void testDoubleTypeCacheable()
{
    ConnectorSession session = new TestingConnectorSession(
            new HiveSessionProperties(new HiveConfig().setDynamicFilterPartitionFilteringEnabled(false), new OrcFileWriterConfig(), new ParquetFileWriterConfig()).getSessionProperties());
    ColumnMetadata ptdMetadata = new ColumnMetadata("pt_d", DOUBLE);
    Set<TupleDomain<ColumnMetadata>> cachePredicates = ImmutableSet.of(
            TupleDomain.withColumnDomains(ImmutableMap.of(ptdMetadata, Domain.singleValue(DOUBLE, 1.0d))),
            TupleDomain.withColumnDomains(ImmutableMap.of(ptdMetadata, Domain.singleValue(DOUBLE, 1000.10d))));
    HiveSplitSource hiveSplitSource = HiveSplitSource.allAtOnce(
            session, "database", "table", 10, 10000, new DataSize(10, MEGABYTE), Integer.MAX_VALUE,
            new TestingHiveSplitLoader(), Executors.newFixedThreadPool(5), new CounterStat(), null,
            cachePredicates, null, new HiveConfig(), HiveStorageFormat.ORC);
    int[] idPrefix = new int[] {1};
    ImmutableMap.of("__HIVE_DEFAULT_PARTITION__", 1, "1.0", 2, "2", 3, "1000.10", 4).forEach((ptdValue, splitCount) -> {
        for (int i = 1; i <= splitCount; i++) {
            hiveSplitSource.addToQueue(new TestPartitionSplit(idPrefix[0] * 10 + i, ImmutableList.of(new HivePartitionKey("pt_d", ptdValue)), "pt_d=" + ptdValue));
        }
        idPrefix[0] = idPrefix[0] + 1;
    });
    List<ConnectorSplit> splits = getSplits(hiveSplitSource, 10);
    // 10 splits queued; only the 1.0 (2 splits) and 1000.10 (4 splits) partitions match a cache predicate
    assertEquals(splits.size(), 10);
    assertEquals(splits.stream().filter(ConnectorSplit::isCacheable).count(), 6);
    assertEquals(splits.stream().filter(ConnectorSplit::isCacheable).map(HiveSplitWrapper::getOnlyHiveSplit)
            .filter(hiveSplit -> hiveSplit.getPartitionKeys().contains(new HivePartitionKey("pt_d", "1.0"))).count(), 2);
    assertEquals(splits.stream().filter(ConnectorSplit::isCacheable).map(HiveSplitWrapper::getOnlyHiveSplit)
            .filter(hiveSplit -> hiveSplit.getPartitionKeys().contains(new HivePartitionKey("pt_d", "2"))).count(), 0);
    assertEquals(splits.stream().filter(ConnectorSplit::isCacheable).map(HiveSplitWrapper::getOnlyHiveSplit)
            .filter(hiveSplit -> hiveSplit.getPartitionKeys().contains(new HivePartitionKey("pt_d", "1000.10"))).count(), 4);
}
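One detail worth noting in this variant: partition key values arrive as strings ("1.0", "1000.10") while the predicate domains are DOUBLE, so matching implies a string-to-double conversion. The sketch below illustrates the matching idea only; the explicit parse is a hypothetical stand-in for whatever coercion the split source actually performs.

// Hypothetical illustration of matching a string partition value against a DOUBLE domain.
Domain cacheDomain = Domain.singleValue(DOUBLE, 1000.10d);
String partitionValue = "1000.10"; // as stored in HivePartitionKey
boolean cacheable = cacheDomain.includesNullableValue(Double.parseDouble(partitionValue)); // true
// Partition "2" (2.0d) matches neither predicate, hence its 0 cacheable splits above.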