Example usage of org.apache.drill.metastore.metadata.TableInfo in the Apache Drill project.
From the class MetadataControllerBatch, method getRowGroupMetadata.
/**
 * Builds {@link RowGroupMetadata} from the values supplied by the given reader.
 *
 * <p>The metadata key is the value of the first segment column, or
 * {@link MetadataInfo#DEFAULT_SEGMENT_KEY} when no segment columns are configured.
 * The metadata identifier is derived from the values of the first
 * {@code nestingLevel - 2} segment columns, the row group location and the row group index.
 * Host affinity is always set to an empty map.</p>
 *
 * @param reader             reader supplying the segment, location, row group index,
 *                           last modified time and schema column values
 * @param metadataStatistics metadata statistics passed to the resulting metadata as is
 * @param columnStatistics   column statistics passed to the resulting metadata as is
 * @param nestingLevel       nesting level; {@code nestingLevel - 2} segment column values
 *                           are used when forming the metadata identifier
 * @return row group metadata assembled from the reader values
 */
private RowGroupMetadata getRowGroupMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics,
    Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
  List<String> segmentColumnNames = popConfig.getContext().segmentColumns();

  // the first segment column value serves as the metadata key
  String metadataKey;
  if (segmentColumnNames.isEmpty()) {
    metadataKey = MetadataInfo.DEFAULT_SEGMENT_KEY;
  } else {
    metadataKey = reader.column(segmentColumnNames.get(0)).scalar().getString();
  }

  // only the leading (nestingLevel - 2) segment columns take part in the identifier
  List<String> segmentValues = segmentColumnNames.stream()
      .limit(nestingLevel - 2)
      .map(name -> reader.column(name).scalar().getString())
      .collect(Collectors.toList());

  String location = reader.column(MetastoreAnalyzeConstants.LOCATION_FIELD).scalar().getString();
  Path rowGroupPath = new Path(location);

  // the row group index is read as a string and parsed
  String rawIndex = reader.column(columnNamesOptions.rowGroupIndex()).scalar().getString();
  int index = Integer.parseInt(rawIndex);

  String identifier = MetadataIdentifierUtils.getRowGroupMetadataIdentifier(segmentValues, rowGroupPath, index);
  MetadataInfo info = MetadataInfo.builder()
      .type(MetadataType.ROW_GROUP)
      .key(metadataKey)
      .identifier(StringUtils.defaultIfEmpty(identifier, null))
      .build();

  long modificationTime =
      Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString());

  return RowGroupMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(info)
      .columnsStatistics(columnStatistics)
      .metadataStatistics(metadataStatistics)
      .hostAffinity(Collections.emptyMap())
      .rowGroupIndex(index)
      .path(rowGroupPath)
      .lastModifiedTime(modificationTime)
      .schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString()))
      .build();
}
Example usage of org.apache.drill.metastore.metadata.TableInfo in the Apache Drill project.
From the class MetadataControllerBatch, method getSegmentMetadata.
/**
 * Builds {@link SegmentMetadata} from the values supplied by the given reader.
 *
 * <p>The metadata key is the value of the first segment column, or
 * {@link MetadataInfo#DEFAULT_SEGMENT_KEY} when no segment columns are configured.
 * For multi-value segments the values of all segment columns are used both for the
 * metadata identifier and as partition values; otherwise the identifier is formed from
 * the first {@code nestingLevel} segment column values and only the value at index
 * {@code nestingLevel - 1} is used as the partition value.</p>
 *
 * @param reader             reader supplying the segment, location, last modified time
 *                           and schema column values
 * @param metadataStatistics metadata statistics passed to the resulting metadata as is
 * @param columnStatistics   column statistics passed to the resulting metadata as is
 * @param nestingLevel       nesting level of the segment; ignored for multi-value segments,
 *                           where the number of segment columns is used instead
 * @return segment metadata assembled from the reader values
 */
private SegmentMetadata getSegmentMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics,
    Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
  List<String> segmentColumnNames = popConfig.getContext().segmentColumns();

  // the first segment column value serves as the metadata key
  String metadataKey;
  if (segmentColumnNames.isEmpty()) {
    metadataKey = MetadataInfo.DEFAULT_SEGMENT_KEY;
  } else {
    metadataKey = reader.column(segmentColumnNames.get(0)).scalar().getString();
  }

  // for the case of multi-value segments, there is no nesting
  // and therefore all values should be used when forming metadata identifier
  boolean multiValue = popConfig.getContext().multiValueSegments();
  int effectiveLevel = multiValue ? segmentColumnNames.size() : nestingLevel;

  List<String> leadingValues = segmentColumnNames.stream()
      .limit(effectiveLevel)
      .map(name -> reader.column(name).scalar().getString())
      .collect(Collectors.toList());

  String identifier = MetadataIdentifierUtils.getMetadataIdentifierKey(leadingValues);
  MetadataInfo info = MetadataInfo.builder()
      .type(MetadataType.SEGMENT)
      .key(metadataKey)
      .identifier(StringUtils.defaultIfEmpty(identifier, null))
      .build();

  int levelIndex = effectiveLevel - 1;

  // for the case of multi-value segments, there is no nesting,
  // so all partition column values should be used
  // NOTE(review): on the non-multi-value path an empty leadingValues list makes get() throw;
  // presumably segment metadata is never requested without segment columns - confirm with callers
  List<String> segmentValues;
  if (multiValue) {
    segmentValues = leadingValues;
  } else {
    segmentValues = Collections.singletonList(leadingValues.get(levelIndex));
  }

  return SegmentMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(info)
      .columnsStatistics(columnStatistics)
      .metadataStatistics(metadataStatistics)
      .path(new Path(reader.column(MetastoreAnalyzeConstants.LOCATION_FIELD).scalar().getString()))
      .locations(getIncomingLocations(reader))
      .column(!segmentColumnNames.isEmpty() ? SchemaPath.getSimplePath(segmentColumnNames.get(levelIndex)) : null)
      .partitionValues(segmentValues)
      .lastModifiedTime(Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString()))
      .schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString()))
      .build();
}
Example usage of org.apache.drill.metastore.metadata.TableInfo in the Apache Drill project.
From the class MetadataControllerBatch, method getPartitionMetadata.
/**
 * Builds {@link PartitionMetadata} from the values supplied by the given reader.
 *
 * <p>The metadata key is the value of the first segment column, or
 * {@link MetadataInfo#DEFAULT_SEGMENT_KEY} when no segment columns are configured.
 * The metadata identifier is formed from the values of the first {@code nestingLevel}
 * segment columns.</p>
 *
 * @param reader             reader supplying the segment, last modified time and schema
 *                           column values, as well as the incoming locations
 * @param metadataStatistics metadata statistics passed to the resulting metadata as is
 * @param columnStatistics   column statistics passed to the resulting metadata as is
 * @param nestingLevel       number of leading segment column values used for the identifier
 * @return partition metadata assembled from the reader values
 */
private PartitionMetadata getPartitionMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics,
    Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
  List<String> segmentColumnNames = popConfig.getContext().segmentColumns();

  // the first segment column value serves as the metadata key
  String metadataKey;
  if (segmentColumnNames.isEmpty()) {
    metadataKey = MetadataInfo.DEFAULT_SEGMENT_KEY;
  } else {
    metadataKey = reader.column(segmentColumnNames.get(0)).scalar().getString();
  }

  List<String> segmentValues = segmentColumnNames.stream()
      .limit(nestingLevel)
      .map(name -> reader.column(name).scalar().getString())
      .collect(Collectors.toList());

  String identifier = MetadataIdentifierUtils.getMetadataIdentifierKey(segmentValues);
  MetadataInfo info = MetadataInfo.builder()
      .type(MetadataType.PARTITION)
      .key(metadataKey)
      .identifier(StringUtils.defaultIfEmpty(identifier, null))
      .build();

  return PartitionMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(info)
      .columnsStatistics(columnStatistics)
      .metadataStatistics(metadataStatistics)
      .locations(getIncomingLocations(reader))
      .lastModifiedTime(Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString()))
      .schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString()))
      .build();
}
Example usage of org.apache.drill.metastore.metadata.TableInfo in the Apache Drill project.
From the class MetadataControllerBatch, method getFileMetadata.
/**
 * Builds {@link FileMetadata} from the values supplied by the given reader.
 *
 * <p>The metadata key is the value of the first segment column, or
 * {@link MetadataInfo#DEFAULT_SEGMENT_KEY} when no segment columns are configured.
 * The metadata identifier is derived from the values of the first
 * {@code nestingLevel - 1} segment columns and the file location.</p>
 *
 * @param reader             reader supplying the segment, location, last modified time
 *                           and schema column values
 * @param metadataStatistics metadata statistics passed to the resulting metadata as is
 * @param columnStatistics   column statistics passed to the resulting metadata as is
 * @param nestingLevel       nesting level; {@code nestingLevel - 1} segment column values
 *                           are used when forming the metadata identifier
 * @return file metadata assembled from the reader values
 */
private FileMetadata getFileMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics,
    Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
  List<String> segmentColumnNames = popConfig.getContext().segmentColumns();

  // the first segment column value serves as the metadata key
  String metadataKey;
  if (segmentColumnNames.isEmpty()) {
    metadataKey = MetadataInfo.DEFAULT_SEGMENT_KEY;
  } else {
    metadataKey = reader.column(segmentColumnNames.get(0)).scalar().getString();
  }

  // only the leading (nestingLevel - 1) segment columns take part in the identifier
  List<String> segmentValues = segmentColumnNames.stream()
      .limit(nestingLevel - 1)
      .map(name -> reader.column(name).scalar().getString())
      .collect(Collectors.toList());

  String location = reader.column(MetastoreAnalyzeConstants.LOCATION_FIELD).scalar().getString();
  Path filePath = new Path(location);

  String identifier = MetadataIdentifierUtils.getFileMetadataIdentifier(segmentValues, filePath);
  MetadataInfo info = MetadataInfo.builder()
      .type(MetadataType.FILE)
      .key(metadataKey)
      .identifier(StringUtils.defaultIfEmpty(identifier, null))
      .build();

  long modificationTime =
      Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString());

  return FileMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(info)
      .columnsStatistics(columnStatistics)
      .metadataStatistics(metadataStatistics)
      .path(filePath)
      .lastModifiedTime(modificationTime)
      .schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString()))
      .build();
}
Example usage of org.apache.drill.metastore.metadata.TableInfo in the Apache Drill project.
From the class AbstractTablesMetastoreTest, method testWriteReadAllFieldTypes.
/**
 * Overwrites the tables metastore with one table unit and one row group unit,
 * then reads each of them back by metadata type and table filter and checks
 * that exactly one unit is returned and that it equals the unit that was written.
 */
@Test
public void testWriteReadAllFieldTypes() {
  TableInfo nation = TableInfo.builder()
      .storagePlugin("dfs")
      .workspace("tmp")
      .name("nation")
      .build();

  // serialized statistics used as fixtures for the table unit
  String approxCountDistinctJson = "{\"statisticsValue\":2.1,"
      + "\"statisticsKind\":{\"name\":\"approx_count_distinct\"}}";
  String nameColumnStatisticsJson = "{\"statistics\":[{\"statisticsValue\":\"aaa\","
      + "\"statisticsKind\":{\"exact\":true,\"name\":\"minValue\"}},{\"statisticsValue\":\"zzz\","
      + "\"statisticsKind\":{\"exact\":true,\"name\":\"maxValue\"}}],\"type\":\"VARCHAR\"}";

  TableMetadataUnit expectedTable = TableMetadataUnit.builder()
      .storagePlugin(nation.storagePlugin())
      .workspace(nation.workspace())
      .tableName(nation.name())
      .metadataKey(MetadataInfo.GENERAL_INFO_KEY)
      .metadataType(MetadataType.TABLE.name())
      .metadataStatistics(Collections.singletonList(approxCountDistinctJson))
      .columnsStatistics(Collections.singletonMap("`name`", nameColumnStatisticsJson))
      .lastModifiedTime(System.currentTimeMillis())
      .build();

  TableMetadataUnit expectedRowGroup = TableMetadataUnit.builder()
      .storagePlugin(nation.storagePlugin())
      .workspace(nation.workspace())
      .tableName(nation.name())
      .metadataKey("1994")
      .metadataIdentifier("1994/Q1/0_0_0.parquet/1")
      .metadataType(MetadataType.ROW_GROUP.name())
      .lastModifiedTime(System.currentTimeMillis())
      .location("/tmp/nation")
      .path("/tmp/nation/1/0_0_0.parquet")
      .rowGroupIndex(1)
      .hostAffinity(Collections.singletonMap("host1", 0.1F))
      .build();

  tables.modify()
      .overwrite(expectedTable, expectedRowGroup)
      .execute();

  // table-level unit round trip
  List<TableMetadataUnit> actualTables = tables.read()
      .metadataType(MetadataType.TABLE)
      .filter(nation.toFilter())
      .execute();
  assertEquals(1, actualTables.size());
  assertEquals(expectedTable, actualTables.get(0));

  // row-group-level unit round trip
  List<TableMetadataUnit> actualRowGroups = tables.read()
      .metadataType(MetadataType.ROW_GROUP)
      .filter(nation.toFilter())
      .execute();
  assertEquals(1, actualRowGroups.size());
  assertEquals(expectedRowGroup, actualRowGroups.get(0));
}
Aggregations