Use of org.apache.drill.metastore.metadata.SegmentMetadata in project drill by apache.
The class BaseParquetMetadataProvider, method getSegmentsMetadataMap.
@SuppressWarnings("unused")
@Override
public Map<Path, SegmentMetadata> getSegmentsMetadataMap() {
  if (segments == null) {
    if (entries.isEmpty() || !collectMetadata) {
      return Collections.emptyMap();
    }
    segments = new LinkedHashMap<>();
    Path fileLocation = getFilesMetadataMap().values().iterator().next().getPath();
    int levelsCount = fileLocation.depth() - tableLocation.depth();
    Map<Path, FileMetadata> filesMetadata = getFilesMetadataMap();
    int segmentsIndex = levelsCount - 1;
    Map<Path, SegmentMetadata> segmentMetadata = getSegmentsForMetadata(filesMetadata,
        SchemaPath.getSimplePath(MetadataInfo.DEFAULT_COLUMN_PREFIX + segmentsIndex));
    segments.putAll(segmentMetadata);
    for (int i = segmentsIndex - 1; i >= 0; i--) {
      String segmentColumn = MetadataInfo.DEFAULT_COLUMN_PREFIX + i;
      segmentMetadata = getMetadataForSegments(segmentMetadata, SchemaPath.getSimplePath(segmentColumn));
      segments.putAll(segmentMetadata);
    }
  }
  return segments;
}
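A minimal caller-side sketch (our own, not part of the Drill source) of how the returned map might be consumed. It uses only accessors that appear in the examples on this page; the class and method names, and the import paths, are assumptions here.

import java.util.Map;
import org.apache.drill.metastore.metadata.SegmentMetadata;
import org.apache.hadoop.fs.Path;

public class SegmentMapInspector {
  // Walks the per-directory map produced by getSegmentsMetadataMap(): each key is a
  // segment directory path and each value carries that segment's metastore metadata.
  public static void printSegments(Map<Path, SegmentMetadata> segments) {
    for (Map.Entry<Path, SegmentMetadata> entry : segments.entrySet()) {
      System.out.println(entry.getKey() + " -> " + entry.getValue().getMetadataInfo().identifier());
    }
  }
}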
Use of org.apache.drill.metastore.metadata.SegmentMetadata in project drill by apache.
The class BasicTablesTransformer, method all.
public static MetadataHolder all(List<TableMetadataUnit> units) {
  List<BaseTableMetadata> tables = new ArrayList<>();
  List<SegmentMetadata> segments = new ArrayList<>();
  List<FileMetadata> files = new ArrayList<>();
  List<RowGroupMetadata> rowGroups = new ArrayList<>();
  List<PartitionMetadata> partitions = new ArrayList<>();
  for (TableMetadataUnit unit : units) {
    MetadataType metadataType = MetadataType.fromValue(unit.metadataType());
    if (metadataType == null) {
      continue;
    }
    switch (metadataType) {
      case TABLE:
        tables.add(BaseTableMetadata.builder().metadataUnit(unit).build());
        break;
      case SEGMENT:
        segments.add(SegmentMetadata.builder().metadataUnit(unit).build());
        break;
      case FILE:
        files.add(FileMetadata.builder().metadataUnit(unit).build());
        break;
      case ROW_GROUP:
        rowGroups.add(RowGroupMetadata.builder().metadataUnit(unit).build());
        break;
      case PARTITION:
        partitions.add(PartitionMetadata.builder().metadataUnit(unit).build());
        break;
      default:
        // Ignore unsupported type
        break;
    }
  }
  return new MetadataHolder(tables, segments, files, rowGroups, partitions);
}
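As a rough usage sketch (ours, not from the Drill source), the transformer can regroup a typed SegmentMetadata after it has been flattened back into a TableMetadataUnit. The package imports, the nested MetadataHolder reference, and the roundTrip helper name are assumptions here.

import java.util.Collections;
import java.util.List;
import org.apache.drill.metastore.components.tables.BasicTablesTransformer;
import org.apache.drill.metastore.components.tables.TableMetadataUnit;
import org.apache.drill.metastore.metadata.SegmentMetadata;

public class TransformerSketch {
  // Flattens one segment into a generic unit and lets the transformer
  // regroup it by metadata type, as in the switch above.
  public static BasicTablesTransformer.MetadataHolder roundTrip(SegmentMetadata segment) {
    List<TableMetadataUnit> units = Collections.singletonList(segment.toMetadataUnit());
    return BasicTablesTransformer.all(units);
  }
}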
Use of org.apache.drill.metastore.metadata.SegmentMetadata in project drill by apache.
The class MetadataControllerBatch, method getSegmentMetadata.
private SegmentMetadata getSegmentMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics,
    Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
  List<String> segmentColumns = popConfig.getContext().segmentColumns();
  String segmentKey = segmentColumns.size() > 0
      ? reader.column(segmentColumns.iterator().next()).scalar().getString() : MetadataInfo.DEFAULT_SEGMENT_KEY;
  // for the case of multi-value segments, there is no nesting,
  // and therefore all values should be used when forming metadata identifier
  if (popConfig.getContext().multiValueSegments()) {
    nestingLevel = segmentColumns.size();
  }
  List<String> allPartitionValues = segmentColumns.stream().limit(nestingLevel)
      .map(columnName -> reader.column(columnName).scalar().getString())
      .collect(Collectors.toList());
  String metadataIdentifier = MetadataIdentifierUtils.getMetadataIdentifierKey(allPartitionValues);
  MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.SEGMENT).key(segmentKey)
      .identifier(StringUtils.defaultIfEmpty(metadataIdentifier, null)).build();
  int segmentLevel = nestingLevel - 1;
  // for the case of multi-value segments, there is no nesting,
  // so all partition column values should be used
  List<String> partitionValues = popConfig.getContext().multiValueSegments()
      ? allPartitionValues : Collections.singletonList(allPartitionValues.get(segmentLevel));
  return SegmentMetadata.builder()
      .tableInfo(tableInfo).metadataInfo(metadataInfo)
      .columnsStatistics(columnStatistics).metadataStatistics(metadataStatistics)
      .path(new Path(reader.column(MetastoreAnalyzeConstants.LOCATION_FIELD).scalar().getString()))
      .locations(getIncomingLocations(reader))
      .column(segmentColumns.size() > 0 ? SchemaPath.getSimplePath(segmentColumns.get(segmentLevel)) : null)
      .partitionValues(partitionValues)
      .lastModifiedTime(Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString()))
      .schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString()))
      .build();
}
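The key/identifier convention built here is the one the tests below rely on: the key is the top-level segment directory value, while the identifier carries the nested partition values. A hand-rolled illustration of that convention (our own sketch; it assumes the identifier is simply the values joined with "/", as the "1994/Q1" identifiers in the tests suggest):

import org.apache.drill.metastore.metadata.MetadataInfo;
import org.apache.drill.metastore.metadata.MetadataType;

public class SegmentMetadataInfoSketch {
  // Builds the MetadataInfo of a second-level segment such as 1994/Q1: the key stays
  // at the top-level directory value, the identifier carries the nested path.
  public static MetadataInfo nestedSegmentInfo(String topLevelValue, String nestedValue) {
    return MetadataInfo.builder()
        .type(MetadataType.SEGMENT)
        .key(topLevelValue)                             // e.g. "1994"
        .identifier(topLevelValue + "/" + nestedValue)  // e.g. "1994/Q1" (assumed "/" joining)
        .build();
  }
}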
Use of org.apache.drill.metastore.metadata.SegmentMetadata in project drill by apache.
The class TestInfoSchemaWithMetastore, method testPartitions.
@Test
public void testPartitions() throws Exception {
  String tableName = "table_with_partitions";
  ZonedDateTime currentTime = currentUtcTime();
  TableInfo tableInfo = TableInfo.builder().storagePlugin("dfs").workspace("tmp").name(tableName).type("PARQUET").build();
  SegmentMetadata defaultSegment = SegmentMetadata.builder().tableInfo(tableInfo)
      .metadataInfo(MetadataInfo.builder().type(MetadataType.SEGMENT).key(MetadataInfo.DEFAULT_SEGMENT_KEY).build())
      .path(new Path("/tmp", tableName)).locations(Collections.emptySet())
      .metadataStatistics(Collections.emptyList()).columnsStatistics(Collections.emptyMap())
      .lastModifiedTime(currentTime.toInstant().toEpochMilli()).build();
  SegmentMetadata segment = SegmentMetadata.builder().tableInfo(tableInfo)
      .metadataInfo(MetadataInfo.builder().type(MetadataType.SEGMENT).key("part_int=3").identifier("part_int=3").build())
      .column(SchemaPath.parseFromString("dir0")).partitionValues(Collections.singletonList("part_int=3"))
      .path(new Path(String.format("/tmp/%s/part_int=3", tableName))).locations(Collections.emptySet())
      .metadataStatistics(Collections.emptyList()).columnsStatistics(Collections.emptyMap())
      .lastModifiedTime(currentTime.toInstant().toEpochMilli()).build();
  PartitionMetadata partition = PartitionMetadata.builder().tableInfo(tableInfo)
      .metadataInfo(MetadataInfo.builder().type(MetadataType.PARTITION).key("part_int=3").identifier("part_int=3/part_varchar=g").build())
      .column(SchemaPath.parseFromString("part_varchar")).partitionValues(Collections.singletonList("g"))
      .locations(Collections.emptySet()).metadataStatistics(Collections.emptyList()).columnsStatistics(Collections.emptyMap())
      .lastModifiedTime(currentTime.toInstant().toEpochMilli()).build();
  metastore.tables().modify()
      .overwrite(defaultSegment.toMetadataUnit(), segment.toMetadataUnit(), partition.toMetadataUnit())
      .execute();
  List<String> columns = Arrays.asList(InfoSchemaConstants.SHRD_COL_TABLE_CATALOG, InfoSchemaConstants.SHRD_COL_TABLE_SCHEMA,
      InfoSchemaConstants.SHRD_COL_TABLE_NAME, InfoSchemaConstants.PARTITIONS_COL_METADATA_KEY,
      InfoSchemaConstants.PARTITIONS_COL_METADATA_TYPE, InfoSchemaConstants.PARTITIONS_COL_METADATA_IDENTIFIER,
      InfoSchemaConstants.PARTITIONS_COL_PARTITION_COLUMN, InfoSchemaConstants.PARTITIONS_COL_PARTITION_VALUE,
      InfoSchemaConstants.PARTITIONS_COL_LOCATION, InfoSchemaConstants.PARTITIONS_COL_LAST_MODIFIED_TIME);
  client.testBuilder()
      .sqlQuery("select %s from information_schema.`partitions` where table_name = '%s'", String.join(", ", columns), tableName)
      .unOrdered().baselineColumns(columns.toArray(new String[0]))
      .baselineValues("DRILL", "dfs.tmp", tableName, "part_int=3", MetadataType.SEGMENT.name(), "part_int=3",
          "`dir0`", "part_int=3", "/tmp/table_with_partitions/part_int=3", currentTime.toLocalDateTime())
      .baselineValues("DRILL", "dfs.tmp", tableName, "part_int=3", MetadataType.PARTITION.name(), "part_int=3/part_varchar=g",
          "`part_varchar`", "g", null, currentTime.toLocalDateTime())
      .go();
}
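One detail in the baselines above: the segment and partition rows are written with epoch milliseconds (currentTime.toInstant().toEpochMilli()), while INFORMATION_SCHEMA.`partitions` is expected to return a LocalDateTime. A standalone sketch of that round trip (our own illustration, assuming the UTC zone the test's currentUtcTime() implies; the class name is ours):

import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;

public class LastModifiedRoundTripSketch {
  // Stores a last-modified timestamp the way the metastore keeps it (epoch millis)
  // and converts it back to the LocalDateTime the test compares against.
  public static LocalDateTime roundTrip(ZonedDateTime currentUtcTime) {
    long storedMillis = currentUtcTime.toInstant().toEpochMilli();
    return LocalDateTime.ofInstant(Instant.ofEpochMilli(storedMillis), ZoneOffset.UTC);
  }
}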
Use of org.apache.drill.metastore.metadata.SegmentMetadata in project drill by apache.
The class TestMetastoreCommands, method testSimpleAnalyze.
@Test
public void testSimpleAnalyze() throws Exception {
  String tableName = "multilevel/parquetSimpleAnalyze";
  TableInfo tableInfo = getTableInfo(tableName, "default");
  File table = dirTestWatcher.copyResourceToRoot(Paths.get("multilevel/parquet"), Paths.get(tableName));
  Path tablePath = new Path(table.toURI().getPath());
  BaseTableMetadata expectedTableMetadata = getBaseTableMetadata(tableInfo, table);
  TableInfo baseTableInfo = TableInfo.builder().name(tableName).storagePlugin("dfs").workspace("default").build();
  SegmentMetadata dir0 = SegmentMetadata.builder()
      .tableInfo(baseTableInfo)
      .metadataInfo(MetadataInfo.builder().type(MetadataType.SEGMENT).identifier("1994").key("1994").build())
      .path(new Path(tablePath, "1994")).schema(SCHEMA)
      .lastModifiedTime(getMaxLastModified(new File(table, "1994")))
      .column(SchemaPath.getSimplePath("dir0"))
      .columnsStatistics(DIR0_1994_SEGMENT_COLUMN_STATISTICS)
      .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(40L, TableStatisticsKind.ROW_COUNT)))
      .locations(ImmutableSet.of(
          new Path(tablePath, "1994/Q1/orders_94_q1.parquet"), new Path(tablePath, "1994/Q2/orders_94_q2.parquet"),
          new Path(tablePath, "1994/Q3/orders_94_q3.parquet"), new Path(tablePath, "1994/Q4/orders_94_q4.parquet")))
      .partitionValues(Collections.singletonList("1994")).build();
  Set<Path> expectedTopLevelSegmentLocations = ImmutableSet.of(
      new Path(tablePath, "1994"), new Path(tablePath, "1995"), new Path(tablePath, "1996"));
  Set<Set<Path>> expectedSegmentFilesLocations = new HashSet<>();
  Set<Path> segmentFiles = ImmutableSet.of(
      new Path(tablePath, "1994/Q2/orders_94_q2.parquet"), new Path(tablePath, "1994/Q4/orders_94_q4.parquet"),
      new Path(tablePath, "1994/Q1/orders_94_q1.parquet"), new Path(tablePath, "1994/Q3/orders_94_q3.parquet"));
  expectedSegmentFilesLocations.add(segmentFiles);
  segmentFiles = ImmutableSet.of(
      new Path(tablePath, "1995/Q2/orders_95_q2.parquet"), new Path(tablePath, "1995/Q4/orders_95_q4.parquet"),
      new Path(tablePath, "1995/Q1/orders_95_q1.parquet"), new Path(tablePath, "1995/Q3/orders_95_q3.parquet"));
  expectedSegmentFilesLocations.add(segmentFiles);
  segmentFiles = ImmutableSet.of(
      new Path(tablePath, "1996/Q3/orders_96_q3.parquet"), new Path(tablePath, "1996/Q2/orders_96_q2.parquet"),
      new Path(tablePath, "1996/Q4/orders_96_q4.parquet"), new Path(tablePath, "1996/Q1/orders_96_q1.parquet"));
  expectedSegmentFilesLocations.add(segmentFiles);
  long dir0q1lastModified = new File(new File(new File(table, "1994"), "Q1"), "orders_94_q1.parquet").lastModified();
  FileMetadata dir01994q1File = FileMetadata.builder()
      .tableInfo(baseTableInfo)
      .metadataInfo(MetadataInfo.builder().type(MetadataType.FILE).identifier("1994/Q1/orders_94_q1.parquet").key("1994").build())
      .schema(SCHEMA).lastModifiedTime(dir0q1lastModified)
      .columnsStatistics(DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS)
      .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT)))
      .path(new Path(tablePath, "1994/Q1/orders_94_q1.parquet")).build();
  RowGroupMetadata dir01994q1rowGroup = RowGroupMetadata.builder()
      .tableInfo(baseTableInfo)
      .metadataInfo(MetadataInfo.builder().type(MetadataType.ROW_GROUP).identifier("1994/Q1/orders_94_q1.parquet/0").key("1994").build())
      .schema(SCHEMA).rowGroupIndex(0).hostAffinity(Collections.emptyMap())
      .lastModifiedTime(dir0q1lastModified)
      .columnsStatistics(DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS)
      .metadataStatistics(Arrays.asList(
          new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT),
          new StatisticsHolder<>(1196L, new BaseStatisticsKind<>(ExactStatisticsConstants.LENGTH, true)),
          new StatisticsHolder<>(4L, new BaseStatisticsKind<>(ExactStatisticsConstants.START, true))))
      .path(new Path(tablePath, "1994/Q1/orders_94_q1.parquet")).build();
  try {
    testBuilder().sqlQuery("ANALYZE TABLE dfs.`%s` REFRESH METADATA", tableName)
        .unOrdered().baselineColumns("ok", "summary")
        .baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName))
        .go();
    BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get()
        .tables().basicRequests().tableMetadata(tableInfo);
    assertEquals(expectedTableMetadata, actualTableMetadata);
    List<SegmentMetadata> topSegmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get()
        .tables().basicRequests().segmentsMetadataByColumn(tableInfo, null, "`dir0`");
    SegmentMetadata actualDir0Metadata = topSegmentMetadata.stream()
        .filter(unit -> unit.getMetadataInfo().identifier().equals("1994"))
        .findAny().orElseThrow(() -> new AssertionError("Segment is absent"));
    Set<Path> locations = actualDir0Metadata.getLocations();
    actualDir0Metadata.toBuilder().locations(locations);
    assertEquals(dir0, actualDir0Metadata);
    Set<Path> topLevelSegmentLocations = topSegmentMetadata.stream()
        .map(SegmentMetadata::getLocation).collect(Collectors.toSet());
    // verify top segments locations
    assertEquals(expectedTopLevelSegmentLocations, topLevelSegmentLocations);
    Set<Set<Path>> segmentFilesLocations = topSegmentMetadata.stream()
        .map(SegmentMetadata::getLocations).collect(Collectors.toSet());
    assertEquals(expectedSegmentFilesLocations, segmentFilesLocations);
    // verify nested segments
    List<SegmentMetadata> nestedSegmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get()
        .tables().basicRequests().segmentsMetadataByColumn(tableInfo, null, "`dir1`");
    assertEquals(12, nestedSegmentMetadata.size());
    SegmentMetadata dir01994q1Segment = SegmentMetadata.builder()
        .tableInfo(baseTableInfo)
        .metadataInfo(MetadataInfo.builder().type(MetadataType.SEGMENT).identifier("1994/Q1").key("1994").build())
        .path(new Path(new Path(tablePath, "1994"), "Q1")).schema(SCHEMA)
        .lastModifiedTime(getMaxLastModified(new File(new File(table, "1994"), "Q1")))
        .column(SchemaPath.getSimplePath("dir1"))
        .columnsStatistics(DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS)
        .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT)))
        .locations(ImmutableSet.of(new Path(tablePath, "1994/Q1/orders_94_q1.parquet")))
        .partitionValues(Collections.singletonList("Q1")).build();
    // verify segment for 1994
    assertEquals(dir01994q1Segment, nestedSegmentMetadata.stream()
        .filter(unit -> unit.getMetadataInfo().identifier().equals("1994/Q1")).findAny().orElse(null));
    // verify files metadata
    List<FileMetadata> filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get()
        .tables().basicRequests().filesMetadata(tableInfo, null, null);
    assertEquals(12, filesMetadata.size());
    // verify first file metadata
    assertEquals(dir01994q1File, filesMetadata.stream()
        .filter(unit -> unit.getMetadataInfo().identifier().equals("1994/Q1/orders_94_q1.parquet")).findAny().orElse(null));
    // verify row groups metadata
    List<RowGroupMetadata> rowGroupsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get()
        .tables().basicRequests().rowGroupsMetadata(tableInfo, null, (String) null);
    assertEquals(12, rowGroupsMetadata.size());
    // verify first row group dir01994q1rowGroup
    assertEquals(dir01994q1rowGroup, rowGroupsMetadata.stream()
        .filter(unit -> unit.getMetadataInfo().identifier().equals("1994/Q1/orders_94_q1.parquet/0")).findAny().orElse(null));
  } finally {
    run("analyze table dfs.`%s` drop metadata if exists", tableName);
  }
}
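The cluster.drillbit()...basicRequests() call chain repeats throughout the test; a small helper that factors out the segment lookup might look like the following (a sketch built from the calls quoted above; the helper name, the ClusterFixture type, and the import paths are assumptions):

import java.util.List;
import org.apache.drill.metastore.metadata.SegmentMetadata;
import org.apache.drill.metastore.metadata.TableInfo;
import org.apache.drill.test.ClusterFixture;

public class SegmentLookupSketch {
  // Fetches segment metadata for one directory-level column, e.g. "`dir0`" or "`dir1`",
  // through the same metastore registry chain the test uses inline.
  public static List<SegmentMetadata> segmentsForColumn(ClusterFixture cluster, TableInfo tableInfo, String columnName) {
    return cluster.drillbit().getContext().getMetastoreRegistry().get()
        .tables().basicRequests()
        .segmentsMetadataByColumn(tableInfo, null, columnName);
  }
}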