Use of org.apache.drill.metastore.metadata.PartitionMetadata in the Apache Drill project.
Class AbstractParquetGroupScan, method modifyFileSelection.
// limit push down methods end
// helper method used for partition pruning and filter push down
/**
 * Restricts this group scan to the files of the given selection.
 *
 * <p>After delegating to the superclass, the method rebuilds {@code fileSet} and
 * {@code entries} from the selected files and then narrows the row group, file,
 * partition and segment metadata to what matches {@code fileSet}. The table metadata
 * row count is updated from the retained row groups, and {@code rowGroupInfos} is
 * reset to {@code null}.
 *
 * @param selection file selection whose files should remain in this scan
 */
@Override
public void modifyFileSelection(FileSelection selection) {
  super.modifyFileSelection(selection);

  List<Path> selectedPaths = selection.getFiles();
  fileSet = new HashSet<>(selectedPaths);
  entries = new ArrayList<>(selectedPaths.size());
  for (Path selectedPath : selectedPaths) {
    entries.add(new ReadEntryWithPath(selectedPath));
  }

  // Keep only the row groups whose file is still selected, in their original order.
  Multimap<Path, RowGroupMetadata> retainedRowGroups = LinkedListMultimap.create();
  if (!getRowGroupsMetadata().isEmpty()) {
    for (Map.Entry<Path, RowGroupMetadata> rowGroup : getRowGroupsMetadata().entries()) {
      if (fileSet.contains(rowGroup.getKey())) {
        retainedRowGroups.put(rowGroup.getKey(), rowGroup.getValue());
      }
    }
  }
  this.rowGroups = retainedRowGroups;

  // The row count is recomputed only after the row groups above have been replaced.
  tableMetadata = TableMetadataUtils.updateRowCount(getTableMetadata(), getRowGroupsMetadata().values());

  if (getFilesMetadata().isEmpty()) {
    this.files = Collections.emptyMap();
  } else {
    this.files = getFilesMetadata().entrySet().stream()
        .filter(fileEntry -> fileSet.contains(fileEntry.getKey()))
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  }

  // A partition is retained as soon as one of its locations is a selected file.
  List<PartitionMetadata> retainedPartitions = new ArrayList<>();
  if (!getPartitionsMetadata().isEmpty()) {
    for (PartitionMetadata partition : getPartitionsMetadata()) {
      boolean hasSelectedLocation = partition.getLocations().stream().anyMatch(location -> fileSet.contains(location));
      if (hasSelectedLocation) {
        retainedPartitions.add(partition);
      }
    }
  }
  partitions = retainedPartitions;

  // Segments are left untouched when there is no segment metadata.
  if (!getSegmentsMetadata().isEmpty()) {
    this.segments = getSegmentsMetadata().entrySet().stream()
        .filter(segmentEntry -> fileSet.contains(segmentEntry.getKey()))
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  }

  rowGroupInfos = null;
}
Use of org.apache.drill.metastore.metadata.PartitionMetadata in the Apache Drill project.
Class BasicTablesTransformer, method all.
/**
 * Converts the given metadata units into their typed metadata classes and groups
 * them by kind.
 *
 * <p>Each unit's type is resolved through {@code MetadataType.fromValue}. Units for
 * which that call returns {@code null}, or whose type is not one of TABLE, SEGMENT,
 * FILE, ROW_GROUP or PARTITION, are skipped.
 *
 * @param units metadata units to convert
 * @return holder with the converted tables, segments, files, row groups and partitions
 */
public static MetadataHolder all(List<TableMetadataUnit> units) {
  List<BaseTableMetadata> tableList = new ArrayList<>();
  List<SegmentMetadata> segmentList = new ArrayList<>();
  List<FileMetadata> fileList = new ArrayList<>();
  List<RowGroupMetadata> rowGroupList = new ArrayList<>();
  List<PartitionMetadata> partitionList = new ArrayList<>();

  for (TableMetadataUnit unit : units) {
    MetadataType type = MetadataType.fromValue(unit.metadataType());
    if (type == MetadataType.TABLE) {
      tableList.add(BaseTableMetadata.builder().metadataUnit(unit).build());
    } else if (type == MetadataType.SEGMENT) {
      segmentList.add(SegmentMetadata.builder().metadataUnit(unit).build());
    } else if (type == MetadataType.FILE) {
      fileList.add(FileMetadata.builder().metadataUnit(unit).build());
    } else if (type == MetadataType.ROW_GROUP) {
      rowGroupList.add(RowGroupMetadata.builder().metadataUnit(unit).build());
    } else if (type == MetadataType.PARTITION) {
      partitionList.add(PartitionMetadata.builder().metadataUnit(unit).build());
    }
    // Any other type, including an unresolved (null) one, is ignored.
  }

  return new MetadataHolder(tableList, segmentList, fileList, rowGroupList, partitionList);
}
Use of org.apache.drill.metastore.metadata.PartitionMetadata in the Apache Drill project.
Class MetadataControllerBatch, method getPartitionMetadata.
/**
 * Builds a {@link PartitionMetadata} instance from the current row of the reader.
 *
 * <p>The metadata key is the value of the first segment column, or
 * {@code MetadataInfo.DEFAULT_SEGMENT_KEY} when there are no segment columns. The
 * metadata identifier is derived from the values of the first {@code nestingLevel}
 * segment columns; an empty identifier is stored as {@code null}.
 *
 * @param reader             reader positioned on the row to convert
 * @param metadataStatistics metadata-level statistics to attach
 * @param columnStatistics   per-column statistics to attach
 * @param nestingLevel       number of leading segment columns used for the identifier
 * @return partition metadata for the current row
 */
private PartitionMetadata getPartitionMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics, Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
  List<String> segmentColumns = popConfig.getContext().segmentColumns();

  String segmentKey;
  if (!segmentColumns.isEmpty()) {
    segmentKey = reader.column(segmentColumns.get(0)).scalar().getString();
  } else {
    segmentKey = MetadataInfo.DEFAULT_SEGMENT_KEY;
  }

  List<String> partitionValues = segmentColumns.stream()
      .limit(nestingLevel)
      .map(segmentColumn -> reader.column(segmentColumn).scalar().getString())
      .collect(Collectors.toList());

  String identifier = MetadataIdentifierUtils.getMetadataIdentifierKey(partitionValues);

  MetadataInfo partitionInfo = MetadataInfo.builder()
      .type(MetadataType.PARTITION)
      .key(segmentKey)
      .identifier(StringUtils.defaultIfEmpty(identifier, null))
      .build();

  return PartitionMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(partitionInfo)
      .columnsStatistics(columnStatistics)
      .metadataStatistics(metadataStatistics)
      .locations(getIncomingLocations(reader))
      // The last modified time is read from the row as a string and parsed to a long.
      .lastModifiedTime(Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString()))
      .schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString()))
      .build();
}
Use of org.apache.drill.metastore.metadata.PartitionMetadata in the Apache Drill project.
Class TestTableMetadataUnitConversion, method testPartitionMetadata.
/**
 * Checks that a {@link PartitionMetadata} instance converts to the expected
 * {@code TableMetadataUnit} and that the resulting unit can be turned back into a
 * non-null {@link PartitionMetadata}.
 */
@Test
public void testPartitionMetadata() {
  TableInfo tableInfo = data.basicTableInfo;
  SchemaPath partitionColumn = SchemaPath.getSimplePath("part_varchar");
  List<String> partitionValues = Collections.singletonList("g");

  Set<Path> locations = new HashSet<>();
  locations.add(new Path("part_int=3/part_varchar=g/0_0_0.parquet"));
  locations.add(new Path("part_int=3/part_varchar=g/0_0_1.parquet"));

  MetadataInfo metadataInfo = MetadataInfo.builder()
      .type(MetadataType.PARTITION)
      .key("part_int=3")
      .identifier("part_int=3/part_varchar=g")
      .build();

  PartitionMetadata partitionMetadata = PartitionMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(metadataInfo)
      .schema(data.schema)
      .columnsStatistics(data.columnsStatistics)
      .metadataStatistics(data.metadataStatistics)
      .lastModifiedTime(data.lastModifiedTime)
      .column(partitionColumn)
      .partitionValues(partitionValues)
      .locations(locations)
      .build();

  // The expected unit carries each location as the path component of its URI.
  TableMetadataUnit expectedUnit = TableMetadataUnit.builder()
      .storagePlugin(tableInfo.storagePlugin())
      .workspace(tableInfo.workspace())
      .tableName(tableInfo.name())
      .metadataType(metadataInfo.type().name())
      .metadataKey(metadataInfo.key())
      .metadataIdentifier(metadataInfo.identifier())
      .schema(data.unitSchema)
      .columnsStatistics(data.unitColumnsStatistics)
      .metadataStatistics(data.unitMetadataStatistics)
      .lastModifiedTime(data.lastModifiedTime)
      .column(partitionColumn.toString())
      .partitionValues(partitionValues)
      .locations(locations.stream().map(path -> path.toUri().getPath()).collect(Collectors.toList()))
      .build();

  TableMetadataUnit actualUnit = partitionMetadata.toMetadataUnit();

  assertEquals(expectedUnit, actualUnit);
  assertNotNull(PartitionMetadata.builder().metadataUnit(actualUnit).build());
}
Use of org.apache.drill.metastore.metadata.PartitionMetadata in the Apache Drill project.
Class TestInfoSchemaWithMetastore, method testPartitions.
/**
 * Stores a default segment, a {@code part_int=3} segment and a partition in the
 * metastore, then queries {@code information_schema.partitions} for the table and
 * expects one row for the non-default segment and one row for the partition.
 */
@Test
public void testPartitions() throws Exception {
  String tableName = "table_with_partitions";
  ZonedDateTime currentTime = currentUtcTime();
  // All three metadata objects share the same last modified timestamp.
  long lastModifiedMillis = currentTime.toInstant().toEpochMilli();

  TableInfo tableInfo = TableInfo.builder()
      .storagePlugin("dfs")
      .workspace("tmp")
      .name(tableName)
      .type("PARQUET")
      .build();

  SegmentMetadata defaultSegment = SegmentMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(MetadataInfo.builder()
          .type(MetadataType.SEGMENT)
          .key(MetadataInfo.DEFAULT_SEGMENT_KEY)
          .build())
      .path(new Path("/tmp", tableName))
      .locations(Collections.emptySet())
      .metadataStatistics(Collections.emptyList())
      .columnsStatistics(Collections.emptyMap())
      .lastModifiedTime(lastModifiedMillis)
      .build();

  SegmentMetadata segment = SegmentMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(MetadataInfo.builder()
          .type(MetadataType.SEGMENT)
          .key("part_int=3")
          .identifier("part_int=3")
          .build())
      .column(SchemaPath.parseFromString("dir0"))
      .partitionValues(Collections.singletonList("part_int=3"))
      .path(new Path(String.format("/tmp/%s/part_int=3", tableName)))
      .locations(Collections.emptySet())
      .metadataStatistics(Collections.emptyList())
      .columnsStatistics(Collections.emptyMap())
      .lastModifiedTime(lastModifiedMillis)
      .build();

  PartitionMetadata partition = PartitionMetadata.builder()
      .tableInfo(tableInfo)
      .metadataInfo(MetadataInfo.builder()
          .type(MetadataType.PARTITION)
          .key("part_int=3")
          .identifier("part_int=3/part_varchar=g")
          .build())
      .column(SchemaPath.parseFromString("part_varchar"))
      .partitionValues(Collections.singletonList("g"))
      .locations(Collections.emptySet())
      .metadataStatistics(Collections.emptyList())
      .columnsStatistics(Collections.emptyMap())
      .lastModifiedTime(lastModifiedMillis)
      .build();

  metastore.tables().modify()
      .overwrite(defaultSegment.toMetadataUnit(), segment.toMetadataUnit(), partition.toMetadataUnit())
      .execute();

  List<String> columns = Arrays.asList(
      InfoSchemaConstants.SHRD_COL_TABLE_CATALOG,
      InfoSchemaConstants.SHRD_COL_TABLE_SCHEMA,
      InfoSchemaConstants.SHRD_COL_TABLE_NAME,
      InfoSchemaConstants.PARTITIONS_COL_METADATA_KEY,
      InfoSchemaConstants.PARTITIONS_COL_METADATA_TYPE,
      InfoSchemaConstants.PARTITIONS_COL_METADATA_IDENTIFIER,
      InfoSchemaConstants.PARTITIONS_COL_PARTITION_COLUMN,
      InfoSchemaConstants.PARTITIONS_COL_PARTITION_VALUE,
      InfoSchemaConstants.PARTITIONS_COL_LOCATION,
      InfoSchemaConstants.PARTITIONS_COL_LAST_MODIFIED_TIME);

  // Baseline rows: the part_int=3 segment and the partition.
  client.testBuilder()
      .sqlQuery("select %s from information_schema.`partitions` where table_name = '%s'", String.join(", ", columns), tableName)
      .unOrdered()
      .baselineColumns(columns.toArray(new String[0]))
      .baselineValues("DRILL", "dfs.tmp", tableName, "part_int=3", MetadataType.SEGMENT.name(), "part_int=3", "`dir0`", "part_int=3", "/tmp/table_with_partitions/part_int=3", currentTime.toLocalDateTime())
      .baselineValues("DRILL", "dfs.tmp", tableName, "part_int=3", MetadataType.PARTITION.name(), "part_int=3/part_varchar=g", "`part_varchar`", "g", null, currentTime.toLocalDateTime())
      .go();
}
Aggregations