Use of org.apache.drill.metastore.metadata.BaseMetadata in project drill by apache.
The class MetadataControllerBatch, method getMetadataUnits:
private List<TableMetadataUnit> getMetadataUnits(TupleReader reader, int nestingLevel) {
  List<TableMetadataUnit> metadataUnits = new ArrayList<>();

  TupleMetadata columnMetadata = reader.tupleSchema();

  ObjectReader metadataColumnReader = reader.column(MetastoreAnalyzeConstants.METADATA_TYPE);
  Preconditions.checkNotNull(metadataColumnReader, "metadataType column wasn't found");

  ObjectReader underlyingMetadataReader = reader.column(MetastoreAnalyzeConstants.COLLECTED_MAP_FIELD);
  if (underlyingMetadataReader != null) {
    if (!underlyingMetadataReader.schema().isArray()) {
      throw new IllegalStateException("Incoming vector with name `collected_map` should be repeated map");
    }
    // current row contains information about underlying metadata
    ArrayReader array = underlyingMetadataReader.array();
    while (array.next()) {
      metadataUnits.addAll(getMetadataUnits(array.tuple(), nestingLevel + 1));
    }
  }

  List<StatisticsHolder<?>> metadataStatistics = getMetadataStatistics(reader, columnMetadata);

  Long rowCount = (Long) metadataStatistics.stream()
      .filter(statisticsHolder -> statisticsHolder.getStatisticsKind() == TableStatisticsKind.ROW_COUNT)
      .findAny()
      .map(StatisticsHolder::getStatisticsValue)
      .orElse(null);

  Map<SchemaPath, ColumnStatistics<?>> columnStatistics = getColumnStatistics(reader, columnMetadata, rowCount);

  MetadataType metadataType = MetadataType.valueOf(metadataColumnReader.scalar().getString());

  BaseMetadata metadata;
  switch (metadataType) {
    case TABLE:
      metadata = getTableMetadata(reader, metadataStatistics, columnStatistics);
      break;
    case SEGMENT:
      metadata = getSegmentMetadata(reader, metadataStatistics, columnStatistics, nestingLevel);
      break;
    case PARTITION:
      metadata = getPartitionMetadata(reader, metadataStatistics, columnStatistics, nestingLevel);
      break;
    case FILE:
      metadata = getFileMetadata(reader, metadataStatistics, columnStatistics, nestingLevel);
      break;
    case ROW_GROUP:
      metadata = getRowGroupMetadata(reader, metadataStatistics, columnStatistics, nestingLevel);
      break;
    default:
      throw new UnsupportedOperationException("Unsupported metadata type: " + metadataType);
  }

  metadataUnits.add(metadata.toMetadataUnit());
  return metadataUnits;
}
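
The ROW_COUNT lookup above uses a filter/findAny/map/orElse chain over the collected statistics. Below is a minimal, self-contained sketch of that pattern; StatKind, StatHolder and RowCountLookupSketch are hypothetical stand-ins for illustration, not Drill's StatisticsKind/StatisticsHolder API.

import java.util.List;

// Hypothetical stand-ins used only to illustrate the filter -> findAny -> map -> orElse
// lookup that getMetadataUnits() performs to pull ROW_COUNT out of the collected statistics.
final class RowCountLookupSketch {

  enum StatKind { ROW_COUNT, NULLS_COUNT }

  record StatHolder(StatKind kind, Object value) {}

  static Long rowCount(List<StatHolder> stats) {
    // Pick the first holder of the wanted kind, unwrap its value, fall back to null when absent.
    return (Long) stats.stream()
        .filter(holder -> holder.kind() == StatKind.ROW_COUNT)
        .findAny()
        .map(StatHolder::value)
        .orElse(null);
  }

  public static void main(String[] args) {
    List<StatHolder> stats = List.of(
        new StatHolder(StatKind.NULLS_COUNT, 0L),
        new StatHolder(StatKind.ROW_COUNT, 42L));
    System.out.println(rowCount(stats)); // prints 42
  }
}
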
Use of org.apache.drill.metastore.metadata.BaseMetadata in project drill by apache.
The class MetadataHandlerBatch, method writeMetadata:
private <T extends BaseMetadata & LocationProvider> VectorContainer writeMetadata(List<T> metadataList) {
  BaseMetadata firstElement = metadataList.iterator().next();
  ResultSetLoader resultSetLoader = getResultSetLoaderForMetadata(firstElement);
  resultSetLoader.startBatch();
  RowSetLoader rowWriter = resultSetLoader.writer();
  Iterator<T> segmentsIterator = metadataList.iterator();
  while (!rowWriter.isFull() && segmentsIterator.hasNext()) {
    T metadata = segmentsIterator.next();
    metadataToHandle.remove(metadata.getMetadataInfo().identifier());

    List<Object> arguments = new ArrayList<>();
    // adds required segment names to the arguments
    arguments.add(metadata.getPath().toUri().getPath());
    Collections.addAll(arguments,
        Arrays.copyOf(
            MetadataIdentifierUtils.getValuesFromMetadataIdentifier(metadata.getMetadataInfo().identifier()),
            popConfig.getContext().segmentColumns().size()));

    // adds column statistics values assuming that they are sorted in alphabetic order
    // (see getResultSetLoaderForMetadata() method)
    metadata.getColumnsStatistics().entrySet().stream()
        .sorted(Comparator.comparing(e -> e.getKey().toExpr()))
        .map(Map.Entry::getValue)
        .flatMap(columnStatistics ->
            AnalyzeColumnUtils.COLUMN_STATISTICS_FUNCTIONS.keySet().stream()
                .map(columnStatistics::get))
        .forEach(arguments::add);

    AnalyzeColumnUtils.META_STATISTICS_FUNCTIONS.keySet().stream()
        .map(metadata::getStatistic)
        .forEach(arguments::add);

    // collectedMap field value
    arguments.add(new Object[] {});

    if (metadataType == MetadataType.SEGMENT) {
      arguments.add(((SegmentMetadata) metadata).getLocations().stream()
          .map(path -> path.toUri().getPath())
          .toArray(String[]::new));
    }
    if (metadataType == MetadataType.ROW_GROUP) {
      arguments.add(String.valueOf(((RowGroupMetadata) metadata).getRowGroupIndex()));
      arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.START)));
      arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.LENGTH)));
    }

    arguments.add(metadata.getSchema().jsonString());
    arguments.add(String.valueOf(metadata.getLastModifiedTime()));
    arguments.add(metadataType.name());

    rowWriter.addRow(arguments.toArray());
  }
  return resultSetLoader.harvest();
}
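
The comment above notes that column statistics are appended in alphabetic column order, with a fixed set of statistic kinds per column, so the flat argument list lines up with the loader's columns. Below is a rough, standalone sketch of that ordering convention; the nested maps, the kind names and StatisticsOrderSketch are illustrative stand-ins, not Drill's ColumnStatistics API.

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

// Simplified sketch of the argument-ordering step in writeMetadata(): statistics are
// flattened sorted by column name, then by a fixed list of statistic kinds per column.
final class StatisticsOrderSketch {

  static final List<String> STAT_KINDS = List.of("minValue", "maxValue", "nullsCount");

  static List<Object> flatten(Map<String, Map<String, Object>> columnStatistics) {
    List<Object> arguments = new ArrayList<>();
    columnStatistics.entrySet().stream()
        .sorted(Map.Entry.comparingByKey())                       // alphabetic column order
        .map(Map.Entry::getValue)
        .flatMap(stats -> STAT_KINDS.stream().map(stats::get))    // fixed kind order per column
        .forEach(arguments::add);
    return arguments;
  }

  public static void main(String[] args) {
    Map<String, Map<String, Object>> stats = new TreeMap<>();
    stats.put("b_col", Map.<String, Object>of("minValue", 1, "maxValue", 9, "nullsCount", 0L));
    stats.put("a_col", Map.<String, Object>of("minValue", "x", "maxValue", "z", "nullsCount", 2L));
    System.out.println(flatten(stats)); // [x, z, 2, 1, 9, 0]
  }
}
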
Use of org.apache.drill.metastore.metadata.BaseMetadata in project drill by apache.
The class MetadataHandlerBatch, method writeMetadataUsingBatchSchema:
private <T extends BaseMetadata & LocationProvider> VectorContainer writeMetadataUsingBatchSchema(List<T> metadataList) {
  Preconditions.checkArgument(!metadataList.isEmpty(), "Metadata list shouldn't be empty.");
  ResultSetLoader resultSetLoader = getResultSetLoaderWithBatchSchema();
  resultSetLoader.startBatch();
  RowSetLoader rowWriter = resultSetLoader.writer();
  Iterator<T> segmentsIterator = metadataList.iterator();
  while (!rowWriter.isFull() && segmentsIterator.hasNext()) {
    T metadata = segmentsIterator.next();
    metadataToHandle.remove(metadata.getMetadataInfo().identifier());

    List<Object> arguments = new ArrayList<>();
    for (VectorWrapper<?> vectorWrapper : container) {
      String[] identifierValues = Arrays.copyOf(
          MetadataIdentifierUtils.getValuesFromMetadataIdentifier(metadata.getMetadataInfo().identifier()),
          popConfig.getContext().segmentColumns().size());
      MaterializedField field = vectorWrapper.getField();
      String fieldName = field.getName();
      if (fieldName.equals(MetastoreAnalyzeConstants.LOCATION_FIELD)) {
        arguments.add(metadata.getPath().toUri().getPath());
      } else if (fieldName.equals(MetastoreAnalyzeConstants.LOCATIONS_FIELD)) {
        if (metadataType == MetadataType.SEGMENT) {
          arguments.add(((SegmentMetadata) metadata).getLocations().stream()
              .map(path -> path.toUri().getPath())
              .toArray(String[]::new));
        } else {
          arguments.add(null);
        }
      } else if (popConfig.getContext().segmentColumns().contains(fieldName)) {
        arguments.add(identifierValues[popConfig.getContext().segmentColumns().indexOf(fieldName)]);
      } else if (AnalyzeColumnUtils.isColumnStatisticsField(fieldName)) {
        arguments.add(metadata.getColumnStatistics(SchemaPath.parseFromString(AnalyzeColumnUtils.getColumnName(fieldName)))
            .get(AnalyzeColumnUtils.getStatisticsKind(fieldName)));
      } else if (AnalyzeColumnUtils.isMetadataStatisticsField(fieldName)) {
        arguments.add(metadata.getStatistic(AnalyzeColumnUtils.getStatisticsKind(fieldName)));
      } else if (fieldName.equals(MetastoreAnalyzeConstants.COLLECTED_MAP_FIELD)) {
        // collectedMap field value
        arguments.add(new Object[] {});
      } else if (fieldName.equals(MetastoreAnalyzeConstants.SCHEMA_FIELD)) {
        arguments.add(metadata.getSchema().jsonString());
      } else if (fieldName.equals(columnNamesOptions.lastModifiedTime())) {
        arguments.add(String.valueOf(metadata.getLastModifiedTime()));
      } else if (fieldName.equals(columnNamesOptions.rowGroupIndex())) {
        arguments.add(String.valueOf(((RowGroupMetadata) metadata).getRowGroupIndex()));
      } else if (fieldName.equals(columnNamesOptions.rowGroupStart())) {
        arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.START)));
      } else if (fieldName.equals(columnNamesOptions.rowGroupLength())) {
        arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.LENGTH)));
      } else {
        throw new UnsupportedOperationException(String.format("Found unexpected field [%s] in incoming batch.", field));
      }
    }
    rowWriter.addRow(arguments.toArray());
  }
  return resultSetLoader.harvest();
}
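
Below is a simplified, self-contained sketch of the schema-driven row assembly this method performs (walk the output schema's field names, pick the matching value per metadata entry, emit one row, stop when the writer is full); the field names, the metadata maps and SchemaDrivenRowSketch are hypothetical stand-ins for Drill's VectorWrapper/MaterializedField-based dispatch.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

// Simplified stand-in for writeMetadataUsingBatchSchema(): one Object[] row per metadata
// entry, fields ordered by the output schema, with an error for unexpected fields.
final class SchemaDrivenRowSketch {

  static List<Object[]> buildRows(List<String> fieldNames,
                                  List<Map<String, Object>> metadataList,
                                  int maxRows) {
    List<Object[]> rows = new ArrayList<>();
    Iterator<Map<String, Object>> metadataIterator = metadataList.iterator();
    while (rows.size() < maxRows && metadataIterator.hasNext()) { // mirrors !rowWriter.isFull() && hasNext()
      Map<String, Object> metadata = metadataIterator.next();
      List<Object> arguments = new ArrayList<>();
      for (String fieldName : fieldNames) {
        if (!metadata.containsKey(fieldName)) {
          // the real code also throws for fields it does not recognize
          throw new UnsupportedOperationException(
              String.format("Found unexpected field [%s] in incoming batch.", fieldName));
        }
        // the real code dispatches per field kind (location, statistics, schema, ...)
        arguments.add(metadata.get(fieldName));
      }
      rows.add(arguments.toArray());
    }
    return rows;
  }

  public static void main(String[] args) {
    List<String> schema = List.of("location", "metadataType", "lastModifiedTime");
    List<Map<String, Object>> metadataList = List.of(
        Map.<String, Object>of(
            "location", "/data/t1/0_0_0.parquet",
            "metadataType", "FILE",
            "lastModifiedTime", "1600000000000"));
    buildRows(schema, metadataList, 4096)
        .forEach(row -> System.out.println(Arrays.toString(row)));
  }
}
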