Use of org.apache.drill.metastore.metadata.LocationProvider in project drill by apache.
The class MetadataHandlerBatch, method writeMetadata.
/**
 * Writes the given metadata units into a new batch, one row per unit, using the loader
 * returned by {@code getResultSetLoaderForMetadata()} for the first list element.
 *
 * <p>Every unit that is written is also removed from {@code metadataToHandle}. Writing
 * stops as soon as the row writer reports that it is full; units that were not reached
 * are neither written nor removed from {@code metadataToHandle} by this method.
 *
 * @param metadataList metadata units to write, must not be empty
 * @param <T> metadata type that also exposes its location
 * @return container harvested from the result set loader
 * @throws IllegalArgumentException if {@code metadataList} is empty
 */
private <T extends BaseMetadata & LocationProvider> VectorContainer writeMetadata(List<T> metadataList) {
  // Fail fast with the same message writeMetadataUsingBatchSchema() uses, instead of
  // a bare NoSuchElementException from iterator().next() below.
  if (metadataList.isEmpty()) {
    throw new IllegalArgumentException("Metadata list shouldn't be empty.");
  }
  BaseMetadata firstElement = metadataList.iterator().next();

  ResultSetLoader resultSetLoader = getResultSetLoaderForMetadata(firstElement);
  resultSetLoader.startBatch();
  RowSetLoader rowWriter = resultSetLoader.writer();
  Iterator<T> segmentsIterator = metadataList.iterator();
  while (!rowWriter.isFull() && segmentsIterator.hasNext()) {
    T metadata = segmentsIterator.next();
    metadataToHandle.remove(metadata.getMetadataInfo().identifier());

    // The order in which values are appended below must match the column order of the
    // loader's schema (see getResultSetLoaderForMetadata() method).
    List<Object> arguments = new ArrayList<>();

    // location of the metadata unit
    arguments.add(metadata.getPath().toUri().getPath());

    // adds required segment names to the arguments; Arrays.copyOf() truncates the
    // identifier values or pads them with nulls to the number of segment columns
    Collections.addAll(
        arguments,
        Arrays.copyOf(
            MetadataIdentifierUtils.getValuesFromMetadataIdentifier(metadata.getMetadataInfo().identifier()),
            popConfig.getContext().segmentColumns().size()));

    // adds column statistics values assuming that they are sorted in alphabetic order
    // (see getResultSetLoaderForMetadata() method)
    metadata.getColumnsStatistics().entrySet().stream()
        .sorted(Comparator.comparing(e -> e.getKey().toExpr()))
        .map(Map.Entry::getValue)
        .flatMap(columnStatistics ->
            AnalyzeColumnUtils.COLUMN_STATISTICS_FUNCTIONS.keySet().stream()
                .map(columnStatistics::get))
        .forEach(arguments::add);

    // metadata-level statistics, in the iteration order of META_STATISTICS_FUNCTIONS keys
    AnalyzeColumnUtils.META_STATISTICS_FUNCTIONS.keySet().stream()
        .map(metadata::getStatistic)
        .forEach(arguments::add);

    // collectedMap field value
    arguments.add(new Object[] {});

    if (metadataType == MetadataType.SEGMENT) {
      // segments additionally carry the list of their locations
      arguments.add(((SegmentMetadata) metadata).getLocations().stream()
          .map(path -> path.toUri().getPath())
          .toArray(String[]::new));
    }

    if (metadataType == MetadataType.ROW_GROUP) {
      // row groups additionally carry their index, start and length, all as strings
      arguments.add(String.valueOf(((RowGroupMetadata) metadata).getRowGroupIndex()));
      arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.START)));
      arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.LENGTH)));
    }

    arguments.add(metadata.getSchema().jsonString());
    arguments.add(String.valueOf(metadata.getLastModifiedTime()));
    arguments.add(metadataType.name());

    rowWriter.addRow(arguments.toArray());
  }

  return resultSetLoader.harvest();
}
Use of org.apache.drill.metastore.metadata.LocationProvider in project drill by apache.
The class MetadataHandlerBatch, method writeMetadataUsingBatchSchema.
/**
 * Writes the given metadata units into a new batch, one row per unit, producing values
 * in the order of the fields of the incoming {@code container}. The loader is obtained
 * from {@code getResultSetLoaderWithBatchSchema()}.
 *
 * <p>Every unit that is written is also removed from {@code metadataToHandle}. Writing
 * stops as soon as the row writer reports that it is full; units that were not reached
 * are neither written nor removed from {@code metadataToHandle} by this method.
 *
 * @param metadataList metadata units to write, must not be empty
 * @param <T> metadata type that also exposes its location
 * @return container harvested from the result set loader
 * @throws IllegalArgumentException if {@code metadataList} is empty
 * @throws UnsupportedOperationException if the incoming container has a field that
 *         matches none of the known field kinds
 */
private <T extends BaseMetadata & LocationProvider> VectorContainer writeMetadataUsingBatchSchema(List<T> metadataList) {
  Preconditions.checkArgument(!metadataList.isEmpty(), "Metadata list shouldn't be empty.");

  ResultSetLoader resultSetLoader = getResultSetLoaderWithBatchSchema();
  resultSetLoader.startBatch();
  RowSetLoader rowWriter = resultSetLoader.writer();
  Iterator<T> segmentsIterator = metadataList.iterator();
  while (!rowWriter.isFull() && segmentsIterator.hasNext()) {
    T metadata = segmentsIterator.next();
    metadataToHandle.remove(metadata.getMetadataInfo().identifier());

    // The identifier values depend only on the metadata unit, not on the field being
    // written, so they are computed once per row rather than once per field.
    // Arrays.copyOf() truncates them or pads them with nulls to the number of
    // segment columns.
    String[] identifierValues = Arrays.copyOf(
        MetadataIdentifierUtils.getValuesFromMetadataIdentifier(metadata.getMetadataInfo().identifier()),
        popConfig.getContext().segmentColumns().size());

    List<Object> arguments = new ArrayList<>();
    // one value per field of the incoming batch, in the batch's field order
    for (VectorWrapper<?> vectorWrapper : container) {
      MaterializedField field = vectorWrapper.getField();
      String fieldName = field.getName();

      if (fieldName.equals(MetastoreAnalyzeConstants.LOCATION_FIELD)) {
        arguments.add(metadata.getPath().toUri().getPath());
      } else if (fieldName.equals(MetastoreAnalyzeConstants.LOCATIONS_FIELD)) {
        // only segments have a list of locations; other metadata types get null
        if (metadataType == MetadataType.SEGMENT) {
          arguments.add(((SegmentMetadata) metadata).getLocations().stream()
              .map(path -> path.toUri().getPath())
              .toArray(String[]::new));
        } else {
          arguments.add(null);
        }
      } else if (popConfig.getContext().segmentColumns().contains(fieldName)) {
        // segment column: take the identifier value at the column's position
        arguments.add(identifierValues[popConfig.getContext().segmentColumns().indexOf(fieldName)]);
      } else if (AnalyzeColumnUtils.isColumnStatisticsField(fieldName)) {
        arguments.add(
            metadata.getColumnStatistics(SchemaPath.parseFromString(AnalyzeColumnUtils.getColumnName(fieldName)))
                .get(AnalyzeColumnUtils.getStatisticsKind(fieldName)));
      } else if (AnalyzeColumnUtils.isMetadataStatisticsField(fieldName)) {
        arguments.add(metadata.getStatistic(AnalyzeColumnUtils.getStatisticsKind(fieldName)));
      } else if (fieldName.equals(MetastoreAnalyzeConstants.COLLECTED_MAP_FIELD)) {
        // collectedMap field value
        arguments.add(new Object[] {});
      } else if (fieldName.equals(MetastoreAnalyzeConstants.SCHEMA_FIELD)) {
        arguments.add(metadata.getSchema().jsonString());
      } else if (fieldName.equals(columnNamesOptions.lastModifiedTime())) {
        arguments.add(String.valueOf(metadata.getLastModifiedTime()));
      } else if (fieldName.equals(columnNamesOptions.rowGroupIndex())) {
        // NOTE(review): the cast assumes this field only appears for row group
        // metadata - confirm against the callers.
        arguments.add(String.valueOf(((RowGroupMetadata) metadata).getRowGroupIndex()));
      } else if (fieldName.equals(columnNamesOptions.rowGroupStart())) {
        arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.START)));
      } else if (fieldName.equals(columnNamesOptions.rowGroupLength())) {
        arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.LENGTH)));
      } else if (fieldName.equals(MetastoreAnalyzeConstants.METADATA_TYPE)) {
        arguments.add(metadataType.name());
      } else {
        throw new UnsupportedOperationException(String.format("Found unexpected field [%s] in incoming batch.", field));
      }
    }

    rowWriter.addRow(arguments.toArray());
  }

  return resultSetLoader.harvest();
}
Aggregations