Search in sources :

Example 1 with BaseMetadata

use of org.apache.drill.metastore.metadata.BaseMetadata in project drill by apache.

In class MetadataControllerBatch, method getMetadataUnits.

/**
 * Builds metastore metadata units from the row the reader is positioned on and, recursively,
 * from every entry of its collected-map column when that column is present.
 *
 * <p>Units produced from nested entries are placed in the returned list before the unit of the
 * current row.
 *
 * @param reader       reader for the row (or nested tuple) to convert
 * @param nestingLevel level of the current row; incremented by one for each nested tuple and
 *                     passed to the segment, partition, file and row group metadata builders
 * @return metadata units of the nested entries followed by the unit for the current row
 * @throws IllegalStateException         if the collected-map column is present but is not an array
 * @throws UnsupportedOperationException if the metadata type of the row is not one of
 *                                       TABLE, SEGMENT, PARTITION, FILE or ROW_GROUP
 */
private List<TableMetadataUnit> getMetadataUnits(TupleReader reader, int nestingLevel) {
    List<TableMetadataUnit> units = new ArrayList<>();
    TupleMetadata schema = reader.tupleSchema();

    // the metadata type column is mandatory for every row
    ObjectReader typeReader = reader.column(MetastoreAnalyzeConstants.METADATA_TYPE);
    Preconditions.checkNotNull(typeReader, "metadataType column wasn't found");

    ObjectReader nestedReader = reader.column(MetastoreAnalyzeConstants.COLLECTED_MAP_FIELD);
    if (nestedReader != null) {
        if (!nestedReader.schema().isArray()) {
            throw new IllegalStateException("Incoming vector with name `collected_map` should be repeated map");
        }
        // the row carries underlying metadata: convert every nested tuple one level deeper
        ArrayReader nestedRows = nestedReader.array();
        while (nestedRows.next()) {
            units.addAll(getMetadataUnits(nestedRows.tuple(), nestingLevel + 1));
        }
    }

    List<StatisticsHolder<?>> statistics = getMetadataStatistics(reader, schema);

    // row count stays null when no ROW_COUNT statistic was collected for this row
    Long rowCount = (Long) statistics.stream()
        .filter(holder -> holder.getStatisticsKind() == TableStatisticsKind.ROW_COUNT)
        .findAny()
        .map(StatisticsHolder::getStatisticsValue)
        .orElse(null);

    Map<SchemaPath, ColumnStatistics<?>> columnStatistics = getColumnStatistics(reader, schema, rowCount);
    MetadataType type = MetadataType.valueOf(typeReader.scalar().getString());

    BaseMetadata metadata;
    switch (type) {
        case TABLE:
            metadata = getTableMetadata(reader, statistics, columnStatistics);
            break;
        case SEGMENT:
            metadata = getSegmentMetadata(reader, statistics, columnStatistics, nestingLevel);
            break;
        case PARTITION:
            metadata = getPartitionMetadata(reader, statistics, columnStatistics, nestingLevel);
            break;
        case FILE:
            metadata = getFileMetadata(reader, statistics, columnStatistics, nestingLevel);
            break;
        case ROW_GROUP:
            metadata = getRowGroupMetadata(reader, statistics, columnStatistics, nestingLevel);
            break;
        default:
            throw new UnsupportedOperationException("Unsupported metadata type: " + type);
    }

    units.add(metadata.toMetadataUnit());
    return units;
}
Also used : ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) ArrayList(java.util.ArrayList) MetadataType(org.apache.drill.metastore.metadata.MetadataType) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) SchemaPath(org.apache.drill.common.expression.SchemaPath) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) ObjectReader(org.apache.drill.exec.vector.accessor.ObjectReader)

Example 2 with BaseMetadata

use of org.apache.drill.metastore.metadata.BaseMetadata in project drill by apache.

In class MetadataHandlerBatch, method writeMetadata.

/**
 * Writes the given metadata into a new batch whose result set loader is obtained from the
 * first element of the list.
 *
 * <p>Rows are written until either the list is exhausted or the row writer reports that it is
 * full. The identifier of every written element is removed from {@code metadataToHandle}.
 *
 * <p>Each row is assembled positionally: location, segment column values, column statistics,
 * metadata statistics, an empty collected-map value, type-specific values (locations for
 * segments; row group index, start and length for row groups), schema, last modified time and
 * the metadata type name.
 *
 * @param metadataList metadata to write; must not be empty
 * @param <T>          metadata type which also provides its location
 * @return container harvested from the result set loader
 * @throws IllegalArgumentException if {@code metadataList} is empty
 */
private <T extends BaseMetadata & LocationProvider> VectorContainer writeMetadata(List<T> metadataList) {
    // fail with a descriptive message instead of a bare NoSuchElementException from iterator().next()
    Preconditions.checkArgument(!metadataList.isEmpty(), "Metadata list shouldn't be empty.");
    BaseMetadata firstElement = metadataList.iterator().next();
    ResultSetLoader resultSetLoader = getResultSetLoaderForMetadata(firstElement);
    resultSetLoader.startBatch();
    RowSetLoader rowWriter = resultSetLoader.writer();
    Iterator<T> segmentsIterator = metadataList.iterator();
    while (!rowWriter.isFull() && segmentsIterator.hasNext()) {
        T metadata = segmentsIterator.next();
        metadataToHandle.remove(metadata.getMetadataInfo().identifier());
        List<Object> arguments = new ArrayList<>();
        // adds required segment names to the arguments
        arguments.add(metadata.getPath().toUri().getPath());
        // identifier values are truncated or null-padded to the number of segment columns
        Collections.addAll(
            arguments,
            Arrays.copyOf(
                MetadataIdentifierUtils.getValuesFromMetadataIdentifier(metadata.getMetadataInfo().identifier()),
                popConfig.getContext().segmentColumns().size()));
        // adds column statistics values assuming that they are sorted in alphabetic order
        // (see getResultSetLoaderForMetadata() method)
        metadata.getColumnsStatistics().entrySet().stream()
            .sorted(Comparator.comparing(e -> e.getKey().toExpr()))
            .map(Map.Entry::getValue)
            .flatMap(columnStatistics ->
                AnalyzeColumnUtils.COLUMN_STATISTICS_FUNCTIONS.keySet().stream().map(columnStatistics::get))
            .forEach(arguments::add);
        AnalyzeColumnUtils.META_STATISTICS_FUNCTIONS.keySet().stream()
            .map(metadata::getStatistic)
            .forEach(arguments::add);
        // collectedMap field value
        arguments.add(new Object[] {});
        if (metadataType == MetadataType.SEGMENT) {
            arguments.add(((SegmentMetadata) metadata).getLocations().stream()
                .map(path -> path.toUri().getPath())
                .toArray(String[]::new));
        }
        if (metadataType == MetadataType.ROW_GROUP) {
            arguments.add(String.valueOf(((RowGroupMetadata) metadata).getRowGroupIndex()));
            arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.START)));
            arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.LENGTH)));
        }
        arguments.add(metadata.getSchema().jsonString());
        arguments.add(String.valueOf(metadata.getLastModifiedTime()));
        arguments.add(metadataType.name());
        rowWriter.addRow(arguments.toArray());
    }
    return resultSetLoader.harvest();
}
Also used : AbstractSingleRecordBatch(org.apache.drill.exec.record.AbstractSingleRecordBatch) MetadataType(org.apache.drill.metastore.metadata.MetadataType) Arrays(java.util.Arrays) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ResultSetLoaderImpl(org.apache.drill.exec.physical.resultSet.impl.ResultSetLoaderImpl) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) List(java.util.List) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) ResultSetOptionBuilder(org.apache.drill.exec.physical.resultSet.impl.ResultSetOptionBuilder) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) MaterializedField(org.apache.drill.exec.record.MaterializedField) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) Function(java.util.function.Function) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) ArrayList(java.util.ArrayList) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) StreamSupport(java.util.stream.StreamSupport) 
NONE(org.apache.drill.exec.record.RecordBatch.IterOutcome.NONE) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) Iterator(java.util.Iterator) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) MetadataHandlerPOP(org.apache.drill.exec.physical.config.MetadataHandlerPOP) LocationProvider(org.apache.drill.metastore.metadata.LocationProvider) VarCharVector(org.apache.drill.exec.vector.VarCharVector) Tables(org.apache.drill.metastore.components.tables.Tables) Comparator(java.util.Comparator) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) Collections(java.util.Collections) ArrayList(java.util.ArrayList) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader)

Example 3 with BaseMetadata

use of org.apache.drill.metastore.metadata.BaseMetadata in project drill by apache.

In class MetadataHandlerBatch, method writeMetadataUsingBatchSchema.

/**
 * Writes the given metadata into a new batch, producing one value per vector of the current
 * {@code container} in the container's iteration order.
 *
 * <p>Rows are written until either the list is exhausted or the row writer reports that it is
 * full. The identifier of every written element is removed from {@code metadataToHandle}.
 *
 * @param metadataList metadata to write; must not be empty
 * @param <T>          metadata type which also provides its location
 * @return container harvested from the result set loader
 * @throws IllegalArgumentException      if {@code metadataList} is empty
 * @throws UnsupportedOperationException if the container holds a field that is not recognized
 */
private <T extends BaseMetadata & LocationProvider> VectorContainer writeMetadataUsingBatchSchema(List<T> metadataList) {
    Preconditions.checkArgument(!metadataList.isEmpty(), "Metadata list shouldn't be empty.");
    ResultSetLoader resultSetLoader = getResultSetLoaderWithBatchSchema();
    resultSetLoader.startBatch();
    RowSetLoader rowWriter = resultSetLoader.writer();
    Iterator<T> segmentsIterator = metadataList.iterator();
    while (!rowWriter.isFull() && segmentsIterator.hasNext()) {
        T metadata = segmentsIterator.next();
        metadataToHandle.remove(metadata.getMetadataInfo().identifier());
        // identifier values depend only on the current metadata element, so they are computed
        // once per row rather than once per vector; the array is truncated or null-padded
        // to the number of segment columns
        String[] identifierValues = Arrays.copyOf(
            MetadataIdentifierUtils.getValuesFromMetadataIdentifier(metadata.getMetadataInfo().identifier()),
            popConfig.getContext().segmentColumns().size());
        List<Object> arguments = new ArrayList<>();
        for (VectorWrapper<?> vectorWrapper : container) {
            MaterializedField field = vectorWrapper.getField();
            String fieldName = field.getName();
            if (fieldName.equals(MetastoreAnalyzeConstants.LOCATION_FIELD)) {
                arguments.add(metadata.getPath().toUri().getPath());
            } else if (fieldName.equals(MetastoreAnalyzeConstants.LOCATIONS_FIELD)) {
                // locations are written for segments only; other metadata types get a null value
                if (metadataType == MetadataType.SEGMENT) {
                    arguments.add(((SegmentMetadata) metadata).getLocations().stream()
                        .map(path -> path.toUri().getPath())
                        .toArray(String[]::new));
                } else {
                    arguments.add(null);
                }
            } else if (popConfig.getContext().segmentColumns().contains(fieldName)) {
                // segment column value is taken from the identifier at the column's position
                arguments.add(identifierValues[popConfig.getContext().segmentColumns().indexOf(fieldName)]);
            } else if (AnalyzeColumnUtils.isColumnStatisticsField(fieldName)) {
                arguments.add(
                    metadata.getColumnStatistics(SchemaPath.parseFromString(AnalyzeColumnUtils.getColumnName(fieldName)))
                        .get(AnalyzeColumnUtils.getStatisticsKind(fieldName)));
            } else if (AnalyzeColumnUtils.isMetadataStatisticsField(fieldName)) {
                arguments.add(metadata.getStatistic(AnalyzeColumnUtils.getStatisticsKind(fieldName)));
            } else if (fieldName.equals(MetastoreAnalyzeConstants.COLLECTED_MAP_FIELD)) {
                // collectedMap field value
                arguments.add(new Object[] {});
            } else if (fieldName.equals(MetastoreAnalyzeConstants.SCHEMA_FIELD)) {
                arguments.add(metadata.getSchema().jsonString());
            } else if (fieldName.equals(columnNamesOptions.lastModifiedTime())) {
                arguments.add(String.valueOf(metadata.getLastModifiedTime()));
            } else if (fieldName.equals(columnNamesOptions.rowGroupIndex())) {
                arguments.add(String.valueOf(((RowGroupMetadata) metadata).getRowGroupIndex()));
            } else if (fieldName.equals(columnNamesOptions.rowGroupStart())) {
                arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.START)));
            } else if (fieldName.equals(columnNamesOptions.rowGroupLength())) {
                arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.LENGTH)));
            } else if (fieldName.equals(MetastoreAnalyzeConstants.METADATA_TYPE)) {
                arguments.add(metadataType.name());
            } else {
                throw new UnsupportedOperationException(String.format("Found unexpected field [%s] in incoming batch.", field));
            }
        }
        rowWriter.addRow(arguments.toArray());
    }
    return resultSetLoader.harvest();
}
Also used : AbstractSingleRecordBatch(org.apache.drill.exec.record.AbstractSingleRecordBatch) MetadataType(org.apache.drill.metastore.metadata.MetadataType) Arrays(java.util.Arrays) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ResultSetLoaderImpl(org.apache.drill.exec.physical.resultSet.impl.ResultSetLoaderImpl) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) List(java.util.List) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) ResultSetOptionBuilder(org.apache.drill.exec.physical.resultSet.impl.ResultSetOptionBuilder) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) MaterializedField(org.apache.drill.exec.record.MaterializedField) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) Function(java.util.function.Function) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) ArrayList(java.util.ArrayList) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) StreamSupport(java.util.stream.StreamSupport) 
NONE(org.apache.drill.exec.record.RecordBatch.IterOutcome.NONE) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) Iterator(java.util.Iterator) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) MetadataHandlerPOP(org.apache.drill.exec.physical.config.MetadataHandlerPOP) LocationProvider(org.apache.drill.metastore.metadata.LocationProvider) VarCharVector(org.apache.drill.exec.vector.VarCharVector) Tables(org.apache.drill.metastore.components.tables.Tables) Comparator(java.util.Comparator) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) Collections(java.util.Collections) ArrayList(java.util.ArrayList) MaterializedField(org.apache.drill.exec.record.MaterializedField) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader)

Aggregations

ArrayList (java.util.ArrayList)3 SchemaPath (org.apache.drill.common.expression.SchemaPath)3 BaseMetadata (org.apache.drill.metastore.metadata.BaseMetadata)3 MetadataType (org.apache.drill.metastore.metadata.MetadataType)3 Arrays (java.util.Arrays)2 Collections (java.util.Collections)2 Comparator (java.util.Comparator)2 Iterator (java.util.Iterator)2 List (java.util.List)2 Map (java.util.Map)2 Function (java.util.function.Function)2 Collectors (java.util.stream.Collectors)2 StreamSupport (java.util.stream.StreamSupport)2 MinorType (org.apache.drill.common.types.TypeProtos.MinorType)2 Types (org.apache.drill.common.types.Types)2 OutOfMemoryException (org.apache.drill.exec.exception.OutOfMemoryException)2 ColumnNamesOptions (org.apache.drill.exec.metastore.ColumnNamesOptions)2 AnalyzeColumnUtils (org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils)2 MetadataIdentifierUtils (org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils)2 MetastoreAnalyzeConstants (org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants)2