Search in sources :

Example 1 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class MetadataControllerBatch method getRowGroupMetadata.

private RowGroupMetadata getRowGroupMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics, Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
    List<String> segmentColumns = popConfig.getContext().segmentColumns();
    String segmentKey = segmentColumns.size() > 0 ? reader.column(segmentColumns.iterator().next()).scalar().getString() : MetadataInfo.DEFAULT_SEGMENT_KEY;
    List<String> partitionValues = segmentColumns.stream().limit(nestingLevel - 2).map(columnName -> reader.column(columnName).scalar().getString()).collect(Collectors.toList());
    Path path = new Path(reader.column(MetastoreAnalyzeConstants.LOCATION_FIELD).scalar().getString());
    int rowGroupIndex = Integer.parseInt(reader.column(columnNamesOptions.rowGroupIndex()).scalar().getString());
    String metadataIdentifier = MetadataIdentifierUtils.getRowGroupMetadataIdentifier(partitionValues, path, rowGroupIndex);
    MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.ROW_GROUP).key(segmentKey).identifier(StringUtils.defaultIfEmpty(metadataIdentifier, null)).build();
    return RowGroupMetadata.builder().tableInfo(tableInfo).metadataInfo(metadataInfo).columnsStatistics(columnStatistics).metadataStatistics(metadataStatistics).hostAffinity(Collections.emptyMap()).rowGroupIndex(rowGroupIndex).path(path).lastModifiedTime(Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString())).schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString())).build();
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) BaseStatisticsKind(org.apache.drill.metastore.statistics.BaseStatisticsKind) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) UserException(org.apache.drill.common.exceptions.UserException) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) StringUtils(org.apache.commons.lang3.StringUtils) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) StatisticsRecordWriterImpl(org.apache.drill.exec.store.StatisticsRecordWriterImpl) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) Map(java.util.Map) FieldConverter(org.apache.drill.exec.store.EventBasedRecordWriter.FieldConverter) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Delete(org.apache.drill.metastore.operate.Delete) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) Set(java.util.Set) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) FieldReader(org.apache.drill.exec.vector.complex.reader.FieldReader) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) AbstractBinaryRecordBatch(org.apache.drill.exec.record.AbstractBinaryRecordBatch) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) ObjectReader(org.apache.drill.exec.vector.accessor.ObjectReader) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) TupleReader(org.apache.drill.exec.vector.accessor.TupleReader) Modify(org.apache.drill.metastore.operate.Modify) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) HashMap(java.util.HashMap) BitVector(org.apache.drill.exec.vector.BitVector) Function(java.util.function.Function) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) ArrayList(java.util.ArrayList) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) HashSet(java.util.HashSet) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) DrillStatsTable(org.apache.drill.exec.planner.common.DrillStatsTable) WriterPrel(org.apache.drill.exec.planner.physical.WriterPrel) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) StatisticsRecordCollector(org.apache.drill.exec.store.StatisticsRecordCollector) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) IOException(java.io.IOException) FilterExpression(org.apache.drill.metastore.expressions.FilterExpression) StatisticsCollectorImpl(org.apache.drill.exec.store.easy.json.StatisticsCollectorImpl) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) ParquetTableMetadataUtils(org.apache.drill.exec.store.parquet.ParquetTableMetadataUtils) VarCharVector(org.apache.drill.exec.vector.VarCharVector) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) MetadataControllerPOP(org.apache.drill.exec.physical.config.MetadataControllerPOP) Tables(org.apache.drill.metastore.components.tables.Tables) Collections(java.util.Collections) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) ObjectType(org.apache.drill.exec.vector.accessor.ObjectType) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo)

Example 2 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class MetadataControllerBatch method getSegmentMetadata.

private SegmentMetadata getSegmentMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics, Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
    List<String> segmentColumns = popConfig.getContext().segmentColumns();
    String segmentKey = segmentColumns.size() > 0 ? reader.column(segmentColumns.iterator().next()).scalar().getString() : MetadataInfo.DEFAULT_SEGMENT_KEY;
    // and therefore all values should be used when forming metadata identifier
    if (popConfig.getContext().multiValueSegments()) {
        nestingLevel = segmentColumns.size();
    }
    List<String> allPartitionValues = segmentColumns.stream().limit(nestingLevel).map(columnName -> reader.column(columnName).scalar().getString()).collect(Collectors.toList());
    String metadataIdentifier = MetadataIdentifierUtils.getMetadataIdentifierKey(allPartitionValues);
    MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.SEGMENT).key(segmentKey).identifier(StringUtils.defaultIfEmpty(metadataIdentifier, null)).build();
    int segmentLevel = nestingLevel - 1;
    // for the case of multi-value segments, there is no nesting,
    // so all partition column values should be used
    List<String> partitionValues = popConfig.getContext().multiValueSegments() ? allPartitionValues : Collections.singletonList(allPartitionValues.get(segmentLevel));
    return SegmentMetadata.builder().tableInfo(tableInfo).metadataInfo(metadataInfo).columnsStatistics(columnStatistics).metadataStatistics(metadataStatistics).path(new Path(reader.column(MetastoreAnalyzeConstants.LOCATION_FIELD).scalar().getString())).locations(getIncomingLocations(reader)).column(segmentColumns.size() > 0 ? SchemaPath.getSimplePath(segmentColumns.get(segmentLevel)) : null).partitionValues(partitionValues).lastModifiedTime(Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString())).schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString())).build();
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) BaseStatisticsKind(org.apache.drill.metastore.statistics.BaseStatisticsKind) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) UserException(org.apache.drill.common.exceptions.UserException) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) StringUtils(org.apache.commons.lang3.StringUtils) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) StatisticsRecordWriterImpl(org.apache.drill.exec.store.StatisticsRecordWriterImpl) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) Map(java.util.Map) FieldConverter(org.apache.drill.exec.store.EventBasedRecordWriter.FieldConverter) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Delete(org.apache.drill.metastore.operate.Delete) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) Set(java.util.Set) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) FieldReader(org.apache.drill.exec.vector.complex.reader.FieldReader) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) AbstractBinaryRecordBatch(org.apache.drill.exec.record.AbstractBinaryRecordBatch) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) ObjectReader(org.apache.drill.exec.vector.accessor.ObjectReader) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) TupleReader(org.apache.drill.exec.vector.accessor.TupleReader) Modify(org.apache.drill.metastore.operate.Modify) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) HashMap(java.util.HashMap) BitVector(org.apache.drill.exec.vector.BitVector) Function(java.util.function.Function) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) ArrayList(java.util.ArrayList) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) HashSet(java.util.HashSet) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) DrillStatsTable(org.apache.drill.exec.planner.common.DrillStatsTable) WriterPrel(org.apache.drill.exec.planner.physical.WriterPrel) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) StatisticsRecordCollector(org.apache.drill.exec.store.StatisticsRecordCollector) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) IOException(java.io.IOException) FilterExpression(org.apache.drill.metastore.expressions.FilterExpression) StatisticsCollectorImpl(org.apache.drill.exec.store.easy.json.StatisticsCollectorImpl) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) ParquetTableMetadataUtils(org.apache.drill.exec.store.parquet.ParquetTableMetadataUtils) VarCharVector(org.apache.drill.exec.vector.VarCharVector) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) MetadataControllerPOP(org.apache.drill.exec.physical.config.MetadataControllerPOP) Tables(org.apache.drill.metastore.components.tables.Tables) Collections(java.util.Collections) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) ObjectType(org.apache.drill.exec.vector.accessor.ObjectType) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo)

Example 3 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class MetadataControllerBatch method getPartitionMetadata.

private PartitionMetadata getPartitionMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics, Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
    List<String> segmentColumns = popConfig.getContext().segmentColumns();
    String segmentKey = segmentColumns.size() > 0 ? reader.column(segmentColumns.iterator().next()).scalar().getString() : MetadataInfo.DEFAULT_SEGMENT_KEY;
    List<String> partitionValues = segmentColumns.stream().limit(nestingLevel).map(columnName -> reader.column(columnName).scalar().getString()).collect(Collectors.toList());
    String metadataIdentifier = MetadataIdentifierUtils.getMetadataIdentifierKey(partitionValues);
    MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.PARTITION).key(segmentKey).identifier(StringUtils.defaultIfEmpty(metadataIdentifier, null)).build();
    return PartitionMetadata.builder().tableInfo(tableInfo).metadataInfo(metadataInfo).columnsStatistics(columnStatistics).metadataStatistics(metadataStatistics).locations(getIncomingLocations(reader)).lastModifiedTime(Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString())).schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString())).build();
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) BaseStatisticsKind(org.apache.drill.metastore.statistics.BaseStatisticsKind) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) UserException(org.apache.drill.common.exceptions.UserException) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) StringUtils(org.apache.commons.lang3.StringUtils) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) StatisticsRecordWriterImpl(org.apache.drill.exec.store.StatisticsRecordWriterImpl) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) Map(java.util.Map) FieldConverter(org.apache.drill.exec.store.EventBasedRecordWriter.FieldConverter) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Delete(org.apache.drill.metastore.operate.Delete) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) Set(java.util.Set) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) FieldReader(org.apache.drill.exec.vector.complex.reader.FieldReader) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) AbstractBinaryRecordBatch(org.apache.drill.exec.record.AbstractBinaryRecordBatch) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) ObjectReader(org.apache.drill.exec.vector.accessor.ObjectReader) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) TupleReader(org.apache.drill.exec.vector.accessor.TupleReader) Modify(org.apache.drill.metastore.operate.Modify) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) HashMap(java.util.HashMap) BitVector(org.apache.drill.exec.vector.BitVector) Function(java.util.function.Function) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) ArrayList(java.util.ArrayList) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) HashSet(java.util.HashSet) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) DrillStatsTable(org.apache.drill.exec.planner.common.DrillStatsTable) WriterPrel(org.apache.drill.exec.planner.physical.WriterPrel) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) StatisticsRecordCollector(org.apache.drill.exec.store.StatisticsRecordCollector) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) IOException(java.io.IOException) FilterExpression(org.apache.drill.metastore.expressions.FilterExpression) StatisticsCollectorImpl(org.apache.drill.exec.store.easy.json.StatisticsCollectorImpl) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) ParquetTableMetadataUtils(org.apache.drill.exec.store.parquet.ParquetTableMetadataUtils) VarCharVector(org.apache.drill.exec.vector.VarCharVector) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) MetadataControllerPOP(org.apache.drill.exec.physical.config.MetadataControllerPOP) Tables(org.apache.drill.metastore.components.tables.Tables) Collections(java.util.Collections) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) ObjectType(org.apache.drill.exec.vector.accessor.ObjectType) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo)

Example 4 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class MetadataControllerBatch method getFileMetadata.

private FileMetadata getFileMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics, Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
    List<String> segmentColumns = popConfig.getContext().segmentColumns();
    String segmentKey = segmentColumns.size() > 0 ? reader.column(segmentColumns.iterator().next()).scalar().getString() : MetadataInfo.DEFAULT_SEGMENT_KEY;
    List<String> partitionValues = segmentColumns.stream().limit(nestingLevel - 1).map(columnName -> reader.column(columnName).scalar().getString()).collect(Collectors.toList());
    Path path = new Path(reader.column(MetastoreAnalyzeConstants.LOCATION_FIELD).scalar().getString());
    String metadataIdentifier = MetadataIdentifierUtils.getFileMetadataIdentifier(partitionValues, path);
    MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.FILE).key(segmentKey).identifier(StringUtils.defaultIfEmpty(metadataIdentifier, null)).build();
    return FileMetadata.builder().tableInfo(tableInfo).metadataInfo(metadataInfo).columnsStatistics(columnStatistics).metadataStatistics(metadataStatistics).path(path).lastModifiedTime(Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString())).schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString())).build();
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) BaseStatisticsKind(org.apache.drill.metastore.statistics.BaseStatisticsKind) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) UserException(org.apache.drill.common.exceptions.UserException) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) StringUtils(org.apache.commons.lang3.StringUtils) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) StatisticsRecordWriterImpl(org.apache.drill.exec.store.StatisticsRecordWriterImpl) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) Map(java.util.Map) FieldConverter(org.apache.drill.exec.store.EventBasedRecordWriter.FieldConverter) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Delete(org.apache.drill.metastore.operate.Delete) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) Set(java.util.Set) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) FieldReader(org.apache.drill.exec.vector.complex.reader.FieldReader) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) AbstractBinaryRecordBatch(org.apache.drill.exec.record.AbstractBinaryRecordBatch) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) ObjectReader(org.apache.drill.exec.vector.accessor.ObjectReader) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) TupleReader(org.apache.drill.exec.vector.accessor.TupleReader) Modify(org.apache.drill.metastore.operate.Modify) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) HashMap(java.util.HashMap) BitVector(org.apache.drill.exec.vector.BitVector) Function(java.util.function.Function) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) ArrayList(java.util.ArrayList) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) HashSet(java.util.HashSet) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) DrillStatsTable(org.apache.drill.exec.planner.common.DrillStatsTable) WriterPrel(org.apache.drill.exec.planner.physical.WriterPrel) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) StatisticsRecordCollector(org.apache.drill.exec.store.StatisticsRecordCollector) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) IOException(java.io.IOException) FilterExpression(org.apache.drill.metastore.expressions.FilterExpression) StatisticsCollectorImpl(org.apache.drill.exec.store.easy.json.StatisticsCollectorImpl) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) ParquetTableMetadataUtils(org.apache.drill.exec.store.parquet.ParquetTableMetadataUtils) VarCharVector(org.apache.drill.exec.vector.VarCharVector) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) MetadataControllerPOP(org.apache.drill.exec.physical.config.MetadataControllerPOP) Tables(org.apache.drill.metastore.components.tables.Tables) Collections(java.util.Collections) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) ObjectType(org.apache.drill.exec.vector.accessor.ObjectType) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo)

Example 5 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class AbstractTablesMetastoreTest method testWriteReadAllFieldTypes.

@Test
public void testWriteReadAllFieldTypes() {
    TableInfo tableInfo = TableInfo.builder().storagePlugin("dfs").workspace("tmp").name("nation").build();
    TableMetadataUnit tableUnit = TableMetadataUnit.builder().storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name()).metadataKey(MetadataInfo.GENERAL_INFO_KEY).metadataType(MetadataType.TABLE.name()).metadataStatistics(Collections.singletonList("{\"statisticsValue\":2.1," + "\"statisticsKind\":{\"name\":\"approx_count_distinct\"}}")).columnsStatistics(Collections.singletonMap("`name`", "{\"statistics\":[{\"statisticsValue\":\"aaa\"," + "\"statisticsKind\":{\"exact\":true,\"name\":\"minValue\"}},{\"statisticsValue\":\"zzz\"," + "\"statisticsKind\":{\"exact\":true,\"name\":\"maxValue\"}}],\"type\":\"VARCHAR\"}")).lastModifiedTime(System.currentTimeMillis()).build();
    TableMetadataUnit rowGroupUnit = TableMetadataUnit.builder().storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name()).metadataKey("1994").metadataIdentifier("1994/Q1/0_0_0.parquet/1").metadataType(MetadataType.ROW_GROUP.name()).lastModifiedTime(System.currentTimeMillis()).location("/tmp/nation").path("/tmp/nation/1/0_0_0.parquet").rowGroupIndex(1).hostAffinity(Collections.singletonMap("host1", 0.1F)).build();
    tables.modify().overwrite(tableUnit, rowGroupUnit).execute();
    List<TableMetadataUnit> tableUnits = tables.read().metadataType(MetadataType.TABLE).filter(tableInfo.toFilter()).execute();
    assertEquals(1, tableUnits.size());
    assertEquals(tableUnit, tableUnits.get(0));
    List<TableMetadataUnit> rowGroupUnits = tables.read().metadataType(MetadataType.ROW_GROUP).filter(tableInfo.toFilter()).execute();
    assertEquals(1, rowGroupUnits.size());
    assertEquals(rowGroupUnit, rowGroupUnits.get(0));
}
Also used : TableInfo(org.apache.drill.metastore.metadata.TableInfo) BaseTest(org.apache.drill.test.BaseTest) Test(org.junit.Test) MetastoreTest(org.apache.drill.categories.MetastoreTest)

Aggregations

TableInfo (org.apache.drill.metastore.metadata.TableInfo)58 MetastoreTest (org.apache.drill.categories.MetastoreTest)50 Test (org.junit.Test)50 MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo)39 SchemaPath (org.apache.drill.common.expression.SchemaPath)37 BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata)37 Path (org.apache.hadoop.fs.Path)36 ClusterTest (org.apache.drill.test.ClusterTest)33 SlowTest (org.apache.drill.categories.SlowTest)32 File (java.io.File)29 ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics)29 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)25 HashMap (java.util.HashMap)24 FileMetadata (org.apache.drill.metastore.metadata.FileMetadata)23 SegmentMetadata (org.apache.drill.metastore.metadata.SegmentMetadata)23 StatisticsHolder (org.apache.drill.metastore.statistics.StatisticsHolder)23 RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata)20 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)18 BaseTest (org.apache.drill.test.BaseTest)17 MetadataInfo (org.apache.drill.metastore.metadata.MetadataInfo)16