Search in sources:

Example 1 with SegmentMetadata

Use of org.apache.drill.metastore.metadata.SegmentMetadata in the Apache Drill project.

Class BaseParquetMetadataProvider, method getSegmentsMetadataMap.

/**
 * Returns the segments metadata keyed by path, computing and caching it on first use.
 *
 * <p>An empty map is returned (and nothing is cached) when there are no entries, when
 * metadata collection is disabled, or when no files metadata is available.
 *
 * <p>The deepest segment level is computed from the files metadata; every higher level is
 * then computed from the metadata of the level computed just before it. The cached map is
 * published only after all levels were built, so a failure part-way through cannot leave
 * a half-populated map behind for later callers.
 *
 * @return map of segment metadata, never {@code null}
 */
@SuppressWarnings("unused")
@Override
public Map<Path, SegmentMetadata> getSegmentsMetadataMap() {
    if (segments != null) {
        return segments;
    }
    if (entries.isEmpty() || !collectMetadata) {
        return Collections.emptyMap();
    }
    // Look the files metadata up once and reuse it below.
    Map<Path, FileMetadata> filesMetadata = getFilesMetadataMap();
    if (filesMetadata.isEmpty()) {
        // Nothing to derive the nesting depth from.
        return Collections.emptyMap();
    }
    // The first file is used to determine how many directory levels lie between
    // the table root and the data files.
    Path fileLocation = filesMetadata.values().iterator().next().getPath();
    int levelsCount = fileLocation.depth() - tableLocation.depth();
    // NOTE(review): when files sit directly in the table root, levelsCount is 1 or less and
    // segmentsIndex becomes 0 or negative -- confirm getSegmentsForMetadata copes with that.
    int segmentsIndex = levelsCount - 1;
    Map<Path, SegmentMetadata> result = new LinkedHashMap<>();
    Map<Path, SegmentMetadata> segmentMetadata = getSegmentsForMetadata(filesMetadata, SchemaPath.getSimplePath(MetadataInfo.DEFAULT_COLUMN_PREFIX + segmentsIndex));
    result.putAll(segmentMetadata);
    // Walk from the second-deepest level up to level 0, feeding each level's
    // result into the computation of the next one.
    for (int i = segmentsIndex - 1; i >= 0; i--) {
        String segmentColumn = MetadataInfo.DEFAULT_COLUMN_PREFIX + i;
        segmentMetadata = getMetadataForSegments(segmentMetadata, SchemaPath.getSimplePath(segmentColumn));
        result.putAll(segmentMetadata);
    }
    // Publish only the fully built map.
    segments = result;
    return segments;
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ReadEntryWithPath(org.apache.drill.exec.store.dfs.ReadEntryWithPath) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata)

Example 2 with SegmentMetadata

Use of org.apache.drill.metastore.metadata.SegmentMetadata in the Apache Drill project.

Class BasicTablesTransformer, method all.

/**
 * Converts the given metadata units into typed metadata objects and groups them by
 * metadata type.
 *
 * <p>Units whose metadata type resolves to {@code null}, or to a type other than
 * TABLE, SEGMENT, FILE, ROW_GROUP or PARTITION, are skipped.
 *
 * @param units metadata units to convert
 * @return holder with tables, segments, files, row groups and partitions
 */
public static MetadataHolder all(List<TableMetadataUnit> units) {
    List<BaseTableMetadata> tableList = new ArrayList<>();
    List<SegmentMetadata> segmentList = new ArrayList<>();
    List<FileMetadata> fileList = new ArrayList<>();
    List<RowGroupMetadata> rowGroupList = new ArrayList<>();
    List<PartitionMetadata> partitionList = new ArrayList<>();
    for (TableMetadataUnit unit : units) {
        MetadataType type = MetadataType.fromValue(unit.metadataType());
        // Identity comparison is null-safe, so an unresolved (null) type simply
        // matches none of the branches and the unit is ignored.
        if (type == MetadataType.TABLE) {
            tableList.add(BaseTableMetadata.builder().metadataUnit(unit).build());
        } else if (type == MetadataType.SEGMENT) {
            segmentList.add(SegmentMetadata.builder().metadataUnit(unit).build());
        } else if (type == MetadataType.FILE) {
            fileList.add(FileMetadata.builder().metadataUnit(unit).build());
        } else if (type == MetadataType.ROW_GROUP) {
            rowGroupList.add(RowGroupMetadata.builder().metadataUnit(unit).build());
        } else if (type == MetadataType.PARTITION) {
            partitionList.add(PartitionMetadata.builder().metadataUnit(unit).build());
        }
        // Any other type is unsupported here and is ignored.
    }
    return new MetadataHolder(tableList, segmentList, fileList, rowGroupList, partitionList);
}
Also used : ArrayList(java.util.ArrayList) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) MetadataType(org.apache.drill.metastore.metadata.MetadataType) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata)

Example 3 with SegmentMetadata

Use of org.apache.drill.metastore.metadata.SegmentMetadata in the Apache Drill project.

Class MetadataControllerBatch, method getSegmentMetadata.

/**
 * Builds {@link SegmentMetadata} from the values of the current reader row.
 *
 * @param reader             reader positioned on the row that describes the segment
 * @param metadataStatistics metadata-level statistics to attach to the segment
 * @param columnStatistics   per-column statistics to attach to the segment
 * @param nestingLevel       number of leading segment columns whose values form the
 *                           metadata identifier; replaced by the total number of segment
 *                           columns when multi-value segments are enabled
 * @return segment metadata for the row
 */
private SegmentMetadata getSegmentMetadata(TupleReader reader, List<StatisticsHolder<?>> metadataStatistics, Map<SchemaPath, ColumnStatistics<?>> columnStatistics, int nestingLevel) {
    List<String> segmentColumns = popConfig.getContext().segmentColumns();

    // The segment key is the value of the first segment column, or the default key
    // when no segment columns are configured.
    String segmentKey;
    if (!segmentColumns.isEmpty()) {
        segmentKey = reader.column(segmentColumns.iterator().next()).scalar().getString();
    } else {
        segmentKey = MetadataInfo.DEFAULT_SEGMENT_KEY;
    }

    // for the case of multi-value segments, there is no nesting
    // and therefore all values should be used when forming metadata identifier
    if (popConfig.getContext().multiValueSegments()) {
        nestingLevel = segmentColumns.size();
    }

    List<String> allPartitionValues = segmentColumns.stream()
            .limit(nestingLevel)
            .map(columnName -> reader.column(columnName).scalar().getString())
            .collect(Collectors.toList());

    String metadataIdentifier = MetadataIdentifierUtils.getMetadataIdentifierKey(allPartitionValues);
    MetadataInfo metadataInfo = MetadataInfo.builder()
            .type(MetadataType.SEGMENT)
            .key(segmentKey)
            .identifier(StringUtils.defaultIfEmpty(metadataIdentifier, null))
            .build();

    int segmentLevel = nestingLevel - 1;

    // for the case of multi-value segments, there is no nesting,
    // so all partition column values should be used
    List<String> partitionValues;
    if (popConfig.getContext().multiValueSegments()) {
        partitionValues = allPartitionValues;
    } else {
        partitionValues = Collections.singletonList(allPartitionValues.get(segmentLevel));
    }

    return SegmentMetadata.builder()
            .tableInfo(tableInfo)
            .metadataInfo(metadataInfo)
            .columnsStatistics(columnStatistics)
            .metadataStatistics(metadataStatistics)
            .path(new Path(reader.column(MetastoreAnalyzeConstants.LOCATION_FIELD).scalar().getString()))
            .locations(getIncomingLocations(reader))
            .column(!segmentColumns.isEmpty() ? SchemaPath.getSimplePath(segmentColumns.get(segmentLevel)) : null)
            .partitionValues(partitionValues)
            .lastModifiedTime(Long.parseLong(reader.column(columnNamesOptions.lastModifiedTime()).scalar().getString()))
            .schema(TupleMetadata.of(reader.column(MetastoreAnalyzeConstants.SCHEMA_FIELD).scalar().getString()))
            .build();
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) BaseStatisticsKind(org.apache.drill.metastore.statistics.BaseStatisticsKind) MetastoreColumn(org.apache.drill.metastore.MetastoreColumn) UserException(org.apache.drill.common.exceptions.UserException) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) StringUtils(org.apache.commons.lang3.StringUtils) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) StatisticsRecordWriterImpl(org.apache.drill.exec.store.StatisticsRecordWriterImpl) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) Map(java.util.Map) FieldConverter(org.apache.drill.exec.store.EventBasedRecordWriter.FieldConverter) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Delete(org.apache.drill.metastore.operate.Delete) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) Set(java.util.Set) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) 
FieldReader(org.apache.drill.exec.vector.complex.reader.FieldReader) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) AbstractBinaryRecordBatch(org.apache.drill.exec.record.AbstractBinaryRecordBatch) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) ObjectReader(org.apache.drill.exec.vector.accessor.ObjectReader) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) TupleReader(org.apache.drill.exec.vector.accessor.TupleReader) Modify(org.apache.drill.metastore.operate.Modify) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) HashMap(java.util.HashMap) BitVector(org.apache.drill.exec.vector.BitVector) Function(java.util.function.Function) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) ArrayList(java.util.ArrayList) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) HashSet(java.util.HashSet) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) DrillStatsTable(org.apache.drill.exec.planner.common.DrillStatsTable) WriterPrel(org.apache.drill.exec.planner.physical.WriterPrel) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) StatisticsRecordCollector(org.apache.drill.exec.store.StatisticsRecordCollector) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) IOException(java.io.IOException) 
FilterExpression(org.apache.drill.metastore.expressions.FilterExpression) StatisticsCollectorImpl(org.apache.drill.exec.store.easy.json.StatisticsCollectorImpl) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) ParquetTableMetadataUtils(org.apache.drill.exec.store.parquet.ParquetTableMetadataUtils) VarCharVector(org.apache.drill.exec.vector.VarCharVector) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) MetadataControllerPOP(org.apache.drill.exec.physical.config.MetadataControllerPOP) Tables(org.apache.drill.metastore.components.tables.Tables) Collections(java.util.Collections) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) ObjectType(org.apache.drill.exec.vector.accessor.ObjectType) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo)

Example 4 with SegmentMetadata

Use of org.apache.drill.metastore.metadata.SegmentMetadata in the Apache Drill project.

Class TestInfoSchemaWithMetastore, method testPartitions.

/**
 * Writes a default segment, a {@code dir0} segment and a partition into the Metastore and
 * checks that {@code information_schema.partitions} returns one row for the {@code dir0}
 * segment and one for the partition, and no row for the default segment.
 */
@Test
public void testPartitions() throws Exception {
    String tableName = "table_with_partitions";
    ZonedDateTime currentTime = currentUtcTime();
    // All three metadata objects share the same last-modified timestamp.
    long lastModified = currentTime.toInstant().toEpochMilli();

    TableInfo tableInfo = TableInfo.builder()
            .storagePlugin("dfs")
            .workspace("tmp")
            .name(tableName)
            .type("PARQUET")
            .build();

    SegmentMetadata rootSegment = SegmentMetadata.builder()
            .tableInfo(tableInfo)
            .metadataInfo(MetadataInfo.builder()
                    .type(MetadataType.SEGMENT)
                    .key(MetadataInfo.DEFAULT_SEGMENT_KEY)
                    .build())
            .path(new Path("/tmp", tableName))
            .locations(Collections.emptySet())
            .metadataStatistics(Collections.emptyList())
            .columnsStatistics(Collections.emptyMap())
            .lastModifiedTime(lastModified)
            .build();

    SegmentMetadata dirSegment = SegmentMetadata.builder()
            .tableInfo(tableInfo)
            .metadataInfo(MetadataInfo.builder()
                    .type(MetadataType.SEGMENT)
                    .key("part_int=3")
                    .identifier("part_int=3")
                    .build())
            .column(SchemaPath.parseFromString("dir0"))
            .partitionValues(Collections.singletonList("part_int=3"))
            .path(new Path(String.format("/tmp/%s/part_int=3", tableName)))
            .locations(Collections.emptySet())
            .metadataStatistics(Collections.emptyList())
            .columnsStatistics(Collections.emptyMap())
            .lastModifiedTime(lastModified)
            .build();

    PartitionMetadata varcharPartition = PartitionMetadata.builder()
            .tableInfo(tableInfo)
            .metadataInfo(MetadataInfo.builder()
                    .type(MetadataType.PARTITION)
                    .key("part_int=3")
                    .identifier("part_int=3/part_varchar=g")
                    .build())
            .column(SchemaPath.parseFromString("part_varchar"))
            .partitionValues(Collections.singletonList("g"))
            .locations(Collections.emptySet())
            .metadataStatistics(Collections.emptyList())
            .columnsStatistics(Collections.emptyMap())
            .lastModifiedTime(lastModified)
            .build();

    metastore.tables().modify()
            .overwrite(rootSegment.toMetadataUnit(), dirSegment.toMetadataUnit(), varcharPartition.toMetadataUnit())
            .execute();

    List<String> selectedColumns = Arrays.asList(
            InfoSchemaConstants.SHRD_COL_TABLE_CATALOG,
            InfoSchemaConstants.SHRD_COL_TABLE_SCHEMA,
            InfoSchemaConstants.SHRD_COL_TABLE_NAME,
            InfoSchemaConstants.PARTITIONS_COL_METADATA_KEY,
            InfoSchemaConstants.PARTITIONS_COL_METADATA_TYPE,
            InfoSchemaConstants.PARTITIONS_COL_METADATA_IDENTIFIER,
            InfoSchemaConstants.PARTITIONS_COL_PARTITION_COLUMN,
            InfoSchemaConstants.PARTITIONS_COL_PARTITION_VALUE,
            InfoSchemaConstants.PARTITIONS_COL_LOCATION,
            InfoSchemaConstants.PARTITIONS_COL_LAST_MODIFIED_TIME);

    client.testBuilder()
            .sqlQuery("select %s from information_schema.`partitions` where table_name = '%s'", String.join(", ", selectedColumns), tableName)
            .unOrdered()
            .baselineColumns(selectedColumns.toArray(new String[0]))
            // row for the dir0 segment
            .baselineValues("DRILL", "dfs.tmp", tableName, "part_int=3", MetadataType.SEGMENT.name(), "part_int=3", "`dir0`", "part_int=3", "/tmp/table_with_partitions/part_int=3", currentTime.toLocalDateTime())
            // row for the partition; its location is expected to be null
            .baselineValues("DRILL", "dfs.tmp", tableName, "part_int=3", MetadataType.PARTITION.name(), "part_int=3/part_varchar=g", "`part_varchar`", "g", null, currentTime.toLocalDateTime())
            .go();
}
Also used : SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ZonedDateTime(java.time.ZonedDateTime) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) TableInfo(org.apache.drill.metastore.metadata.TableInfo) ClusterTest(org.apache.drill.test.ClusterTest) Test(org.junit.Test) UnlikelyTest(org.apache.drill.categories.UnlikelyTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) SqlTest(org.apache.drill.categories.SqlTest)

Example 5 with SegmentMetadata

Use of org.apache.drill.metastore.metadata.SegmentMetadata in the Apache Drill project.

Class TestMetastoreCommands, method testSimpleAnalyze.

/**
 * Runs {@code ANALYZE TABLE ... REFRESH METADATA} on a copy of the multilevel parquet table
 * and compares the table, segment, file and row group metadata read back from the Metastore
 * with the expected values. The collected metadata is dropped in the {@code finally} block
 * whether or not the assertions pass.
 */
@Test
public void testSimpleAnalyze() throws Exception {
    String tableName = "multilevel/parquetSimpleAnalyze";
    TableInfo tableInfo = getTableInfo(tableName, "default");
    File table = dirTestWatcher.copyResourceToRoot(Paths.get("multilevel/parquet"), Paths.get(tableName));
    Path tablePath = new Path(table.toURI().getPath());
    BaseTableMetadata expectedTableMetadata = getBaseTableMetadata(tableInfo, table);
    TableInfo baseTableInfo = TableInfo.builder().name(tableName).storagePlugin("dfs").workspace("default").build();

    // expected top-level (dir0) segment for 1994
    SegmentMetadata dir0 = SegmentMetadata.builder()
            .tableInfo(baseTableInfo)
            .metadataInfo(MetadataInfo.builder().type(MetadataType.SEGMENT).identifier("1994").key("1994").build())
            .path(new Path(tablePath, "1994"))
            .schema(SCHEMA)
            .lastModifiedTime(getMaxLastModified(new File(table, "1994")))
            .column(SchemaPath.getSimplePath("dir0"))
            .columnsStatistics(DIR0_1994_SEGMENT_COLUMN_STATISTICS)
            .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(40L, TableStatisticsKind.ROW_COUNT)))
            .locations(ImmutableSet.of(new Path(tablePath, "1994/Q1/orders_94_q1.parquet"), new Path(tablePath, "1994/Q2/orders_94_q2.parquet"), new Path(tablePath, "1994/Q3/orders_94_q3.parquet"), new Path(tablePath, "1994/Q4/orders_94_q4.parquet")))
            .partitionValues(Collections.singletonList("1994"))
            .build();

    Set<Path> expectedTopLevelSegmentLocations = ImmutableSet.of(new Path(tablePath, "1994"), new Path(tablePath, "1995"), new Path(tablePath, "1996"));

    // expected file locations, one set per top-level segment
    Set<Set<Path>> expectedSegmentFilesLocations = new HashSet<>();
    Set<Path> segmentFiles = ImmutableSet.of(new Path(tablePath, "1994/Q2/orders_94_q2.parquet"), new Path(tablePath, "1994/Q4/orders_94_q4.parquet"), new Path(tablePath, "1994/Q1/orders_94_q1.parquet"), new Path(tablePath, "1994/Q3/orders_94_q3.parquet"));
    expectedSegmentFilesLocations.add(segmentFiles);
    segmentFiles = ImmutableSet.of(new Path(tablePath, "1995/Q2/orders_95_q2.parquet"), new Path(tablePath, "1995/Q4/orders_95_q4.parquet"), new Path(tablePath, "1995/Q1/orders_95_q1.parquet"), new Path(tablePath, "1995/Q3/orders_95_q3.parquet"));
    expectedSegmentFilesLocations.add(segmentFiles);
    segmentFiles = ImmutableSet.of(new Path(tablePath, "1996/Q3/orders_96_q3.parquet"), new Path(tablePath, "1996/Q2/orders_96_q2.parquet"), new Path(tablePath, "1996/Q4/orders_96_q4.parquet"), new Path(tablePath, "1996/Q1/orders_96_q1.parquet"));
    expectedSegmentFilesLocations.add(segmentFiles);

    long dir0q1lastModified = new File(new File(new File(table, "1994"), "Q1"), "orders_94_q1.parquet").lastModified();

    // expected file metadata for 1994/Q1
    FileMetadata dir01994q1File = FileMetadata.builder()
            .tableInfo(baseTableInfo)
            .metadataInfo(MetadataInfo.builder().type(MetadataType.FILE).identifier("1994/Q1/orders_94_q1.parquet").key("1994").build())
            .schema(SCHEMA)
            .lastModifiedTime(dir0q1lastModified)
            .columnsStatistics(DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS)
            .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT)))
            .path(new Path(tablePath, "1994/Q1/orders_94_q1.parquet"))
            .build();

    // expected row group metadata for the single row group of the 1994/Q1 file
    RowGroupMetadata dir01994q1rowGroup = RowGroupMetadata.builder()
            .tableInfo(baseTableInfo)
            .metadataInfo(MetadataInfo.builder().type(MetadataType.ROW_GROUP).identifier("1994/Q1/orders_94_q1.parquet/0").key("1994").build())
            .schema(SCHEMA)
            .rowGroupIndex(0)
            .hostAffinity(Collections.emptyMap())
            .lastModifiedTime(dir0q1lastModified)
            .columnsStatistics(DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS)
            .metadataStatistics(Arrays.asList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(1196L, new BaseStatisticsKind<>(ExactStatisticsConstants.LENGTH, true)), new StatisticsHolder<>(4L, new BaseStatisticsKind<>(ExactStatisticsConstants.START, true))))
            .path(new Path(tablePath, "1994/Q1/orders_94_q1.parquet"))
            .build();

    try {
        testBuilder().sqlQuery("ANALYZE TABLE dfs.`%s` REFRESH METADATA", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName)).go();

        // verify table metadata
        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);

        // verify the top-level segment for 1994
        List<SegmentMetadata> topSegmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByColumn(tableInfo, null, "`dir0`");
        SegmentMetadata actualDir0Metadata = topSegmentMetadata.stream().filter(unit -> unit.getMetadataInfo().identifier().equals("1994")).findAny().orElseThrow(() -> new AssertionError("Segment is absent"));
        // The original re-applied the segment's own locations through toBuilder() and then
        // discarded the builder, which had no effect on the compared object; that dead code
        // has been removed.
        assertEquals(dir0, actualDir0Metadata);

        Set<Path> topLevelSegmentLocations = topSegmentMetadata.stream().map(SegmentMetadata::getLocation).collect(Collectors.toSet());
        // verify top segments locations
        assertEquals(expectedTopLevelSegmentLocations, topLevelSegmentLocations);

        Set<Set<Path>> segmentFilesLocations = topSegmentMetadata.stream().map(SegmentMetadata::getLocations).collect(Collectors.toSet());
        assertEquals(expectedSegmentFilesLocations, segmentFilesLocations);

        // verify nested segments
        List<SegmentMetadata> nestedSegmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByColumn(tableInfo, null, "`dir1`");
        assertEquals(12, nestedSegmentMetadata.size());

        SegmentMetadata dir01994q1Segment = SegmentMetadata.builder()
                .tableInfo(baseTableInfo)
                .metadataInfo(MetadataInfo.builder().type(MetadataType.SEGMENT).identifier("1994/Q1").key("1994").build())
                .path(new Path(new Path(tablePath, "1994"), "Q1"))
                .schema(SCHEMA)
                .lastModifiedTime(getMaxLastModified(new File(new File(table, "1994"), "Q1")))
                .column(SchemaPath.getSimplePath("dir1"))
                .columnsStatistics(DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS)
                .metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT)))
                .locations(ImmutableSet.of(new Path(tablePath, "1994/Q1/orders_94_q1.parquet")))
                .partitionValues(Collections.singletonList("Q1"))
                .build();
        // verify segment for 1994
        assertEquals(dir01994q1Segment, nestedSegmentMetadata.stream().filter(unit -> unit.getMetadataInfo().identifier().equals("1994/Q1")).findAny().orElse(null));

        // verify files metadata
        List<FileMetadata> filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(12, filesMetadata.size());
        // verify first file metadata
        assertEquals(dir01994q1File, filesMetadata.stream().filter(unit -> unit.getMetadataInfo().identifier().equals("1994/Q1/orders_94_q1.parquet")).findAny().orElse(null));

        // verify row groups metadata
        List<RowGroupMetadata> rowGroupsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().rowGroupsMetadata(tableInfo, null, (String) null);
        assertEquals(12, rowGroupsMetadata.size());
        // verify first row group dir01994q1rowGroup
        assertEquals(dir01994q1rowGroup, rowGroupsMetadata.stream().filter(unit -> unit.getMetadataInfo().identifier().equals("1994/Q1/orders_94_q1.parquet/0")).findAny().orElse(null));
    } finally {
        // always clean up so that later tests do not see this table's metadata
        run("analyze table dfs.`%s` drop metadata if exists", tableName);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataType(org.apache.drill.metastore.metadata.MetadataType) BaseStatisticsKind(org.apache.drill.metastore.statistics.BaseStatisticsKind) Arrays(java.util.Arrays) ClusterTest(org.apache.drill.test.ClusterTest) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) SlowTest(org.apache.drill.categories.SlowTest) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) Assert.fail(org.junit.Assert.fail) AnalyzeParquetInfoProvider(org.apache.drill.exec.metastore.analyze.AnalyzeParquetInfoProvider) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Delete(org.apache.drill.metastore.operate.Delete) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) Set(java.util.Set) Category(org.junit.experimental.categories.Category) Instant(java.time.Instant) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) Collectors(java.util.stream.Collectors) ZoneId(java.time.ZoneId) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) MetastoreTest(org.apache.drill.categories.MetastoreTest) ExecConstants(org.apache.drill.exec.ExecConstants) ClusterFixtureBuilder(org.apache.drill.test.ClusterFixtureBuilder) TableInfo(org.apache.drill.metastore.metadata.TableInfo) BeforeClass(org.junit.BeforeClass) LocalDateTime(java.time.LocalDateTime) 
HashMap(java.util.HashMap) HashSet(java.util.HashSet) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) ExpectedException(org.junit.rules.ExpectedException) Before(org.junit.Before) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) ClusterFixture(org.apache.drill.test.ClusterFixture) Assert.assertNotNull(org.junit.Assert.assertNotNull) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) UserRemoteException(org.apache.drill.common.exceptions.UserRemoteException) Assert.assertTrue(org.junit.Assert.assertTrue) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) File(java.io.File) ImmutableMap(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap) Rule(org.junit.Rule) Assert.assertNull(org.junit.Assert.assertNull) Ignore(org.junit.Ignore) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) Paths(java.nio.file.Paths) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) ImmutableSet(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableSet) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) Set(java.util.Set) HashSet(java.util.HashSet) ImmutableSet(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableSet) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) 
TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) File(java.io.File) HashSet(java.util.HashSet) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Aggregations

SegmentMetadata (org.apache.drill.metastore.metadata.SegmentMetadata)21 TableInfo (org.apache.drill.metastore.metadata.TableInfo)17 MetastoreTest (org.apache.drill.categories.MetastoreTest)16 Test (org.junit.Test)16 SchemaPath (org.apache.drill.common.expression.SchemaPath)15 MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo)15 BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata)15 FileMetadata (org.apache.drill.metastore.metadata.FileMetadata)15 ClusterTest (org.apache.drill.test.ClusterTest)15 Path (org.apache.hadoop.fs.Path)15 File (java.io.File)14 SlowTest (org.apache.drill.categories.SlowTest)14 HashMap (java.util.HashMap)11 RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata)11 ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics)11 StatisticsHolder (org.apache.drill.metastore.statistics.StatisticsHolder)11 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)10 MetadataType (org.apache.drill.metastore.metadata.MetadataType)8 Collections (java.util.Collections)7 List (java.util.List)7