Search in sources :

Example 26 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class MetastoreAnalyzeTableHandler method convertToDrel.

/**
 * Converts to Drill logical plan
 */
private DrillRel convertToDrel(RelNode relNode, SqlMetastoreAnalyzeTable sqlAnalyzeTable, DrillTableInfo drillTableInfo) throws ForemanSetupException, IOException {
    RelBuilder relBuilder = LOGICAL_BUILDER.create(relNode.getCluster(), null);
    DrillTable table = drillTableInfo.drillTable();
    AnalyzeInfoProvider analyzeInfoProvider = table.getGroupScan().getAnalyzeInfoProvider();
    List<String> schemaPath = drillTableInfo.schemaPath();
    String pluginName = schemaPath.get(0);
    String workspaceName = Strings.join(schemaPath.subList(1, schemaPath.size()), AbstractSchema.SCHEMA_SEPARATOR);
    String tableName = drillTableInfo.tableName();
    TableInfo tableInfo = TableInfo.builder().name(tableName).owner(table.getUserName()).type(analyzeInfoProvider.getTableTypeName()).storagePlugin(pluginName).workspace(workspaceName).build();
    ColumnNamesOptions columnNamesOptions = new ColumnNamesOptions(context.getOptions());
    List<String> segmentColumns = analyzeInfoProvider.getSegmentColumns(table, columnNamesOptions).stream().map(SchemaPath::getRootSegmentPath).collect(Collectors.toList());
    List<NamedExpression> segmentExpressions = segmentColumns.stream().map(partitionName -> new NamedExpression(SchemaPath.getSimplePath(partitionName), FieldReference.getWithQuotedRef(partitionName))).collect(Collectors.toList());
    List<MetadataInfo> rowGroupsInfo = Collections.emptyList();
    List<MetadataInfo> filesInfo = Collections.emptyList();
    Multimap<Integer, MetadataInfo> segments = ArrayListMultimap.create();
    BasicTablesRequests basicRequests;
    try {
        basicRequests = context.getMetastoreRegistry().get().tables().basicRequests();
    } catch (MetastoreException e) {
        logger.error("Error when obtaining Metastore instance for table {}", tableName, e);
        DrillRel convertedRelNode = convertToRawDrel(relBuilder.values(new String[] { MetastoreAnalyzeConstants.OK_FIELD_NAME, MetastoreAnalyzeConstants.SUMMARY_FIELD_NAME }, false, e.getMessage()).build());
        return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
    }
    MetadataType metadataLevel = getMetadataType(sqlAnalyzeTable);
    List<SchemaPath> interestingColumns = sqlAnalyzeTable.getFieldNames();
    MetastoreTableInfo metastoreTableInfo = basicRequests.metastoreTableInfo(tableInfo);
    List<MetadataInfo> allMetaToHandle = null;
    List<MetadataInfo> metadataToRemove = new ArrayList<>();
    // whether incremental analyze may be produced
    if (metastoreTableInfo.isExists()) {
        RelNode finalRelNode = relNode;
        CheckedSupplier<TableScan, SqlUnsupportedException> tableScanSupplier = () -> DrillRelOptUtil.findScan(convertToDrel(finalRelNode.getInput(0)));
        MetadataInfoCollector metadataInfoCollector = analyzeInfoProvider.getMetadataInfoCollector(basicRequests, tableInfo, (FormatSelection) table.getSelection(), context.getPlannerSettings(), tableScanSupplier, interestingColumns, metadataLevel, segmentColumns.size());
        if (!metadataInfoCollector.isOutdated()) {
            DrillRel convertedRelNode = convertToRawDrel(relBuilder.values(new String[] { MetastoreAnalyzeConstants.OK_FIELD_NAME, MetastoreAnalyzeConstants.SUMMARY_FIELD_NAME }, false, "Table metadata is up to date, analyze wasn't performed.").build());
            return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
        }
        // updates scan to read updated / new files, pass removed files into metadata handler
        relNode = relNode.copy(relNode.getTraitSet(), Collections.singletonList(metadataInfoCollector.getPrunedScan()));
        filesInfo = metadataInfoCollector.getFilesInfo();
        segments = metadataInfoCollector.getSegmentsInfo();
        rowGroupsInfo = metadataInfoCollector.getRowGroupsInfo();
        allMetaToHandle = metadataInfoCollector.getAllMetaToHandle();
        metadataToRemove = metadataInfoCollector.getMetadataToRemove();
    }
    // Step 2: constructs plan for producing analyze
    DrillRel convertedRelNode = convertToRawDrel(relNode);
    boolean createNewAggregations = true;
    // List of columns for which statistics should be collected: interesting columns + segment columns
    List<SchemaPath> statisticsColumns = interestingColumns == null ? null : new ArrayList<>(interestingColumns);
    if (statisticsColumns != null) {
        segmentColumns.stream().map(SchemaPath::getSimplePath).forEach(statisticsColumns::add);
    }
    SchemaPath locationField = analyzeInfoProvider.getLocationField(columnNamesOptions);
    if (analyzeInfoProvider.supportsMetadataType(MetadataType.ROW_GROUP) && metadataLevel.includes(MetadataType.ROW_GROUP)) {
        MetadataHandlerContext handlerContext = MetadataHandlerContext.builder().tableInfo(tableInfo).metadataToHandle(rowGroupsInfo).metadataType(MetadataType.ROW_GROUP).depthLevel(segmentExpressions.size()).segmentColumns(segmentColumns).build();
        convertedRelNode = getRowGroupAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations, statisticsColumns, handlerContext);
        createNewAggregations = false;
        locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
    }
    if (analyzeInfoProvider.supportsMetadataType(MetadataType.FILE) && metadataLevel.includes(MetadataType.FILE)) {
        MetadataHandlerContext handlerContext = MetadataHandlerContext.builder().tableInfo(tableInfo).metadataToHandle(filesInfo).metadataType(MetadataType.FILE).depthLevel(segmentExpressions.size()).segmentColumns(segmentColumns).build();
        convertedRelNode = getFileAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations, statisticsColumns, locationField, handlerContext);
        locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
        createNewAggregations = false;
    }
    if (analyzeInfoProvider.supportsMetadataType(MetadataType.SEGMENT) && metadataLevel.includes(MetadataType.SEGMENT)) {
        for (int i = segmentExpressions.size(); i > 0; i--) {
            MetadataHandlerContext handlerContext = MetadataHandlerContext.builder().tableInfo(tableInfo).metadataToHandle(new ArrayList<>(segments.get(i - 1))).metadataType(MetadataType.SEGMENT).depthLevel(i).segmentColumns(segmentColumns.subList(0, i)).build();
            convertedRelNode = getSegmentAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations, statisticsColumns, locationField, i, handlerContext);
            locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
            createNewAggregations = false;
        }
    }
    if (analyzeInfoProvider.supportsMetadataType(MetadataType.TABLE) && metadataLevel.includes(MetadataType.TABLE)) {
        MetadataHandlerContext handlerContext = MetadataHandlerContext.builder().tableInfo(tableInfo).metadataToHandle(Collections.emptyList()).metadataType(MetadataType.TABLE).depthLevel(segmentExpressions.size()).segmentColumns(segmentColumns).build();
        convertedRelNode = getTableAggRelNode(convertedRelNode, createNewAggregations, statisticsColumns, locationField, handlerContext);
    } else {
        throw new IllegalStateException("Analyze table with NONE level");
    }
    boolean useStatistics = context.getOptions().getOption(PlannerSettings.STATISTICS_USE);
    SqlNumericLiteral samplePercentLiteral = sqlAnalyzeTable.getSamplePercent();
    double samplePercent = samplePercentLiteral == null ? 100.0 : samplePercentLiteral.intValue(true);
    // Step 3: adds rel nodes for producing statistics analyze if required
    RelNode analyzeRel = useStatistics ? new DrillAnalyzeRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertToRawDrel(relNode), samplePercent) : convertToRawDrel(relBuilder.values(new String[] { "" }, "").build());
    MetadataControllerContext metadataControllerContext = MetadataControllerContext.builder().tableInfo(tableInfo).metastoreTableInfo(metastoreTableInfo).location(((FormatSelection) table.getSelection()).getSelection().getSelectionRoot()).interestingColumns(interestingColumns).segmentColumns(segmentColumns).metadataToHandle(allMetaToHandle).metadataToRemove(metadataToRemove).analyzeMetadataLevel(metadataLevel).build();
    convertedRelNode = new MetadataControllerRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode, analyzeRel, metadataControllerContext);
    return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) Arrays(java.util.Arrays) UserException(org.apache.drill.common.exceptions.UserException) DrillRel(org.apache.drill.exec.planner.logical.DrillRel) LoggerFactory(org.slf4j.LoggerFactory) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) MetadataHandlerContext(org.apache.drill.exec.metastore.analyze.MetadataHandlerContext) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) SqlNode(org.apache.calcite.sql.SqlNode) RelBuilder(org.apache.calcite.tools.RelBuilder) FieldReference(org.apache.drill.common.expression.FieldReference) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) SqlSelect(org.apache.calcite.sql.SqlSelect) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) List(java.util.List) ValidationException(org.apache.calcite.tools.ValidationException) ForemanSetupException(org.apache.drill.exec.work.foreman.ForemanSetupException) SqlNumericLiteral(org.apache.calcite.sql.SqlNumericLiteral) MetadataInfoCollector(org.apache.drill.exec.metastore.analyze.MetadataInfoCollector) ExecConstants(org.apache.drill.exec.ExecConstants) SqlMetastoreAnalyzeTable(org.apache.drill.exec.planner.sql.parser.SqlMetastoreAnalyzeTable) MetadataAggregateContext(org.apache.drill.exec.metastore.analyze.MetadataAggregateContext) TableScan(org.apache.calcite.rel.core.TableScan) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreException(org.apache.drill.metastore.exceptions.MetastoreException) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) Pointer(org.apache.drill.exec.util.Pointer) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) ArrayList(java.util.ArrayList) SqlLiteral(org.apache.calcite.sql.SqlLiteral) SqlUnsupportedException(org.apache.drill.exec.work.foreman.SqlUnsupportedException) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) MetadataAggRel(org.apache.drill.exec.planner.logical.MetadataAggRel) DrillRelOptUtil(org.apache.drill.exec.planner.common.DrillRelOptUtil) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) SqlIdentifier(org.apache.calcite.sql.SqlIdentifier) MetadataHandlerRel(org.apache.drill.exec.planner.logical.MetadataHandlerRel) CheckedSupplier(org.apache.drill.common.util.function.CheckedSupplier) RelDataType(org.apache.calcite.rel.type.RelDataType) SqlParserPos(org.apache.calcite.sql.parser.SqlParserPos) Logger(org.slf4j.Logger) DrillAnalyzeRel(org.apache.drill.exec.planner.logical.DrillAnalyzeRel) IOException(java.io.IOException) RelNode(org.apache.calcite.rel.RelNode) Prel(org.apache.drill.exec.planner.physical.Prel) AbstractSchema(org.apache.drill.exec.store.AbstractSchema) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) RelConversionException(org.apache.calcite.tools.RelConversionException) Strings(org.apache.parquet.Strings) DrillScreenRel(org.apache.drill.exec.planner.logical.DrillScreenRel) PhysicalPlan(org.apache.drill.exec.physical.PhysicalPlan) LOGICAL_BUILDER(org.apache.drill.exec.planner.logical.DrillRelFactories.LOGICAL_BUILDER) SqlNodeList(org.apache.calcite.sql.SqlNodeList) MetadataControllerRel(org.apache.drill.exec.planner.logical.MetadataControllerRel) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) Collections(java.util.Collections) AnalyzeInfoProvider(org.apache.drill.exec.metastore.analyze.AnalyzeInfoProvider) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) AnalyzeInfoProvider(org.apache.drill.exec.metastore.analyze.AnalyzeInfoProvider) ArrayList(java.util.ArrayList) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) DrillAnalyzeRel(org.apache.drill.exec.planner.logical.DrillAnalyzeRel) SchemaPath(org.apache.drill.common.expression.SchemaPath) SqlUnsupportedException(org.apache.drill.exec.work.foreman.SqlUnsupportedException) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) MetadataHandlerContext(org.apache.drill.exec.metastore.analyze.MetadataHandlerContext) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) MetadataControllerRel(org.apache.drill.exec.planner.logical.MetadataControllerRel) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) TableScan(org.apache.calcite.rel.core.TableScan) RelBuilder(org.apache.calcite.tools.RelBuilder) MetastoreException(org.apache.drill.metastore.exceptions.MetastoreException) MetadataType(org.apache.drill.metastore.metadata.MetadataType) DrillScreenRel(org.apache.drill.exec.planner.logical.DrillScreenRel) RelNode(org.apache.calcite.rel.RelNode) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) DrillRel(org.apache.drill.exec.planner.logical.DrillRel) MetadataInfoCollector(org.apache.drill.exec.metastore.analyze.MetadataInfoCollector) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) SqlNumericLiteral(org.apache.calcite.sql.SqlNumericLiteral)

Example 27 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class TestMetastoreWithEasyFormatPlugin method testIncrementalAnalyzeUnchangedTable.

@Test
public void testIncrementalAnalyzeUnchangedTable() throws Exception {
    String tableName = "multilevel/csvUnchanged";
    File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/csv"), Paths.get(tableName));
    TableInfo tableInfo = getTableInfo(tableName, "tmp", "csv");
    long lastModifiedTime = getMaxLastModified(table);
    try {
        testBuilder().sqlQuery("ANALYZE TABLE table(dfs.tmp.`%s` (schema=>%s)) REFRESH METADATA", tableName, SCHEMA_STRING).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        List<SegmentMetadata> segmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
        assertEquals(15, segmentMetadata.size());
        testBuilder().sqlQuery("ANALYZE TABLE table(dfs.tmp.`%s` (schema=>%s)) REFRESH METADATA", tableName, SCHEMA_STRING).unOrdered().baselineColumns("ok", "summary").baselineValues(false, "Table metadata is up to date, analyze wasn't performed.").go();
        segmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
        assertEquals(15, segmentMetadata.size());
        long postAnalyzeLastModifiedTime = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().metastoreTableInfo(tableInfo).lastModifiedTime();
        assertEquals(lastModifiedTime, postAnalyzeLastModifiedTime);
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        FileUtils.deleteQuietly(table);
    }
}
Also used : SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) File(java.io.File) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 28 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class TestMetastoreWithEasyFormatPlugin method testAnalyzeOnTextTable.

@Test
public void testAnalyzeOnTextTable() throws Exception {
    String tableName = "multilevel/csv";
    TableInfo tableInfo = getTableInfo(tableName, "default", "csv");
    File table = dirTestWatcher.copyResourceToRoot(Paths.get(tableName));
    Path tablePath = new Path(table.toURI().getPath());
    BaseTableMetadata expectedTableMetadata = getBaseTableMetadata(tableInfo, table, SCHEMA);
    TableInfo baseTableInfo = TableInfo.builder().name(tableName).storagePlugin("dfs").workspace("default").build();
    Map<SchemaPath, ColumnStatistics<?>> dir0CSVStats = new HashMap<>(DIR0_1994_SEGMENT_COLUMN_STATISTICS);
    dir0CSVStats.put(SchemaPath.getSimplePath("o_comment"), getColumnStatistics(" accounts nag slyly. ironic", "yly final requests over the furiously regula", 40L, TypeProtos.MinorType.VARCHAR));
    SegmentMetadata dir0 = SegmentMetadata.builder().tableInfo(baseTableInfo).metadataInfo(MetadataInfo.builder().type(MetadataType.SEGMENT).identifier("1994").key("1994").build()).path(new Path(tablePath, "1994")).schema(SCHEMA).lastModifiedTime(getMaxLastModified(new File(table, "1994"))).column(SchemaPath.getSimplePath("dir0")).columnsStatistics(dir0CSVStats).metadataStatistics(Collections.singletonList(new StatisticsHolder<>(40L, TableStatisticsKind.ROW_COUNT))).locations(ImmutableSet.of(new Path(tablePath, "1994/Q1/orders_94_q1.csv"), new Path(tablePath, "1994/Q2/orders_94_q2.csv"), new Path(tablePath, "1994/Q3/orders_94_q3.csv"), new Path(tablePath, "1994/Q4/orders_94_q4.csv"))).partitionValues(Collections.singletonList("1994")).build();
    Set<Path> expectedTopLevelSegmentLocations = ImmutableSet.of(new Path(tablePath, "1994"), new Path(tablePath, "1995"), new Path(tablePath, "1996"));
    Set<Set<Path>> expectedSegmentFilesLocations = new HashSet<>();
    Set<Path> segmentFiles = ImmutableSet.of(new Path(tablePath, "1994/Q2/orders_94_q2.csv"), new Path(tablePath, "1994/Q4/orders_94_q4.csv"), new Path(tablePath, "1994/Q1/orders_94_q1.csv"), new Path(tablePath, "1994/Q3/orders_94_q3.csv"));
    expectedSegmentFilesLocations.add(segmentFiles);
    segmentFiles = ImmutableSet.of(new Path(tablePath, "1995/Q2/orders_95_q2.csv"), new Path(tablePath, "1995/Q4/orders_95_q4.csv"), new Path(tablePath, "1995/Q1/orders_95_q1.csv"), new Path(tablePath, "1995/Q3/orders_95_q3.csv"));
    expectedSegmentFilesLocations.add(segmentFiles);
    segmentFiles = ImmutableSet.of(new Path(tablePath, "1996/Q3/orders_96_q3.csv"), new Path(tablePath, "1996/Q2/orders_96_q2.csv"), new Path(tablePath, "1996/Q4/orders_96_q4.csv"), new Path(tablePath, "1996/Q1/orders_96_q1.csv"));
    expectedSegmentFilesLocations.add(segmentFiles);
    long dir0q1lastModified = new File(new File(new File(table, "1994"), "Q1"), "orders_94_q1.csv").lastModified();
    FileMetadata dir01994q1File = FileMetadata.builder().tableInfo(baseTableInfo).metadataInfo(MetadataInfo.builder().type(MetadataType.FILE).identifier("1994/Q1/orders_94_q1.csv").key("1994").build()).schema(SCHEMA).lastModifiedTime(dir0q1lastModified).columnsStatistics(DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS).metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT))).path(new Path(tablePath, "1994/Q1/orders_94_q1.csv")).build();
    try {
        testBuilder().sqlQuery("analyze table table(dfs.`%s`(schema=>%s)) refresh metadata", tableName, SCHEMA_STRING).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.default.%s]", tableName)).go();
        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);
        List<SegmentMetadata> topSegmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByColumn(tableInfo, null, "`dir0`");
        SegmentMetadata actualDir0Metadata = topSegmentMetadata.stream().filter(unit -> unit.getMetadataInfo().identifier().equals("1994")).findAny().orElseThrow(() -> new AssertionError("Segment is absent"));
        Set<Path> locations = actualDir0Metadata.getLocations();
        actualDir0Metadata.toBuilder().locations(locations);
        assertEquals(dir0, actualDir0Metadata);
        Set<Path> topLevelSegmentLocations = topSegmentMetadata.stream().map(SegmentMetadata::getLocation).collect(Collectors.toSet());
        // verify top segments locations
        assertEquals(expectedTopLevelSegmentLocations, topLevelSegmentLocations);
        Set<Set<Path>> segmentFilesLocations = topSegmentMetadata.stream().map(SegmentMetadata::getLocations).collect(Collectors.toSet());
        assertEquals(expectedSegmentFilesLocations, segmentFilesLocations);
        // verify nested segments
        List<SegmentMetadata> nestedSegmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByColumn(tableInfo, null, "`dir1`");
        assertEquals(12, nestedSegmentMetadata.size());
        SegmentMetadata dir01994q1Segment = SegmentMetadata.builder().tableInfo(baseTableInfo).metadataInfo(MetadataInfo.builder().type(MetadataType.SEGMENT).identifier("1994/Q1").key("1994").build()).path(new Path(new Path(tablePath, "1994"), "Q1")).schema(SCHEMA).lastModifiedTime(getMaxLastModified(new File(new File(table, "1994"), "Q1"))).column(SchemaPath.getSimplePath("dir1")).columnsStatistics(DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS).metadataStatistics(Collections.singletonList(new StatisticsHolder<>(10L, TableStatisticsKind.ROW_COUNT))).locations(ImmutableSet.of(new Path(tablePath, "1994/Q1/orders_94_q1.csv"))).partitionValues(Collections.singletonList("Q1")).build();
        // verify segment for 1994
        assertEquals(dir01994q1Segment, nestedSegmentMetadata.stream().filter(unit -> unit.getMetadataInfo().identifier().equals("1994/Q1")).findAny().orElse(null));
        // verify files metadata
        List<FileMetadata> filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(12, filesMetadata.size());
        // verify first file metadata
        assertEquals(dir01994q1File, filesMetadata.stream().filter(unit -> unit.getMetadataInfo().identifier().equals("1994/Q1/orders_94_q1.csv")).findAny().orElse(null));
    } finally {
        run("analyze table dfs.`%s` drop metadata if exists", tableName);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) TestMetastoreCommands.getColumnStatistics(org.apache.drill.exec.sql.TestMetastoreCommands.getColumnStatistics) MetadataType(org.apache.drill.metastore.metadata.MetadataType) Arrays(java.util.Arrays) ClusterTest(org.apache.drill.test.ClusterTest) TABLE_META_INFO(org.apache.drill.exec.sql.TestMetastoreCommands.TABLE_META_INFO) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) TABLE_COLUMN_STATISTICS(org.apache.drill.exec.sql.TestMetastoreCommands.TABLE_COLUMN_STATISTICS) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) SlowTest(org.apache.drill.categories.SlowTest) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) Set(java.util.Set) Category(org.junit.experimental.categories.Category) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) Collectors(java.util.stream.Collectors) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) TestMetastoreCommands.getMaxLastModified(org.apache.drill.exec.sql.TestMetastoreCommands.getMaxLastModified) MetastoreTest(org.apache.drill.categories.MetastoreTest) ExecConstants(org.apache.drill.exec.ExecConstants) ClusterFixtureBuilder(org.apache.drill.test.ClusterFixtureBuilder) TestMetastoreCommands.getColumnStatistics(org.apache.drill.exec.sql.TestMetastoreCommands.getColumnStatistics) TableInfo(org.apache.drill.metastore.metadata.TableInfo) BeforeClass(org.junit.BeforeClass) HashMap(java.util.HashMap) HashSet(java.util.HashSet) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) ExpectedException(org.junit.rules.ExpectedException) Before(org.junit.Before) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) TestMetastoreCommands.getBaseTableMetadata(org.apache.drill.exec.sql.TestMetastoreCommands.getBaseTableMetadata) ClusterFixture(org.apache.drill.test.ClusterFixture) Assert.assertTrue(org.junit.Assert.assertTrue) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) File(java.io.File) ImmutableMap(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap) Rule(org.junit.Rule) DIR0_1994_SEGMENT_COLUMN_STATISTICS(org.apache.drill.exec.sql.TestMetastoreCommands.DIR0_1994_SEGMENT_COLUMN_STATISTICS) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) Paths(java.nio.file.Paths) DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS(org.apache.drill.exec.sql.TestMetastoreCommands.DIR0_1994_Q1_SEGMENT_COLUMN_STATISTICS) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) ImmutableSet(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableSet) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) Set(java.util.Set) HashSet(java.util.HashSet) ImmutableSet(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableSet) HashMap(java.util.HashMap) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) TestMetastoreCommands.getBaseTableMetadata(org.apache.drill.exec.sql.TestMetastoreCommands.getBaseTableMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) File(java.io.File) HashSet(java.util.HashSet) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 29 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class TestMetastoreWithEasyFormatPlugin method testIncrementalAnalyzeNewFile.

@Test
public void testIncrementalAnalyzeNewFile() throws Exception {
    String tableName = "multilevel/csvNewFile";
    File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/csv"), Paths.get(tableName));
    Path tablePath = new Path(table.toURI().getPath());
    TableInfo tableInfo = getTableInfo(tableName, "tmp", "csv");
    // updates statistics values due to new segment
    Map<SchemaPath, ColumnStatistics<?>> updatedStatistics = new HashMap<>(TABLE_COLUMN_STATISTICS);
    updatedStatistics.replaceAll((logicalExpressions, columnStatistics) -> columnStatistics.cloneWith(new ColumnStatistics<>(Arrays.asList(new StatisticsHolder<>(130L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(130L, ColumnStatisticsKind.NON_NULL_VALUES_COUNT)))));
    BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder().tableInfo(tableInfo).metadataInfo(TABLE_META_INFO).schema(SCHEMA).location(tablePath).columnsStatistics(updatedStatistics).metadataStatistics(Arrays.asList(new StatisticsHolder<>(130L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL))).partitionKeys(Collections.emptyMap()).lastModifiedTime(getMaxLastModified(table)).build();
    try {
        testBuilder().sqlQuery("ANALYZE TABLE table(dfs.tmp.`%s` (schema=>%s)) REFRESH METADATA", tableName, SCHEMA_STRING).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        List<SegmentMetadata> segmentsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
        assertEquals(15, segmentsMetadata.size());
        List<FileMetadata> filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(12, filesMetadata.size());
        dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel", "csv", "1994", "Q4", "orders_94_q4.csv"), Paths.get(tableName, "1994", "Q4", "orders_94_q4_1.csv"));
        testBuilder().sqlQuery("ANALYZE TABLE table(dfs.tmp.`%s` (schema=>%s)) REFRESH METADATA", tableName, SCHEMA_STRING).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
        BaseTableMetadata actualTableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
        assertEquals(expectedTableMetadata, actualTableMetadata);
        segmentsMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
        // verifies that segments count left unchanged
        assertEquals(15, segmentsMetadata.size());
        filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
        assertEquals(13, filesMetadata.size());
    } finally {
        run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        FileUtils.deleteQuietly(table);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) TestMetastoreCommands.getColumnStatistics(org.apache.drill.exec.sql.TestMetastoreCommands.getColumnStatistics) HashMap(java.util.HashMap) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) TestMetastoreCommands.getBaseTableMetadata(org.apache.drill.exec.sql.TestMetastoreCommands.getBaseTableMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) File(java.io.File) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 30 with TableInfo

use of org.apache.drill.metastore.metadata.TableInfo in project drill by apache.

the class AbstractBasicTablesRequestsTest method testTableMetadataAbsent.

@Test
public void testTableMetadataAbsent() {
    TableInfo tableInfo = TableInfo.builder().storagePlugin("dfs").workspace("tmp").name("absent").build();
    BaseTableMetadata tableMetadata = basicRequests.tableMetadata(tableInfo);
    assertNull(tableMetadata);
}
Also used : BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) TableInfo(org.apache.drill.metastore.metadata.TableInfo) BaseTest(org.apache.drill.test.BaseTest) Test(org.junit.Test) MetastoreTest(org.apache.drill.categories.MetastoreTest)

Aggregations

TableInfo (org.apache.drill.metastore.metadata.TableInfo)58 MetastoreTest (org.apache.drill.categories.MetastoreTest)50 Test (org.junit.Test)50 MetastoreTableInfo (org.apache.drill.metastore.components.tables.MetastoreTableInfo)39 SchemaPath (org.apache.drill.common.expression.SchemaPath)37 BaseTableMetadata (org.apache.drill.metastore.metadata.BaseTableMetadata)37 Path (org.apache.hadoop.fs.Path)36 ClusterTest (org.apache.drill.test.ClusterTest)33 SlowTest (org.apache.drill.categories.SlowTest)32 File (java.io.File)29 ColumnStatistics (org.apache.drill.metastore.statistics.ColumnStatistics)29 CoreMatchers.containsString (org.hamcrest.CoreMatchers.containsString)25 HashMap (java.util.HashMap)24 FileMetadata (org.apache.drill.metastore.metadata.FileMetadata)23 SegmentMetadata (org.apache.drill.metastore.metadata.SegmentMetadata)23 StatisticsHolder (org.apache.drill.metastore.statistics.StatisticsHolder)23 RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata)20 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)18 BaseTest (org.apache.drill.test.BaseTest)17 MetadataInfo (org.apache.drill.metastore.metadata.MetadataInfo)16