Search in sources :

Example 6 with MetadataType

use of org.apache.drill.metastore.metadata.MetadataType in project drill by apache.

the class MetastoreAnalyzeTableHandler method convertToDrel.

/**
 * Converts to Drill logical plan
 */
private DrillRel convertToDrel(RelNode relNode, SqlMetastoreAnalyzeTable sqlAnalyzeTable, DrillTableInfo drillTableInfo) throws ForemanSetupException, IOException {
    RelBuilder relBuilder = LOGICAL_BUILDER.create(relNode.getCluster(), null);
    DrillTable table = drillTableInfo.drillTable();
    AnalyzeInfoProvider analyzeInfoProvider = table.getGroupScan().getAnalyzeInfoProvider();
    List<String> schemaPath = drillTableInfo.schemaPath();
    String pluginName = schemaPath.get(0);
    String workspaceName = Strings.join(schemaPath.subList(1, schemaPath.size()), AbstractSchema.SCHEMA_SEPARATOR);
    String tableName = drillTableInfo.tableName();
    TableInfo tableInfo = TableInfo.builder().name(tableName).owner(table.getUserName()).type(analyzeInfoProvider.getTableTypeName()).storagePlugin(pluginName).workspace(workspaceName).build();
    ColumnNamesOptions columnNamesOptions = new ColumnNamesOptions(context.getOptions());
    List<String> segmentColumns = analyzeInfoProvider.getSegmentColumns(table, columnNamesOptions).stream().map(SchemaPath::getRootSegmentPath).collect(Collectors.toList());
    List<NamedExpression> segmentExpressions = segmentColumns.stream().map(partitionName -> new NamedExpression(SchemaPath.getSimplePath(partitionName), FieldReference.getWithQuotedRef(partitionName))).collect(Collectors.toList());
    List<MetadataInfo> rowGroupsInfo = Collections.emptyList();
    List<MetadataInfo> filesInfo = Collections.emptyList();
    Multimap<Integer, MetadataInfo> segments = ArrayListMultimap.create();
    BasicTablesRequests basicRequests;
    try {
        basicRequests = context.getMetastoreRegistry().get().tables().basicRequests();
    } catch (MetastoreException e) {
        logger.error("Error when obtaining Metastore instance for table {}", tableName, e);
        DrillRel convertedRelNode = convertToRawDrel(relBuilder.values(new String[] { MetastoreAnalyzeConstants.OK_FIELD_NAME, MetastoreAnalyzeConstants.SUMMARY_FIELD_NAME }, false, e.getMessage()).build());
        return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
    }
    MetadataType metadataLevel = getMetadataType(sqlAnalyzeTable);
    List<SchemaPath> interestingColumns = sqlAnalyzeTable.getFieldNames();
    MetastoreTableInfo metastoreTableInfo = basicRequests.metastoreTableInfo(tableInfo);
    List<MetadataInfo> allMetaToHandle = null;
    List<MetadataInfo> metadataToRemove = new ArrayList<>();
    // whether incremental analyze may be produced
    if (metastoreTableInfo.isExists()) {
        RelNode finalRelNode = relNode;
        CheckedSupplier<TableScan, SqlUnsupportedException> tableScanSupplier = () -> DrillRelOptUtil.findScan(convertToDrel(finalRelNode.getInput(0)));
        MetadataInfoCollector metadataInfoCollector = analyzeInfoProvider.getMetadataInfoCollector(basicRequests, tableInfo, (FormatSelection) table.getSelection(), context.getPlannerSettings(), tableScanSupplier, interestingColumns, metadataLevel, segmentColumns.size());
        if (!metadataInfoCollector.isOutdated()) {
            DrillRel convertedRelNode = convertToRawDrel(relBuilder.values(new String[] { MetastoreAnalyzeConstants.OK_FIELD_NAME, MetastoreAnalyzeConstants.SUMMARY_FIELD_NAME }, false, "Table metadata is up to date, analyze wasn't performed.").build());
            return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
        }
        // updates scan to read updated / new files, pass removed files into metadata handler
        relNode = relNode.copy(relNode.getTraitSet(), Collections.singletonList(metadataInfoCollector.getPrunedScan()));
        filesInfo = metadataInfoCollector.getFilesInfo();
        segments = metadataInfoCollector.getSegmentsInfo();
        rowGroupsInfo = metadataInfoCollector.getRowGroupsInfo();
        allMetaToHandle = metadataInfoCollector.getAllMetaToHandle();
        metadataToRemove = metadataInfoCollector.getMetadataToRemove();
    }
    // Step 2: constructs plan for producing analyze
    DrillRel convertedRelNode = convertToRawDrel(relNode);
    boolean createNewAggregations = true;
    // List of columns for which statistics should be collected: interesting columns + segment columns
    List<SchemaPath> statisticsColumns = interestingColumns == null ? null : new ArrayList<>(interestingColumns);
    if (statisticsColumns != null) {
        segmentColumns.stream().map(SchemaPath::getSimplePath).forEach(statisticsColumns::add);
    }
    SchemaPath locationField = analyzeInfoProvider.getLocationField(columnNamesOptions);
    if (analyzeInfoProvider.supportsMetadataType(MetadataType.ROW_GROUP) && metadataLevel.includes(MetadataType.ROW_GROUP)) {
        MetadataHandlerContext handlerContext = MetadataHandlerContext.builder().tableInfo(tableInfo).metadataToHandle(rowGroupsInfo).metadataType(MetadataType.ROW_GROUP).depthLevel(segmentExpressions.size()).segmentColumns(segmentColumns).build();
        convertedRelNode = getRowGroupAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations, statisticsColumns, handlerContext);
        createNewAggregations = false;
        locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
    }
    if (analyzeInfoProvider.supportsMetadataType(MetadataType.FILE) && metadataLevel.includes(MetadataType.FILE)) {
        MetadataHandlerContext handlerContext = MetadataHandlerContext.builder().tableInfo(tableInfo).metadataToHandle(filesInfo).metadataType(MetadataType.FILE).depthLevel(segmentExpressions.size()).segmentColumns(segmentColumns).build();
        convertedRelNode = getFileAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations, statisticsColumns, locationField, handlerContext);
        locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
        createNewAggregations = false;
    }
    if (analyzeInfoProvider.supportsMetadataType(MetadataType.SEGMENT) && metadataLevel.includes(MetadataType.SEGMENT)) {
        for (int i = segmentExpressions.size(); i > 0; i--) {
            MetadataHandlerContext handlerContext = MetadataHandlerContext.builder().tableInfo(tableInfo).metadataToHandle(new ArrayList<>(segments.get(i - 1))).metadataType(MetadataType.SEGMENT).depthLevel(i).segmentColumns(segmentColumns.subList(0, i)).build();
            convertedRelNode = getSegmentAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations, statisticsColumns, locationField, i, handlerContext);
            locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
            createNewAggregations = false;
        }
    }
    if (analyzeInfoProvider.supportsMetadataType(MetadataType.TABLE) && metadataLevel.includes(MetadataType.TABLE)) {
        MetadataHandlerContext handlerContext = MetadataHandlerContext.builder().tableInfo(tableInfo).metadataToHandle(Collections.emptyList()).metadataType(MetadataType.TABLE).depthLevel(segmentExpressions.size()).segmentColumns(segmentColumns).build();
        convertedRelNode = getTableAggRelNode(convertedRelNode, createNewAggregations, statisticsColumns, locationField, handlerContext);
    } else {
        throw new IllegalStateException("Analyze table with NONE level");
    }
    boolean useStatistics = context.getOptions().getOption(PlannerSettings.STATISTICS_USE);
    SqlNumericLiteral samplePercentLiteral = sqlAnalyzeTable.getSamplePercent();
    double samplePercent = samplePercentLiteral == null ? 100.0 : samplePercentLiteral.intValue(true);
    // Step 3: adds rel nodes for producing statistics analyze if required
    RelNode analyzeRel = useStatistics ? new DrillAnalyzeRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertToRawDrel(relNode), samplePercent) : convertToRawDrel(relBuilder.values(new String[] { "" }, "").build());
    MetadataControllerContext metadataControllerContext = MetadataControllerContext.builder().tableInfo(tableInfo).metastoreTableInfo(metastoreTableInfo).location(((FormatSelection) table.getSelection()).getSelection().getSelectionRoot()).interestingColumns(interestingColumns).segmentColumns(segmentColumns).metadataToHandle(allMetaToHandle).metadataToRemove(metadataToRemove).analyzeMetadataLevel(metadataLevel).build();
    convertedRelNode = new MetadataControllerRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode, analyzeRel, metadataControllerContext);
    return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) Arrays(java.util.Arrays) UserException(org.apache.drill.common.exceptions.UserException) DrillRel(org.apache.drill.exec.planner.logical.DrillRel) LoggerFactory(org.slf4j.LoggerFactory) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) ArrayListMultimap(org.apache.drill.shaded.guava.com.google.common.collect.ArrayListMultimap) MetadataHandlerContext(org.apache.drill.exec.metastore.analyze.MetadataHandlerContext) PhysicalOperator(org.apache.drill.exec.physical.base.PhysicalOperator) SqlNode(org.apache.calcite.sql.SqlNode) RelBuilder(org.apache.calcite.tools.RelBuilder) FieldReference(org.apache.drill.common.expression.FieldReference) Multimap(org.apache.drill.shaded.guava.com.google.common.collect.Multimap) SqlSelect(org.apache.calcite.sql.SqlSelect) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) List(java.util.List) ValidationException(org.apache.calcite.tools.ValidationException) ForemanSetupException(org.apache.drill.exec.work.foreman.ForemanSetupException) SqlNumericLiteral(org.apache.calcite.sql.SqlNumericLiteral) MetadataInfoCollector(org.apache.drill.exec.metastore.analyze.MetadataInfoCollector) ExecConstants(org.apache.drill.exec.ExecConstants) SqlMetastoreAnalyzeTable(org.apache.drill.exec.planner.sql.parser.SqlMetastoreAnalyzeTable) MetadataAggregateContext(org.apache.drill.exec.metastore.analyze.MetadataAggregateContext) TableScan(org.apache.calcite.rel.core.TableScan) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreException(org.apache.drill.metastore.exceptions.MetastoreException) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) Pointer(org.apache.drill.exec.util.Pointer) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) ArrayList(java.util.ArrayList) SqlLiteral(org.apache.calcite.sql.SqlLiteral) SqlUnsupportedException(org.apache.drill.exec.work.foreman.SqlUnsupportedException) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) MetadataAggRel(org.apache.drill.exec.planner.logical.MetadataAggRel) DrillRelOptUtil(org.apache.drill.exec.planner.common.DrillRelOptUtil) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) SqlIdentifier(org.apache.calcite.sql.SqlIdentifier) MetadataHandlerRel(org.apache.drill.exec.planner.logical.MetadataHandlerRel) CheckedSupplier(org.apache.drill.common.util.function.CheckedSupplier) RelDataType(org.apache.calcite.rel.type.RelDataType) SqlParserPos(org.apache.calcite.sql.parser.SqlParserPos) Logger(org.slf4j.Logger) DrillAnalyzeRel(org.apache.drill.exec.planner.logical.DrillAnalyzeRel) IOException(java.io.IOException) RelNode(org.apache.calcite.rel.RelNode) Prel(org.apache.drill.exec.planner.physical.Prel) AbstractSchema(org.apache.drill.exec.store.AbstractSchema) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) RelConversionException(org.apache.calcite.tools.RelConversionException) Strings(org.apache.parquet.Strings) DrillScreenRel(org.apache.drill.exec.planner.logical.DrillScreenRel) PhysicalPlan(org.apache.drill.exec.physical.PhysicalPlan) LOGICAL_BUILDER(org.apache.drill.exec.planner.logical.DrillRelFactories.LOGICAL_BUILDER) SqlNodeList(org.apache.calcite.sql.SqlNodeList) MetadataControllerRel(org.apache.drill.exec.planner.logical.MetadataControllerRel) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) Collections(java.util.Collections) AnalyzeInfoProvider(org.apache.drill.exec.metastore.analyze.AnalyzeInfoProvider) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) DrillTable(org.apache.drill.exec.planner.logical.DrillTable) AnalyzeInfoProvider(org.apache.drill.exec.metastore.analyze.AnalyzeInfoProvider) ArrayList(java.util.ArrayList) FormatSelection(org.apache.drill.exec.store.dfs.FormatSelection) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) DrillAnalyzeRel(org.apache.drill.exec.planner.logical.DrillAnalyzeRel) SchemaPath(org.apache.drill.common.expression.SchemaPath) SqlUnsupportedException(org.apache.drill.exec.work.foreman.SqlUnsupportedException) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) MetadataHandlerContext(org.apache.drill.exec.metastore.analyze.MetadataHandlerContext) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) MetadataControllerRel(org.apache.drill.exec.planner.logical.MetadataControllerRel) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) TableScan(org.apache.calcite.rel.core.TableScan) RelBuilder(org.apache.calcite.tools.RelBuilder) MetastoreException(org.apache.drill.metastore.exceptions.MetastoreException) MetadataType(org.apache.drill.metastore.metadata.MetadataType) DrillScreenRel(org.apache.drill.exec.planner.logical.DrillScreenRel) RelNode(org.apache.calcite.rel.RelNode) NamedExpression(org.apache.drill.common.logical.data.NamedExpression) DrillRel(org.apache.drill.exec.planner.logical.DrillRel) MetadataInfoCollector(org.apache.drill.exec.metastore.analyze.MetadataInfoCollector) MetadataControllerContext(org.apache.drill.exec.metastore.analyze.MetadataControllerContext) SqlNumericLiteral(org.apache.calcite.sql.SqlNumericLiteral)

Example 7 with MetadataType

use of org.apache.drill.metastore.metadata.MetadataType in project drill by apache.

the class TestTableMetadataUnitConversion method testSegmentMetadata.

@Test
public void testSegmentMetadata() {
    TableInfo tableInfo = data.basicTableInfo;
    MetadataInfo metadataInfo = MetadataInfo.builder().type(MetadataType.SEGMENT).key("part_int=3").identifier("part_int=3/part_varchar=g").build();
    Path path = new Path("/tmp/nation");
    String unitPath = path.toUri().getPath();
    Set<Path> locations = new HashSet<>();
    locations.add(new Path("part_int=3/part_varchar=g/0_0_0.parquet"));
    locations.add(new Path("part_int=3/part_varchar=g/0_0_1.parquet"));
    List<String> unitLocations = locations.stream().map(location -> location.toUri().getPath()).collect(Collectors.toList());
    // check required fields
    SegmentMetadata requiredFieldsMetadata = SegmentMetadata.builder().tableInfo(tableInfo).metadataInfo(metadataInfo).columnsStatistics(data.columnsStatistics).metadataStatistics(data.metadataStatistics).path(path).locations(locations).build();
    TableMetadataUnit requiredFieldsExpectedUnit = TableMetadataUnit.builder().storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name()).metadataType(metadataInfo.type().name()).metadataKey(metadataInfo.key()).metadataIdentifier(metadataInfo.identifier()).columnsStatistics(data.unitColumnsStatistics).metadataStatistics(data.unitMetadataStatistics).lastModifiedTime(BaseMetadata.UNDEFINED_TIME).path(path.toUri().getPath()).location(unitPath).locations(unitLocations).build();
    TableMetadataUnit requiredFieldsUnit = requiredFieldsMetadata.toMetadataUnit();
    assertEquals(requiredFieldsExpectedUnit, requiredFieldsUnit);
    assertNotNull(SegmentMetadata.builder().metadataUnit(requiredFieldsUnit).build());
    SchemaPath column = SchemaPath.getSimplePath("dir1");
    List<String> partitionValues = Collections.singletonList("part_varchar=g");
    SegmentMetadata allFieldsMetadata = SegmentMetadata.builder().tableInfo(tableInfo).metadataInfo(metadataInfo).schema(data.schema).columnsStatistics(data.columnsStatistics).metadataStatistics(data.metadataStatistics).lastModifiedTime(data.lastModifiedTime).path(path).locations(locations).column(column).partitionValues(partitionValues).build();
    TableMetadataUnit allFieldsExpectedUnit = TableMetadataUnit.builder().storagePlugin(tableInfo.storagePlugin()).workspace(tableInfo.workspace()).tableName(tableInfo.name()).metadataType(metadataInfo.type().name()).metadataKey(metadataInfo.key()).metadataIdentifier(metadataInfo.identifier()).schema(data.unitSchema).columnsStatistics(data.unitColumnsStatistics).metadataStatistics(data.unitMetadataStatistics).lastModifiedTime(data.lastModifiedTime).path(path.toUri().getPath()).location(unitPath).locations(unitLocations).column(column.toString()).partitionValues(partitionValues).build();
    TableMetadataUnit allFieldsUnit = allFieldsMetadata.toMetadataUnit();
    assertEquals(allFieldsExpectedUnit, allFieldsUnit);
    assertNotNull(SegmentMetadata.builder().metadataUnit(allFieldsUnit).build());
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) MetadataType(org.apache.drill.metastore.metadata.MetadataType) Arrays(java.util.Arrays) TableInfo(org.apache.drill.metastore.metadata.TableInfo) BeforeClass(org.junit.BeforeClass) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) HashMap(java.util.HashMap) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) BaseTest(org.apache.drill.test.BaseTest) HashSet(java.util.HashSet) PartitionMetadata(org.apache.drill.metastore.metadata.PartitionMetadata) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Assert.assertNotNull(org.junit.Assert.assertNotNull) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) Collection(java.util.Collection) SchemaPath(org.apache.drill.common.expression.SchemaPath) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) Set(java.util.Set) Test(org.junit.Test) Category(org.junit.experimental.categories.Category) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) Collectors(java.util.stream.Collectors) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) MetastoreTest(org.apache.drill.categories.MetastoreTest) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) SchemaPath(org.apache.drill.common.expression.SchemaPath) TableInfo(org.apache.drill.metastore.metadata.TableInfo) HashSet(java.util.HashSet) BaseTest(org.apache.drill.test.BaseTest) Test(org.junit.Test) MetastoreTest(org.apache.drill.categories.MetastoreTest)

Example 8 with MetadataType

use of org.apache.drill.metastore.metadata.MetadataType in project drill by apache.

the class TestMetastoreCommands method testAnalyzeLowerLevelMetadata.

@Test
public void testAnalyzeLowerLevelMetadata() throws Exception {
    // checks that metadata for levels below specified in analyze statement is absent
    String tableName = "multilevel/parquetLowerLevel";
    TableInfo tableInfo = getTableInfo(tableName, "tmp");
    dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/parquet"), Paths.get(tableName));
    List<MetadataType> analyzeLevels = Arrays.asList(MetadataType.FILE, MetadataType.SEGMENT, MetadataType.TABLE);
    for (MetadataType analyzeLevel : analyzeLevels) {
        try {
            testBuilder().sqlQuery("ANALYZE TABLE dfs.tmp.`%s` REFRESH METADATA '%s' level", tableName, analyzeLevel.name()).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
            Set<MetadataType> emptyMetadataLevels = Arrays.stream(MetadataType.values()).filter(metadataType -> metadataType.compareTo(analyzeLevel) > 0 && // for the case when there are no segment metadata, default segment is present
            metadataType.compareTo(MetadataType.SEGMENT) > 0 && metadataType.compareTo(MetadataType.ALL) < 0).collect(Collectors.toSet());
            BasicTablesRequests.RequestMetadata requestMetadata = BasicTablesRequests.RequestMetadata.builder().tableInfo(tableInfo).metadataTypes(emptyMetadataLevels).build();
            List<TableMetadataUnit> metadataUnitList = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().request(requestMetadata);
            assertTrue(String.format("Some metadata [%s] for [%s] analyze query level is present" + metadataUnitList, emptyMetadataLevels, analyzeLevel), metadataUnitList.isEmpty());
        } finally {
            run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
        }
    }
}
Also used : MetadataType(org.apache.drill.metastore.metadata.MetadataType) BaseStatisticsKind(org.apache.drill.metastore.statistics.BaseStatisticsKind) Arrays(java.util.Arrays) ClusterTest(org.apache.drill.test.ClusterTest) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) BaseTableMetadata(org.apache.drill.metastore.metadata.BaseTableMetadata) SlowTest(org.apache.drill.categories.SlowTest) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) Assert.fail(org.junit.Assert.fail) AnalyzeParquetInfoProvider(org.apache.drill.exec.metastore.analyze.AnalyzeParquetInfoProvider) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) ColumnStatisticsKind(org.apache.drill.metastore.statistics.ColumnStatisticsKind) Delete(org.apache.drill.metastore.operate.Delete) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) Set(java.util.Set) Category(org.junit.experimental.categories.Category) Instant(java.time.Instant) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) Collectors(java.util.stream.Collectors) ZoneId(java.time.ZoneId) TypeProtos(org.apache.drill.common.types.TypeProtos) List(java.util.List) Assert.assertFalse(org.junit.Assert.assertFalse) MetastoreTest(org.apache.drill.categories.MetastoreTest) ExecConstants(org.apache.drill.exec.ExecConstants) ClusterFixtureBuilder(org.apache.drill.test.ClusterFixtureBuilder) TableInfo(org.apache.drill.metastore.metadata.TableInfo) BeforeClass(org.junit.BeforeClass) LocalDateTime(java.time.LocalDateTime) HashMap(java.util.HashMap) HashSet(java.util.HashSet) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) TableStatisticsKind(org.apache.drill.metastore.statistics.TableStatisticsKind) ExpectedException(org.junit.rules.ExpectedException) Before(org.junit.Before) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) ClusterFixture(org.apache.drill.test.ClusterFixture) Assert.assertNotNull(org.junit.Assert.assertNotNull) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) UserRemoteException(org.apache.drill.common.exceptions.UserRemoteException) Assert.assertTrue(org.junit.Assert.assertTrue) FileUtils(org.apache.commons.io.FileUtils) Test(org.junit.Test) File(java.io.File) ImmutableMap(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableMap) Rule(org.junit.Rule) Assert.assertNull(org.junit.Assert.assertNull) Ignore(org.junit.Ignore) PlannerSettings(org.apache.drill.exec.planner.physical.PlannerSettings) Paths(java.nio.file.Paths) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) ImmutableSet(org.apache.drill.shaded.guava.com.google.common.collect.ImmutableSet) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) MetadataType(org.apache.drill.metastore.metadata.MetadataType) TableInfo(org.apache.drill.metastore.metadata.TableInfo) MetastoreTableInfo(org.apache.drill.metastore.components.tables.MetastoreTableInfo) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) ClusterTest(org.apache.drill.test.ClusterTest) SlowTest(org.apache.drill.categories.SlowTest) MetastoreTest(org.apache.drill.categories.MetastoreTest) Test(org.junit.Test)

Example 9 with MetadataType

use of org.apache.drill.metastore.metadata.MetadataType in project drill by apache.

the class MetadataControllerBatch method getMetadataUnits.

private List<TableMetadataUnit> getMetadataUnits(TupleReader reader, int nestingLevel) {
    List<TableMetadataUnit> metadataUnits = new ArrayList<>();
    TupleMetadata columnMetadata = reader.tupleSchema();
    ObjectReader metadataColumnReader = reader.column(MetastoreAnalyzeConstants.METADATA_TYPE);
    Preconditions.checkNotNull(metadataColumnReader, "metadataType column wasn't found");
    ObjectReader underlyingMetadataReader = reader.column(MetastoreAnalyzeConstants.COLLECTED_MAP_FIELD);
    if (underlyingMetadataReader != null) {
        if (!underlyingMetadataReader.schema().isArray()) {
            throw new IllegalStateException("Incoming vector with name `collected_map` should be repeated map");
        }
        // current row contains information about underlying metadata
        ArrayReader array = underlyingMetadataReader.array();
        while (array.next()) {
            metadataUnits.addAll(getMetadataUnits(array.tuple(), nestingLevel + 1));
        }
    }
    List<StatisticsHolder<?>> metadataStatistics = getMetadataStatistics(reader, columnMetadata);
    Long rowCount = (Long) metadataStatistics.stream().filter(statisticsHolder -> statisticsHolder.getStatisticsKind() == TableStatisticsKind.ROW_COUNT).findAny().map(StatisticsHolder::getStatisticsValue).orElse(null);
    Map<SchemaPath, ColumnStatistics<?>> columnStatistics = getColumnStatistics(reader, columnMetadata, rowCount);
    MetadataType metadataType = MetadataType.valueOf(metadataColumnReader.scalar().getString());
    BaseMetadata metadata;
    switch(metadataType) {
        case TABLE:
            {
                metadata = getTableMetadata(reader, metadataStatistics, columnStatistics);
                break;
            }
        case SEGMENT:
            {
                metadata = getSegmentMetadata(reader, metadataStatistics, columnStatistics, nestingLevel);
                break;
            }
        case PARTITION:
            {
                metadata = getPartitionMetadata(reader, metadataStatistics, columnStatistics, nestingLevel);
                break;
            }
        case FILE:
            {
                metadata = getFileMetadata(reader, metadataStatistics, columnStatistics, nestingLevel);
                break;
            }
        case ROW_GROUP:
            {
                metadata = getRowGroupMetadata(reader, metadataStatistics, columnStatistics, nestingLevel);
                break;
            }
        default:
            throw new UnsupportedOperationException("Unsupported metadata type: " + metadataType);
    }
    metadataUnits.add(metadata.toMetadataUnit());
    return metadataUnits;
}
Also used : ColumnStatistics(org.apache.drill.metastore.statistics.ColumnStatistics) TableMetadataUnit(org.apache.drill.metastore.components.tables.TableMetadataUnit) ArrayList(java.util.ArrayList) MetadataType(org.apache.drill.metastore.metadata.MetadataType) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) StatisticsHolder(org.apache.drill.metastore.statistics.StatisticsHolder) SchemaPath(org.apache.drill.common.expression.SchemaPath) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) ObjectReader(org.apache.drill.exec.vector.accessor.ObjectReader)

Example 10 with MetadataType

use of org.apache.drill.metastore.metadata.MetadataType in project drill by apache.

the class MetadataHandlerBatch method writeMetadata.

private <T extends BaseMetadata & LocationProvider> VectorContainer writeMetadata(List<T> metadataList) {
    BaseMetadata firstElement = metadataList.iterator().next();
    ResultSetLoader resultSetLoader = getResultSetLoaderForMetadata(firstElement);
    resultSetLoader.startBatch();
    RowSetLoader rowWriter = resultSetLoader.writer();
    Iterator<T> segmentsIterator = metadataList.iterator();
    while (!rowWriter.isFull() && segmentsIterator.hasNext()) {
        T metadata = segmentsIterator.next();
        metadataToHandle.remove(metadata.getMetadataInfo().identifier());
        List<Object> arguments = new ArrayList<>();
        // adds required segment names to the arguments
        arguments.add(metadata.getPath().toUri().getPath());
        Collections.addAll(arguments, Arrays.copyOf(MetadataIdentifierUtils.getValuesFromMetadataIdentifier(metadata.getMetadataInfo().identifier()), popConfig.getContext().segmentColumns().size()));
        // adds column statistics values assuming that they are sorted in alphabetic order
        // (see getResultSetLoaderForMetadata() method)
        metadata.getColumnsStatistics().entrySet().stream().sorted(Comparator.comparing(e -> e.getKey().toExpr())).map(Map.Entry::getValue).flatMap(columnStatistics -> AnalyzeColumnUtils.COLUMN_STATISTICS_FUNCTIONS.keySet().stream().map(columnStatistics::get)).forEach(arguments::add);
        AnalyzeColumnUtils.META_STATISTICS_FUNCTIONS.keySet().stream().map(metadata::getStatistic).forEach(arguments::add);
        // collectedMap field value
        arguments.add(new Object[] {});
        if (metadataType == MetadataType.SEGMENT) {
            arguments.add(((SegmentMetadata) metadata).getLocations().stream().map(path -> path.toUri().getPath()).toArray(String[]::new));
        }
        if (metadataType == MetadataType.ROW_GROUP) {
            arguments.add(String.valueOf(((RowGroupMetadata) metadata).getRowGroupIndex()));
            arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.START)));
            arguments.add(Long.toString(metadata.getStatistic(() -> ExactStatisticsConstants.LENGTH)));
        }
        arguments.add(metadata.getSchema().jsonString());
        arguments.add(String.valueOf(metadata.getLastModifiedTime()));
        arguments.add(metadataType.name());
        rowWriter.addRow(arguments.toArray());
    }
    return resultSetLoader.harvest();
}
Also used : AbstractSingleRecordBatch(org.apache.drill.exec.record.AbstractSingleRecordBatch) MetadataType(org.apache.drill.metastore.metadata.MetadataType) Arrays(java.util.Arrays) LoggerFactory(org.slf4j.LoggerFactory) Types(org.apache.drill.common.types.Types) MetadataInfo(org.apache.drill.metastore.metadata.MetadataInfo) RowSetReader(org.apache.drill.exec.physical.rowSet.RowSetReader) VectorContainer(org.apache.drill.exec.record.VectorContainer) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ResultSetLoaderImpl(org.apache.drill.exec.physical.resultSet.impl.ResultSetLoaderImpl) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) BatchSchema(org.apache.drill.exec.record.BatchSchema) BasicTablesRequests(org.apache.drill.metastore.components.tables.BasicTablesRequests) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) SchemaPath(org.apache.drill.common.expression.SchemaPath) RecordBatch(org.apache.drill.exec.record.RecordBatch) MetastoreAnalyzeConstants(org.apache.drill.exec.metastore.analyze.MetastoreAnalyzeConstants) Collectors(java.util.stream.Collectors) List(java.util.List) MinorType(org.apache.drill.common.types.TypeProtos.MinorType) ResultSetOptionBuilder(org.apache.drill.exec.physical.resultSet.impl.ResultSetOptionBuilder) Preconditions(org.apache.drill.shaded.guava.com.google.common.base.Preconditions) MetadataIdentifierUtils(org.apache.drill.exec.metastore.analyze.MetadataIdentifierUtils) MaterializedField(org.apache.drill.exec.record.MaterializedField) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) Function(java.util.function.Function) ColumnNamesOptions(org.apache.drill.exec.metastore.ColumnNamesOptions) ArrayList(java.util.ArrayList) OutOfMemoryException(org.apache.drill.exec.exception.OutOfMemoryException) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) DirectRowSet(org.apache.drill.exec.physical.rowSet.DirectRowSet) StreamSupport(java.util.stream.StreamSupport) NONE(org.apache.drill.exec.record.RecordBatch.IterOutcome.NONE) FragmentContext(org.apache.drill.exec.ops.FragmentContext) FileMetadata(org.apache.drill.metastore.metadata.FileMetadata) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) Logger(org.slf4j.Logger) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) Iterator(java.util.Iterator) ExactStatisticsConstants(org.apache.drill.metastore.statistics.ExactStatisticsConstants) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) StatisticsKind(org.apache.drill.metastore.statistics.StatisticsKind) MetadataHandlerPOP(org.apache.drill.exec.physical.config.MetadataHandlerPOP) LocationProvider(org.apache.drill.metastore.metadata.LocationProvider) VarCharVector(org.apache.drill.exec.vector.VarCharVector) Tables(org.apache.drill.metastore.components.tables.Tables) Comparator(java.util.Comparator) AnalyzeColumnUtils(org.apache.drill.exec.metastore.analyze.AnalyzeColumnUtils) Collections(java.util.Collections) ArrayList(java.util.ArrayList) RowGroupMetadata(org.apache.drill.metastore.metadata.RowGroupMetadata) SegmentMetadata(org.apache.drill.metastore.metadata.SegmentMetadata) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) BaseMetadata(org.apache.drill.metastore.metadata.BaseMetadata) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader)

Aggregations

MetadataType (org.apache.drill.metastore.metadata.MetadataType)11 SchemaPath (org.apache.drill.common.expression.SchemaPath)9 Path (org.apache.hadoop.fs.Path)7 Arrays (java.util.Arrays)6 Collections (java.util.Collections)6 List (java.util.List)6 Collectors (java.util.stream.Collectors)6 FileMetadata (org.apache.drill.metastore.metadata.FileMetadata)6 MetadataInfo (org.apache.drill.metastore.metadata.MetadataInfo)6 RowGroupMetadata (org.apache.drill.metastore.metadata.RowGroupMetadata)6 SegmentMetadata (org.apache.drill.metastore.metadata.SegmentMetadata)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)5 HashSet (java.util.HashSet)4 MetastoreTest (org.apache.drill.categories.MetastoreTest)4 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)4 BaseMetadata (org.apache.drill.metastore.metadata.BaseMetadata)4 HashMap (java.util.HashMap)3 Set (java.util.Set)3 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)3