use of org.apache.drill.metastore.components.tables.MetastoreTableInfo in project drill by apache.
the class MetastoreAnalyzeTableHandler method convertToDrel.
/**
* Converts to Drill logical plan
*/
private DrillRel convertToDrel(RelNode relNode, SqlMetastoreAnalyzeTable sqlAnalyzeTable, DrillTableInfo drillTableInfo) throws ForemanSetupException, IOException {
RelBuilder relBuilder = LOGICAL_BUILDER.create(relNode.getCluster(), null);
DrillTable table = drillTableInfo.drillTable();
AnalyzeInfoProvider analyzeInfoProvider = table.getGroupScan().getAnalyzeInfoProvider();
List<String> schemaPath = drillTableInfo.schemaPath();
String pluginName = schemaPath.get(0);
String workspaceName = Strings.join(schemaPath.subList(1, schemaPath.size()), AbstractSchema.SCHEMA_SEPARATOR);
String tableName = drillTableInfo.tableName();
TableInfo tableInfo = TableInfo.builder().name(tableName).owner(table.getUserName()).type(analyzeInfoProvider.getTableTypeName()).storagePlugin(pluginName).workspace(workspaceName).build();
ColumnNamesOptions columnNamesOptions = new ColumnNamesOptions(context.getOptions());
List<String> segmentColumns = analyzeInfoProvider.getSegmentColumns(table, columnNamesOptions).stream().map(SchemaPath::getRootSegmentPath).collect(Collectors.toList());
List<NamedExpression> segmentExpressions = segmentColumns.stream().map(partitionName -> new NamedExpression(SchemaPath.getSimplePath(partitionName), FieldReference.getWithQuotedRef(partitionName))).collect(Collectors.toList());
List<MetadataInfo> rowGroupsInfo = Collections.emptyList();
List<MetadataInfo> filesInfo = Collections.emptyList();
Multimap<Integer, MetadataInfo> segments = ArrayListMultimap.create();
BasicTablesRequests basicRequests;
try {
basicRequests = context.getMetastoreRegistry().get().tables().basicRequests();
} catch (MetastoreException e) {
logger.error("Error when obtaining Metastore instance for table {}", tableName, e);
DrillRel convertedRelNode = convertToRawDrel(relBuilder.values(new String[] { MetastoreAnalyzeConstants.OK_FIELD_NAME, MetastoreAnalyzeConstants.SUMMARY_FIELD_NAME }, false, e.getMessage()).build());
return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
}
MetadataType metadataLevel = getMetadataType(sqlAnalyzeTable);
List<SchemaPath> interestingColumns = sqlAnalyzeTable.getFieldNames();
MetastoreTableInfo metastoreTableInfo = basicRequests.metastoreTableInfo(tableInfo);
List<MetadataInfo> allMetaToHandle = null;
List<MetadataInfo> metadataToRemove = new ArrayList<>();
// whether incremental analyze may be produced
if (metastoreTableInfo.isExists()) {
RelNode finalRelNode = relNode;
CheckedSupplier<TableScan, SqlUnsupportedException> tableScanSupplier = () -> DrillRelOptUtil.findScan(convertToDrel(finalRelNode.getInput(0)));
MetadataInfoCollector metadataInfoCollector = analyzeInfoProvider.getMetadataInfoCollector(basicRequests, tableInfo, (FormatSelection) table.getSelection(), context.getPlannerSettings(), tableScanSupplier, interestingColumns, metadataLevel, segmentColumns.size());
if (!metadataInfoCollector.isOutdated()) {
DrillRel convertedRelNode = convertToRawDrel(relBuilder.values(new String[] { MetastoreAnalyzeConstants.OK_FIELD_NAME, MetastoreAnalyzeConstants.SUMMARY_FIELD_NAME }, false, "Table metadata is up to date, analyze wasn't performed.").build());
return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
}
// updates scan to read updated / new files, pass removed files into metadata handler
relNode = relNode.copy(relNode.getTraitSet(), Collections.singletonList(metadataInfoCollector.getPrunedScan()));
filesInfo = metadataInfoCollector.getFilesInfo();
segments = metadataInfoCollector.getSegmentsInfo();
rowGroupsInfo = metadataInfoCollector.getRowGroupsInfo();
allMetaToHandle = metadataInfoCollector.getAllMetaToHandle();
metadataToRemove = metadataInfoCollector.getMetadataToRemove();
}
// Step 2: constructs plan for producing analyze
DrillRel convertedRelNode = convertToRawDrel(relNode);
boolean createNewAggregations = true;
// List of columns for which statistics should be collected: interesting columns + segment columns
List<SchemaPath> statisticsColumns = interestingColumns == null ? null : new ArrayList<>(interestingColumns);
if (statisticsColumns != null) {
segmentColumns.stream().map(SchemaPath::getSimplePath).forEach(statisticsColumns::add);
}
SchemaPath locationField = analyzeInfoProvider.getLocationField(columnNamesOptions);
if (analyzeInfoProvider.supportsMetadataType(MetadataType.ROW_GROUP) && metadataLevel.includes(MetadataType.ROW_GROUP)) {
MetadataHandlerContext handlerContext = MetadataHandlerContext.builder().tableInfo(tableInfo).metadataToHandle(rowGroupsInfo).metadataType(MetadataType.ROW_GROUP).depthLevel(segmentExpressions.size()).segmentColumns(segmentColumns).build();
convertedRelNode = getRowGroupAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations, statisticsColumns, handlerContext);
createNewAggregations = false;
locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
}
if (analyzeInfoProvider.supportsMetadataType(MetadataType.FILE) && metadataLevel.includes(MetadataType.FILE)) {
MetadataHandlerContext handlerContext = MetadataHandlerContext.builder().tableInfo(tableInfo).metadataToHandle(filesInfo).metadataType(MetadataType.FILE).depthLevel(segmentExpressions.size()).segmentColumns(segmentColumns).build();
convertedRelNode = getFileAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations, statisticsColumns, locationField, handlerContext);
locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
createNewAggregations = false;
}
if (analyzeInfoProvider.supportsMetadataType(MetadataType.SEGMENT) && metadataLevel.includes(MetadataType.SEGMENT)) {
for (int i = segmentExpressions.size(); i > 0; i--) {
MetadataHandlerContext handlerContext = MetadataHandlerContext.builder().tableInfo(tableInfo).metadataToHandle(new ArrayList<>(segments.get(i - 1))).metadataType(MetadataType.SEGMENT).depthLevel(i).segmentColumns(segmentColumns.subList(0, i)).build();
convertedRelNode = getSegmentAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations, statisticsColumns, locationField, i, handlerContext);
locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
createNewAggregations = false;
}
}
if (analyzeInfoProvider.supportsMetadataType(MetadataType.TABLE) && metadataLevel.includes(MetadataType.TABLE)) {
MetadataHandlerContext handlerContext = MetadataHandlerContext.builder().tableInfo(tableInfo).metadataToHandle(Collections.emptyList()).metadataType(MetadataType.TABLE).depthLevel(segmentExpressions.size()).segmentColumns(segmentColumns).build();
convertedRelNode = getTableAggRelNode(convertedRelNode, createNewAggregations, statisticsColumns, locationField, handlerContext);
} else {
throw new IllegalStateException("Analyze table with NONE level");
}
boolean useStatistics = context.getOptions().getOption(PlannerSettings.STATISTICS_USE);
SqlNumericLiteral samplePercentLiteral = sqlAnalyzeTable.getSamplePercent();
double samplePercent = samplePercentLiteral == null ? 100.0 : samplePercentLiteral.intValue(true);
// Step 3: adds rel nodes for producing statistics analyze if required
RelNode analyzeRel = useStatistics ? new DrillAnalyzeRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertToRawDrel(relNode), samplePercent) : convertToRawDrel(relBuilder.values(new String[] { "" }, "").build());
MetadataControllerContext metadataControllerContext = MetadataControllerContext.builder().tableInfo(tableInfo).metastoreTableInfo(metastoreTableInfo).location(((FormatSelection) table.getSelection()).getSelection().getSelectionRoot()).interestingColumns(interestingColumns).segmentColumns(segmentColumns).metadataToHandle(allMetaToHandle).metadataToRemove(metadataToRemove).analyzeMetadataLevel(metadataLevel).build();
convertedRelNode = new MetadataControllerRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode, analyzeRel, metadataControllerContext);
return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
}
use of org.apache.drill.metastore.components.tables.MetastoreTableInfo in project drill by apache.
the class TestMetastoreWithEasyFormatPlugin method testIncrementalAnalyzeUnchangedTable.
@Test
public void testIncrementalAnalyzeUnchangedTable() throws Exception {
String tableName = "multilevel/csvUnchanged";
File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("multilevel/csv"), Paths.get(tableName));
TableInfo tableInfo = getTableInfo(tableName, "tmp", "csv");
long lastModifiedTime = getMaxLastModified(table);
try {
testBuilder().sqlQuery("ANALYZE TABLE table(dfs.tmp.`%s` (schema=>%s)) REFRESH METADATA", tableName, SCHEMA_STRING).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
List<SegmentMetadata> segmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
assertEquals(15, segmentMetadata.size());
testBuilder().sqlQuery("ANALYZE TABLE table(dfs.tmp.`%s` (schema=>%s)) REFRESH METADATA", tableName, SCHEMA_STRING).unOrdered().baselineColumns("ok", "summary").baselineValues(false, "Table metadata is up to date, analyze wasn't performed.").go();
segmentMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().segmentsMetadataByMetadataKey(tableInfo, null, null);
assertEquals(15, segmentMetadata.size());
long postAnalyzeLastModifiedTime = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().metastoreTableInfo(tableInfo).lastModifiedTime();
assertEquals(lastModifiedTime, postAnalyzeLastModifiedTime);
} finally {
run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
FileUtils.deleteQuietly(table);
}
}
use of org.apache.drill.metastore.components.tables.MetastoreTableInfo in project drill by apache.
the class TestMongoBasicTablesRequests method testDelete.
@Test
public void testDelete() {
MetastoreTableInfo metastoreTableInfo = basicRequests.metastoreTableInfo(nationTableInfo);
assertTrue(metastoreTableInfo.isExists());
tables.modify().delete(Delete.builder().metadataType(MetadataType.TABLE).filter(nationTableInfo.toFilter()).build()).execute();
metastoreTableInfo = basicRequests.metastoreTableInfo(nationTableInfo);
assertFalse(metastoreTableInfo.isExists());
List<TableMetadataUnit> res = tables.read().metadataType(MetadataType.ALL).execute();
assertFalse(res.isEmpty());
tables.modify().purge();
res = tables.read().metadataType(MetadataType.ALL).execute();
assertTrue(res.isEmpty());
prepareData(tables);
}
use of org.apache.drill.metastore.components.tables.MetastoreTableInfo in project drill by apache.
the class TestMongoBasicTablesRequests method testMetastoreTableInfoExistingTable.
@Test
public void testMetastoreTableInfoExistingTable() {
MetastoreTableInfo metastoreTableInfo = basicRequests.metastoreTableInfo(nationTableInfo);
assertTrue(metastoreTableInfo.isExists());
assertEquals(nationTableInfo, metastoreTableInfo.tableInfo());
assertEquals(nationTable.lastModifiedTime(), metastoreTableInfo.lastModifiedTime());
assertEquals(Metadata.UNDEFINED, metastoreTableInfo.metastoreVersion());
}
use of org.apache.drill.metastore.components.tables.MetastoreTableInfo in project drill by apache.
the class TestMetastoreWithEasyFormatPlugin method testNonEmptyTableWithEmptyFile.
@Test
public void testNonEmptyTableWithEmptyFile() throws Exception {
String tableName = "csv_with_empty_file";
dirTestWatcher.copyResourceToTestTmp(Paths.get("store", "text", "directoryWithEmptyCSV", "empty.csv"), Paths.get(tableName, "empty.csv"));
File table = dirTestWatcher.copyResourceToTestTmp(Paths.get("store", "text", "data", "nations.csv"), Paths.get(tableName, "nations.csv")).getParentFile();
TableInfo tableInfo = getTableInfo(tableName, "tmp", "csv");
TupleMetadata schema = new SchemaBuilder().add("n_nationkey", TypeProtos.MinorType.INT).add("n_name", TypeProtos.MinorType.VARCHAR).add("n_regionkey", TypeProtos.MinorType.INT).add("n_comment", TypeProtos.MinorType.VARCHAR).build();
Map<SchemaPath, ColumnStatistics<?>> columnStatistics = ImmutableMap.<SchemaPath, ColumnStatistics<?>>builder().put(SchemaPath.getSimplePath("n_nationkey"), getColumnStatistics(0, 24, 25L, TypeProtos.MinorType.INT)).put(SchemaPath.getSimplePath("n_name"), getColumnStatistics("ALGERIA", "VIETNAM", 25L, TypeProtos.MinorType.VARCHAR)).put(SchemaPath.getSimplePath("n_regionkey"), getColumnStatistics(0, 4, 25L, TypeProtos.MinorType.INT)).put(SchemaPath.getSimplePath("n_comment"), getColumnStatistics("alfoxespromiseslylyaccordingtotheregularaccounts.boldrequestsalon", "yfinalpackages.slowfoxescajolequickly.quicklysilentplateletsbreachironicaccounts.unusualpintobe", 25L, TypeProtos.MinorType.VARCHAR)).build();
BaseTableMetadata expectedTableMetadata = BaseTableMetadata.builder().tableInfo(tableInfo).metadataInfo(TABLE_META_INFO).schema(schema).location(new Path(table.toURI().getPath())).columnsStatistics(columnStatistics).metadataStatistics(Arrays.asList(new StatisticsHolder<>(25L, TableStatisticsKind.ROW_COUNT), new StatisticsHolder<>(MetadataType.ALL, TableStatisticsKind.ANALYZE_METADATA_LEVEL))).partitionKeys(Collections.emptyMap()).lastModifiedTime(getMaxLastModified(table)).build();
try {
testBuilder().sqlQuery("ANALYZE TABLE table(dfs.tmp.`%s` " + "(schema=>'inline=(" + "`n_nationkey` INT not null," + "`n_name` VARCHAR not null," + "`n_regionkey` INT not null," + "`n_comment` VARCHAR not null)')) REFRESH METADATA", tableName).unOrdered().baselineColumns("ok", "summary").baselineValues(true, String.format("Collected / refreshed metadata for table [dfs.tmp.%s]", tableName)).go();
MetastoreTableInfo metastoreTableInfo = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().metastoreTableInfo(tableInfo);
assertTrue("table metadata wasn't found", metastoreTableInfo.isExists());
BaseTableMetadata tableMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().tableMetadata(tableInfo);
assertEquals(expectedTableMetadata, tableMetadata);
List<FileMetadata> filesMetadata = cluster.drillbit().getContext().getMetastoreRegistry().get().tables().basicRequests().filesMetadata(tableInfo, null, null);
assertEquals(2, filesMetadata.size());
String query = "select * from dfs.tmp.`%s`";
queryBuilder().sql(query, tableName).planMatcher().include("usedMetastore=true").match();
long rowCount = queryBuilder().sql(query, tableName).run().recordCount();
assertEquals(25, rowCount);
} finally {
run("analyze table dfs.tmp.`%s` drop metadata if exists", tableName);
}
}
Aggregations