Use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by apache.
The class FileSystemPartitionDescriptor, method createTableScan:
@Override
public TableScan createTableScan(List<PartitionLocation> newPartitionLocation, Path cacheFileRoot,
    boolean wasAllPartitionsPruned, MetadataContext metaContext) throws Exception {
  List<Path> newFiles = new ArrayList<>();
  for (PartitionLocation location : newPartitionLocation) {
    if (!location.isCompositePartition()) {
      newFiles.add(location.getEntirePartitionLocation());
    } else {
      final Collection<SimplePartitionLocation> subPartitions = location.getPartitionLocationRecursive();
      for (PartitionLocation subPart : subPartitions) {
        newFiles.add(subPart.getEntirePartitionLocation());
      }
    }
  }
  FormatSelection formatSelection = (FormatSelection) table.getSelection();
  FileSelection newFileSelection = new FileSelection(null, newFiles, getBaseTableLocation(),
      cacheFileRoot, wasAllPartitionsPruned, formatSelection.getSelection().getDirStatus());
  newFileSelection.setMetaContext(metaContext);
  RelOptTable relOptTable = scanRel.getTable();
  if (scanRel instanceof DrillScanRel) {
    FileGroupScan newGroupScan =
        ((FileGroupScan) ((DrillScanRel) scanRel).getGroupScan()).clone(newFileSelection);
    return new DrillScanRel(scanRel.getCluster(), scanRel.getTraitSet().plus(DrillRel.DRILL_LOGICAL),
        relOptTable, newGroupScan, scanRel.getRowType(), ((DrillScanRel) scanRel).getColumns(), true);
  } else if (scanRel instanceof EnumerableTableScan) {
    FormatSelection newFormatSelection = new FormatSelection(formatSelection.getFormat(), newFileSelection);
    DynamicDrillTable dynamicDrillTable = new DynamicDrillTable(table.getPlugin(), table.getStorageEngineName(),
        table.getUserName(), newFormatSelection, table.getMetadataProviderManager());
    /* Copy statistics from the original relOptTable */
    DrillTranslatableTable newTable = new DrillTranslatableTable(dynamicDrillTable);
    RelOptTableImpl newOptTableImpl = RelOptTableImpl.create(relOptTable.getRelOptSchema(),
        relOptTable.getRowType(), newTable, GuavaUtils.convertToUnshadedImmutableList(ImmutableList.of()));
    // return an EnumerableTableScan with fileSelection being part of digest of TableScan node.
    return DirPrunedEnumerableTableScan.create(scanRel.getCluster(), newOptTableImpl, newFileSelection.toString());
  } else {
    throw new UnsupportedOperationException("Only DrillScanRel and EnumerableTableScan is allowed!");
  }
}
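The pattern here recurs throughout these usages: a FormatSelection simply pairs a format plugin configuration with a FileSelection, so rebuilding a scan over a pruned file set amounts to rewrapping the new selection with the old format config. A minimal sketch of just that step, using only the accessors visible above (this helper is illustrative, not part of the Drill sources):

// Hedged sketch: rebuild a table's FormatSelection around a pruned FileSelection.
FormatSelection rewrap(DrillTable table, FileSelection prunedFiles) {
  FormatSelection original = (FormatSelection) table.getSelection();  // as in createTableScan above
  return new FormatSelection(original.getFormat(), prunedFiles);      // same format config, fewer files
}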
Use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by apache.
The class ConvertMetadataAggregateToDirectScanRule, method buildDirectScan:
private DirectGroupScan buildDirectScan(List<SchemaPath> interestingColumns, DrillScanRel scan,
    PlannerSettings settings) throws IOException {
  DrillTable drillTable = Utilities.getDrillTable(scan.getTable());
  ColumnNamesOptions columnNamesOptions = new ColumnNamesOptions(settings.getOptions());
  // populates schema to be used when adding record values
  FormatSelection selection = (FormatSelection) drillTable.getSelection();
  // adds partition columns to the schema
  Map<String, Class<?>> schema =
      ColumnExplorer.getPartitionColumnNames(selection.getSelection(), columnNamesOptions).stream()
          .collect(Collectors.toMap(Function.identity(), s -> String.class, (o, n) -> n));
  // adds internal implicit columns to the schema
  schema.put(MetastoreAnalyzeConstants.SCHEMA_FIELD, String.class);
  schema.put(MetastoreAnalyzeConstants.LOCATION_FIELD, String.class);
  schema.put(columnNamesOptions.rowGroupIndex(), String.class);
  schema.put(columnNamesOptions.rowGroupStart(), String.class);
  schema.put(columnNamesOptions.rowGroupLength(), String.class);
  schema.put(columnNamesOptions.lastModifiedTime(), String.class);
  return populateRecords(interestingColumns, schema, scan, columnNamesOptions);
}
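The Collectors.toMap call is the only subtle line: every partition column name maps to String.class, and the (o, n) -> n merge function keeps the latest mapping if the same name ever appears twice instead of throwing. A self-contained illustration of that behavior (the dir0/dir1 names are hypothetical examples, not values Drill guarantees):

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

public class ToMapDemo {
  public static void main(String[] args) {
    // "dir0" appears twice; without a merge function, toMap would throw IllegalStateException.
    List<String> partitionColumns = Arrays.asList("dir0", "dir1", "dir0");
    Map<String, Class<?>> schema = partitionColumns.stream()
        .collect(Collectors.toMap(Function.identity(), s -> String.class, (o, n) -> n));
    System.out.println(schema.keySet()); // [dir0, dir1] (iteration order not guaranteed)
  }
}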
Use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by apache.
The class MapRDBFormatMatcher, method isReadable:
@Override
public DrillTable isReadable(DrillFileSystem fs, FileSelection selection, FileSystemPlugin fsPlugin,
    String storageEngineName, SchemaConfig schemaConfig) throws IOException {
  if (isFileReadable(fs, selection.getFirstPath(fs))) {
    MapRDBFormatPlugin mapRDBFormatPlugin = (MapRDBFormatPlugin) getFormatPlugin();
    String tableName = mapRDBFormatPlugin.getTableName(selection);
    TableProperties props = mapRDBFormatPlugin.getMaprFS().getTableProperties(new Path(tableName));
    if (props.getAttr().getJson()) {
      return new DynamicDrillTable(fsPlugin, storageEngineName, schemaConfig.getUserName(),
          new FormatSelection(mapRDBFormatPlugin.getConfig(), selection));
    } else {
      FormatSelection formatSelection = new FormatSelection(mapRDBFormatPlugin.getConfig(), selection);
      return new MapRDBBinaryTable(storageEngineName, fsPlugin, mapRDBFormatPlugin, formatSelection);
    }
  }
  return null;
}
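For context, isReadable follows Drill's format-matcher contract: a matcher either claims a selection by returning a DrillTable, or returns null so another matcher can be consulted. A hedged sketch of how a caller might probe matchers with exactly the signature shown above (the resolve helper and matchers list are hypothetical; the real resolution lives in Drill's workspace schema code):

// Hedged sketch of the contract: the first matcher that recognizes the selection wins.
DrillTable resolve(List<FormatMatcher> matchers, DrillFileSystem fs, FileSelection selection,
    FileSystemPlugin fsPlugin, String storageEngineName, SchemaConfig schemaConfig) throws IOException {
  for (FormatMatcher matcher : matchers) {
    DrillTable table = matcher.isReadable(fs, selection, fsPlugin, storageEngineName, schemaConfig);
    if (table != null) {
      return table;  // e.g. a DynamicDrillTable or MapRDBBinaryTable, as above
    }
  }
  return null;       // no format recognized the selection
}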
Use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by apache.
The class MapRDBFormatMatcher, method isReadableIndex:
/**
 * Gets an instance of DrillTable for a particular native secondary index.
 *
 * @param fs the Drill file system
 * @param selection the file selection for the index table
 * @param fsPlugin the file system storage plugin
 * @param storageEngineName name of the storage engine
 * @param userName name of the user the table is resolved for
 * @param secondaryIndexDesc descriptor of the native secondary index
 * @return a DrillTable for the index, or null if the selection is not readable
 * @throws IOException if the first path of the selection cannot be obtained
 */
public DrillTable isReadableIndex(DrillFileSystem fs, FileSelection selection, FileSystemPlugin fsPlugin,
    String storageEngineName, String userName, IndexDescriptor secondaryIndexDesc) throws IOException {
  FileStatus status = selection.getFirstPath(fs);
  if (!isFileReadable(fs, status)) {
    return null;
  }
  MapRDBFormatPlugin fp = (MapRDBFormatPlugin) getFormatPlugin();
  DrillTable dt = new DynamicDrillTable(fsPlugin, storageEngineName, userName,
      new FormatSelection(fp.getConfig(), selection));
  // TODO: Create groupScan using index descriptor
  dt.setGroupScan(fp.getGroupScan(userName, selection, null, /* columns */
      (IndexDesc) ((MapRDBIndexDescriptor) secondaryIndexDesc).getOriginalDesc(), null));
  return dt;
}
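A hedged usage sketch of this index path (the caller-side variables below are hypothetical placeholders; in Drill this lookup happens during index planning):

// Hypothetical caller: resolve a DrillTable for one native secondary index.
DrillTable indexTable = matcher.isReadableIndex(fs, indexSelection, fsPlugin,
    storageEngineName, userName, secondaryIndexDesc);
if (indexTable != null) {
  // The returned table already carries a group scan bound to the index's
  // IndexDesc via the setGroupScan call shown above.
}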
Use of org.apache.drill.exec.store.dfs.FormatSelection in project drill by apache.
The class MetastoreAnalyzeTableHandler, method convertToDrel:
/**
 * Converts to Drill logical plan
 */
private DrillRel convertToDrel(RelNode relNode, SqlMetastoreAnalyzeTable sqlAnalyzeTable,
    DrillTableInfo drillTableInfo) throws ForemanSetupException, IOException {
  RelBuilder relBuilder = LOGICAL_BUILDER.create(relNode.getCluster(), null);
  DrillTable table = drillTableInfo.drillTable();
  AnalyzeInfoProvider analyzeInfoProvider = table.getGroupScan().getAnalyzeInfoProvider();
  List<String> schemaPath = drillTableInfo.schemaPath();
  String pluginName = schemaPath.get(0);
  String workspaceName = Strings.join(schemaPath.subList(1, schemaPath.size()), AbstractSchema.SCHEMA_SEPARATOR);
  String tableName = drillTableInfo.tableName();
  TableInfo tableInfo = TableInfo.builder()
      .name(tableName)
      .owner(table.getUserName())
      .type(analyzeInfoProvider.getTableTypeName())
      .storagePlugin(pluginName)
      .workspace(workspaceName)
      .build();
  ColumnNamesOptions columnNamesOptions = new ColumnNamesOptions(context.getOptions());
  List<String> segmentColumns = analyzeInfoProvider.getSegmentColumns(table, columnNamesOptions).stream()
      .map(SchemaPath::getRootSegmentPath)
      .collect(Collectors.toList());
  List<NamedExpression> segmentExpressions = segmentColumns.stream()
      .map(partitionName -> new NamedExpression(SchemaPath.getSimplePath(partitionName),
          FieldReference.getWithQuotedRef(partitionName)))
      .collect(Collectors.toList());
  List<MetadataInfo> rowGroupsInfo = Collections.emptyList();
  List<MetadataInfo> filesInfo = Collections.emptyList();
  Multimap<Integer, MetadataInfo> segments = ArrayListMultimap.create();
  BasicTablesRequests basicRequests;
  try {
    basicRequests = context.getMetastoreRegistry().get().tables().basicRequests();
  } catch (MetastoreException e) {
    logger.error("Error when obtaining Metastore instance for table {}", tableName, e);
    DrillRel convertedRelNode = convertToRawDrel(relBuilder
        .values(new String[] { MetastoreAnalyzeConstants.OK_FIELD_NAME, MetastoreAnalyzeConstants.SUMMARY_FIELD_NAME },
            false, e.getMessage())
        .build());
    return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
  }
  MetadataType metadataLevel = getMetadataType(sqlAnalyzeTable);
  List<SchemaPath> interestingColumns = sqlAnalyzeTable.getFieldNames();
  MetastoreTableInfo metastoreTableInfo = basicRequests.metastoreTableInfo(tableInfo);
  List<MetadataInfo> allMetaToHandle = null;
  List<MetadataInfo> metadataToRemove = new ArrayList<>();
  // whether incremental analyze may be produced
  if (metastoreTableInfo.isExists()) {
    RelNode finalRelNode = relNode;
    CheckedSupplier<TableScan, SqlUnsupportedException> tableScanSupplier =
        () -> DrillRelOptUtil.findScan(convertToDrel(finalRelNode.getInput(0)));
    MetadataInfoCollector metadataInfoCollector = analyzeInfoProvider.getMetadataInfoCollector(basicRequests,
        tableInfo, (FormatSelection) table.getSelection(), context.getPlannerSettings(), tableScanSupplier,
        interestingColumns, metadataLevel, segmentColumns.size());
    if (!metadataInfoCollector.isOutdated()) {
      DrillRel convertedRelNode = convertToRawDrel(relBuilder
          .values(new String[] { MetastoreAnalyzeConstants.OK_FIELD_NAME, MetastoreAnalyzeConstants.SUMMARY_FIELD_NAME },
              false, "Table metadata is up to date, analyze wasn't performed.")
          .build());
      return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
    }
    // updates scan to read updated / new files, pass removed files into metadata handler
    relNode = relNode.copy(relNode.getTraitSet(), Collections.singletonList(metadataInfoCollector.getPrunedScan()));
    filesInfo = metadataInfoCollector.getFilesInfo();
    segments = metadataInfoCollector.getSegmentsInfo();
    rowGroupsInfo = metadataInfoCollector.getRowGroupsInfo();
    allMetaToHandle = metadataInfoCollector.getAllMetaToHandle();
    metadataToRemove = metadataInfoCollector.getMetadataToRemove();
  }
  // Step 2: constructs plan for producing analyze
  DrillRel convertedRelNode = convertToRawDrel(relNode);
  boolean createNewAggregations = true;
  // List of columns for which statistics should be collected: interesting columns + segment columns
  List<SchemaPath> statisticsColumns = interestingColumns == null ? null : new ArrayList<>(interestingColumns);
  if (statisticsColumns != null) {
    segmentColumns.stream().map(SchemaPath::getSimplePath).forEach(statisticsColumns::add);
  }
  SchemaPath locationField = analyzeInfoProvider.getLocationField(columnNamesOptions);
  if (analyzeInfoProvider.supportsMetadataType(MetadataType.ROW_GROUP) && metadataLevel.includes(MetadataType.ROW_GROUP)) {
    MetadataHandlerContext handlerContext = MetadataHandlerContext.builder()
        .tableInfo(tableInfo)
        .metadataToHandle(rowGroupsInfo)
        .metadataType(MetadataType.ROW_GROUP)
        .depthLevel(segmentExpressions.size())
        .segmentColumns(segmentColumns)
        .build();
    convertedRelNode = getRowGroupAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations,
        statisticsColumns, handlerContext);
    createNewAggregations = false;
    locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
  }
  if (analyzeInfoProvider.supportsMetadataType(MetadataType.FILE) && metadataLevel.includes(MetadataType.FILE)) {
    MetadataHandlerContext handlerContext = MetadataHandlerContext.builder()
        .tableInfo(tableInfo)
        .metadataToHandle(filesInfo)
        .metadataType(MetadataType.FILE)
        .depthLevel(segmentExpressions.size())
        .segmentColumns(segmentColumns)
        .build();
    convertedRelNode = getFileAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations,
        statisticsColumns, locationField, handlerContext);
    locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
    createNewAggregations = false;
  }
  if (analyzeInfoProvider.supportsMetadataType(MetadataType.SEGMENT) && metadataLevel.includes(MetadataType.SEGMENT)) {
    for (int i = segmentExpressions.size(); i > 0; i--) {
      MetadataHandlerContext handlerContext = MetadataHandlerContext.builder()
          .tableInfo(tableInfo)
          .metadataToHandle(new ArrayList<>(segments.get(i - 1)))
          .metadataType(MetadataType.SEGMENT)
          .depthLevel(i)
          .segmentColumns(segmentColumns.subList(0, i))
          .build();
      convertedRelNode = getSegmentAggRelNode(segmentExpressions, convertedRelNode, createNewAggregations,
          statisticsColumns, locationField, i, handlerContext);
      locationField = SchemaPath.getSimplePath(MetastoreAnalyzeConstants.LOCATION_FIELD);
      createNewAggregations = false;
    }
  }
  if (analyzeInfoProvider.supportsMetadataType(MetadataType.TABLE) && metadataLevel.includes(MetadataType.TABLE)) {
    MetadataHandlerContext handlerContext = MetadataHandlerContext.builder()
        .tableInfo(tableInfo)
        .metadataToHandle(Collections.emptyList())
        .metadataType(MetadataType.TABLE)
        .depthLevel(segmentExpressions.size())
        .segmentColumns(segmentColumns)
        .build();
    convertedRelNode = getTableAggRelNode(convertedRelNode, createNewAggregations, statisticsColumns,
        locationField, handlerContext);
  } else {
    throw new IllegalStateException("Analyze table with NONE level");
  }
  boolean useStatistics = context.getOptions().getOption(PlannerSettings.STATISTICS_USE);
  SqlNumericLiteral samplePercentLiteral = sqlAnalyzeTable.getSamplePercent();
  double samplePercent = samplePercentLiteral == null ? 100.0 : samplePercentLiteral.intValue(true);
  // Step 3: adds rel nodes for producing statistics analyze if required
  RelNode analyzeRel = useStatistics
      ? new DrillAnalyzeRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(),
          convertToRawDrel(relNode), samplePercent)
      : convertToRawDrel(relBuilder.values(new String[] { "" }, "").build());
  MetadataControllerContext metadataControllerContext = MetadataControllerContext.builder()
      .tableInfo(tableInfo)
      .metastoreTableInfo(metastoreTableInfo)
      .location(((FormatSelection) table.getSelection()).getSelection().getSelectionRoot())
      .interestingColumns(interestingColumns)
      .segmentColumns(segmentColumns)
      .metadataToHandle(allMetaToHandle)
      .metadataToRemove(metadataToRemove)
      .analyzeMetadataLevel(metadataLevel)
      .build();
  convertedRelNode = new MetadataControllerRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(),
      convertedRelNode, analyzeRel, metadataControllerContext);
  return new DrillScreenRel(convertedRelNode.getCluster(), convertedRelNode.getTraitSet(), convertedRelNode);
}
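Taken together, the method assembles a plan of roughly the following shape (an orientation sketch inferred from the rel constructors above; which aggregation rels actually appear depends on the metadata types the provider supports and the requested level includes):

DrillScreenRel
└── MetadataControllerRel
    ├── metadata side: table agg over segment agg(s) over file agg over row-group agg
    └── statistics side: DrillAnalyzeRel when the statistics option (PlannerSettings.STATISTICS_USE)
        is enabled, otherwise a single empty values rel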