use of org.apache.calcite.rel.core.TableScan in project calcite by apache.
the class Lattice method populate.
/**
 * Walks a lattice query tree, appending every table scan it finds to
 * {@code nodes} and, for every join, one entry per conjunct of the join
 * condition to {@code tempLinks} (as produced by {@code grab}).
 *
 * <p>Projects are looked through only while {@code nodes} is still empty,
 * i.e. before the first scan has been collected.
 *
 * @param nodes     Receives the table scans, left input before right input
 * @param tempLinks Receives the links extracted from join conditions
 * @param rel       Root of the (sub)tree to walk
 * @return {@code true} whenever the method returns normally
 * @throws RuntimeException if a join is not an inner join, or if the tree
 *                          contains a node type other than scan, join or a
 *                          leading project
 */
private static boolean populate(List<RelNode> nodes, List<int[][]> tempLinks, RelNode rel) {
  // Skip over projects for as long as no scan has been collected yet.
  RelNode current = rel;
  while (nodes.isEmpty() && current instanceof LogicalProject) {
    current = ((LogicalProject) current).getInput();
  }

  if (current instanceof TableScan) {
    nodes.add(current);
    return true;
  }

  if (!(current instanceof LogicalJoin)) {
    throw new RuntimeException("Invalid node type " + current.getClass().getSimpleName() + " in lattice query");
  }

  final LogicalJoin join = (LogicalJoin) current;
  final JoinRelType joinType = join.getJoinType();
  if (joinType != JoinRelType.INNER) {
    throw new RuntimeException("only inner join allowed, but got " + joinType);
  }

  // Both sides must be collected before the condition is examined, because
  // grab resolves the condition against the accumulated nodes.
  populate(nodes, tempLinks, join.getLeft());
  populate(nodes, tempLinks, join.getRight());
  for (RexNode conjunct : RelOptUtil.conjunctions(join.getCondition())) {
    tempLinks.add(grab(nodes, conjunct));
  }
  return true;
}
use of org.apache.calcite.rel.core.TableScan in project calcite by apache.
the class RelOptMaterialization method tryUseStar.
/**
 * Converts a relational expression to one that uses a
 * {@link org.apache.calcite.schema.impl.StarTable}.
 *
 * <p>The relational expression is already in leaf-join-form, per
 * {@link #toLeafJoinForm(org.apache.calcite.rel.RelNode)}.
 *
 * <p>The rewrite replaces scans of the star's first table with a projection
 * over a scan of the star table, and collapses joins whose two inputs are a
 * star table and one of its member tables. If anything changed, the result is
 * post-processed by a Hep program that applies
 * {@code ProjectFilterTransposeRule}, {@code AggregateProjectMergeRule} and
 * {@code AggregateFilterTransposeRule}.
 *
 * @param rel             Relational expression to rewrite
 * @param starRelOptTable Table that unwraps to the {@code StarTable} to use
 * @return Rewritten expression, or null if expression cannot be rewritten
 * to use the star
 */
public static RelNode tryUseStar(RelNode rel, final RelOptTable starRelOptTable) {
  final StarTable starTable = starRelOptTable.unwrap(StarTable.class);
  assert starTable != null;
  RelNode rel2 = rel.accept(new RelShuttleImpl() {
    @Override
    public RelNode visit(TableScan scan) {
      RelOptTable relOptTable = scan.getTable();
      final Table table = relOptTable.unwrap(Table.class);
      // unwrap may yield null when the scanned table does not expose a Table;
      // such a scan cannot be the star's first table, so leave it untouched
      // instead of failing with a NullPointerException.
      if (table != null && table.equals(starTable.tables.get(0))) {
        Mappings.TargetMapping mapping = Mappings.createShiftMapping(starRelOptTable.getRowType().getFieldCount(), 0, 0, relOptTable.getRowType().getFieldCount());
        final RelOptCluster cluster = scan.getCluster();
        final RelNode scan2 = starRelOptTable.toRel(RelOptUtil.getContext(cluster));
        return RelOptUtil.createProject(scan2, Mappings.asList(mapping.inverse()));
      }
      return scan;
    }

    @Override
    public RelNode visit(LogicalJoin join) {
      // Keep re-visiting until the join stops changing, stops being a join,
      // or match() collapses it into a single expression over the star.
      for (; ; ) {
        RelNode rel = super.visit(join);
        if (rel == join || !(rel instanceof LogicalJoin)) {
          return rel;
        }
        join = (LogicalJoin) rel;
        final ProjectFilterTable left = ProjectFilterTable.of(join.getLeft());
        if (left != null) {
          final ProjectFilterTable right = ProjectFilterTable.of(join.getRight());
          if (right != null) {
            try {
              match(left, right, join.getCluster());
            } catch (Util.FoundOne e) {
              return (RelNode) e.getNode();
            }
          }
        }
      }
    }

    /**
     * Throws a {@link org.apache.calcite.util.Util.FoundOne} containing
     * the rewritten expression on success: the result of
     * {@code RelOptUtil.createFilter} applied to a projection over a
     * {@link org.apache.calcite.rel.logical.LogicalTableScan} of the star
     * table. (Yes, an exception for normal operation.)
     *
     * <p>Returns normally, without throwing, if neither side is a star
     * table that contains the other side's table.
     */
    private void match(ProjectFilterTable left, ProjectFilterTable right, RelOptCluster cluster) {
      final Mappings.TargetMapping leftMapping = left.mapping();
      final Mappings.TargetMapping rightMapping = right.mapping();
      final RelOptTable leftRelOptTable = left.getTable();
      final Table leftTable = leftRelOptTable.unwrap(Table.class);
      final int leftCount = leftRelOptTable.getRowType().getFieldCount();
      final RelOptTable rightRelOptTable = right.getTable();
      final Table rightTable = rightRelOptTable.unwrap(Table.class);
      // Case 1: the star is on the left and already contains the right table.
      if (leftTable instanceof StarTable && ((StarTable) leftTable).tables.contains(rightTable)) {
        final int offset = ((StarTable) leftTable).columnOffset(rightTable);
        Mappings.TargetMapping mapping = Mappings.merge(leftMapping, Mappings.offsetTarget(Mappings.offsetSource(rightMapping, offset), leftMapping.getTargetCount()));
        final RelNode project = RelOptUtil.createProject(LogicalTableScan.create(cluster, leftRelOptTable), Mappings.asList(mapping.inverse()));
        final List<RexNode> conditions = Lists.newArrayList();
        if (left.condition != null) {
          conditions.add(left.condition);
        }
        if (right.condition != null) {
          conditions.add(RexUtil.apply(mapping, RexUtil.shift(right.condition, offset)));
        }
        final RelNode filter = RelOptUtil.createFilter(project, conditions);
        throw new Util.FoundOne(filter);
      }
      // Case 2: the star is on the right and already contains the left table.
      if (rightTable instanceof StarTable && ((StarTable) rightTable).tables.contains(leftTable)) {
        final int offset = ((StarTable) rightTable).columnOffset(leftTable);
        Mappings.TargetMapping mapping = Mappings.merge(Mappings.offsetSource(leftMapping, offset), Mappings.offsetTarget(rightMapping, leftCount));
        final RelNode project = RelOptUtil.createProject(LogicalTableScan.create(cluster, rightRelOptTable), Mappings.asList(mapping.inverse()));
        final List<RexNode> conditions = Lists.newArrayList();
        if (left.condition != null) {
          conditions.add(RexUtil.apply(mapping, RexUtil.shift(left.condition, offset)));
        }
        if (right.condition != null) {
          conditions.add(RexUtil.apply(mapping, right.condition));
        }
        final RelNode filter = RelOptUtil.createFilter(project, conditions);
        throw new Util.FoundOne(filter);
      }
    }
  });
  if (rel2 == rel) {
    // No rewrite happened.
    return null;
  }
  final Program program = Programs.hep(ImmutableList.of(ProjectFilterTransposeRule.INSTANCE, AggregateProjectMergeRule.INSTANCE, AggregateFilterTransposeRule.INSTANCE), false, DefaultRelMetadataProvider.INSTANCE);
  return program.run(null, rel2, null, ImmutableList.<RelOptMaterialization>of(), ImmutableList.<RelOptLattice>of());
}
use of org.apache.calcite.rel.core.TableScan in project drill by apache.
the class ConvertCountToDirectScanRule method onMatch.
/**
 * Replaces an aggregate made up of COUNT calls over a table scan (optionally
 * with a project in between) by a direct scan over counts taken from the
 * metadata summary, when every precondition below holds. Otherwise the call
 * is left untransformed.
 *
 * @param call Rule call; operand 0 is the aggregate, the last operand is the
 *             scan, and operand 1 is a project when there are three operands
 */
@Override
public void onMatch(RelOptRuleCall call) {
  final Aggregate aggregate = call.rel(0);
  final TableScan tableScan = call.rel(call.rels.length - 1);
  final Project projectRel;
  if (call.rels.length == 3) {
    projectRel = (Project) call.rel(1);
  } else {
    projectRel = null;
  }

  // The aggregate must have no grouping keys and no DISTINCT calls.
  final boolean grouped = aggregate.getGroupCount() > 0;
  if (grouped || aggregate.containsDistinctCall()) {
    return;
  }

  final DrillTable table = DrillRelOptUtil.getDrillTable(tableScan);
  if (table == null) {
    logger.debug("Rule does not apply since an eligible drill table instance was not found.");
    return;
  }

  final Object rawSelection = table.getSelection();
  if (!(rawSelection instanceof FormatSelection)) {
    logger.debug("Rule does not apply since only Parquet file format is eligible.");
    return;
  }

  final PlannerSettings plannerSettings = call.getPlanner().getContext().unwrap(PlannerSettings.class);
  final FormatSelection fmtSelection = (FormatSelection) rawSelection;

  // With a wildcard in the selection the total row count cannot be read
  // from the parent directory, so the rule cannot be applied.
  if (fmtSelection.getSelection().hadWildcard()) {
    logger.debug("Rule does not apply when there is a wild card since the COUNT could not be determined from metadata.");
    return;
  }

  // The counts are taken from the metadata summary, which must be present.
  final Pair<Boolean, Metadata_V4.MetadataSummary> summaryStatus = checkMetadataForScanStats(plannerSettings, table, fmtSelection);
  if (!summaryStatus.getLeft()) {
    logger.debug("Rule does not apply since MetadataSummary metadata was not found.");
    return;
  }

  final Map<String, Long> counts = collectCounts(plannerSettings, summaryStatus.getRight(), aggregate, tableScan, projectRel);
  logger.trace("Calculated the following aggregate counts: {}", counts);
  // An empty map means the counts could not be determined.
  if (counts.isEmpty()) {
    logger.debug("Rule does not apply since one or more COUNTs could not be determined from metadata.");
    return;
  }

  // Build a single-row direct scan carrying the counts, topped by a project
  // that restores the aggregate's row type.
  final Path summaryFile = Metadata.getSummaryFileName(fmtSelection.getSelection().getSelectionRoot());
  final RelDataType directRowType = CountToDirectScanUtils.constructDataType(aggregate, counts.keySet());
  final DynamicPojoRecordReader<Long> pojoReader = new DynamicPojoRecordReader<>(
      CountToDirectScanUtils.buildSchema(directRowType.getFieldNames()),
      Collections.singletonList(new ArrayList<>(counts.values())));
  final ScanStats stats = new ScanStats(ScanStats.GroupScanProperty.EXACT_ROW_COUNT, 1, 1, directRowType.getFieldCount());
  final MetadataDirectGroupScan groupScan = new MetadataDirectGroupScan(pojoReader, summaryFile, 1, stats, true, false);
  final DrillDirectScanRel directScanRel = new DrillDirectScanRel(
      tableScan.getCluster(),
      tableScan.getTraitSet().plus(DrillRel.DRILL_LOGICAL),
      groupScan,
      directRowType);
  final DrillProjectRel topProject = new DrillProjectRel(
      aggregate.getCluster(),
      aggregate.getTraitSet().plus(DrillRel.DRILL_LOGICAL),
      directScanRel,
      CountToDirectScanUtils.prepareFieldExpressions(directRowType),
      aggregate.getRowType());
  call.transformTo(topProject);
}
use of org.apache.calcite.rel.core.TableScan in project drill by apache.
the class FindLimit0Visitor method addLimitOnTopOfLeafNodes.
/**
 * Wraps every leaf of the given plan in a {@code DrillLimitRel} whose offset
 * and fetch are both zero, unless the plan contains a construct this rewrite
 * treats as unsupported.
 *
 * <p>The plan is returned unchanged when it contains a
 * {@code DrillUnionRelBase}, or a {@code DrillProjectRelBase} whose
 * expressions call a function rejected by
 * {@code isUnsupportedScalarFunction}.
 *
 * @param rel Root of the plan to rewrite
 * @return The plan with limits added above its leaves, or {@code rel} itself
 *         when an unsupported construct was found
 */
public static DrillRel addLimitOnTopOfLeafNodes(final DrillRel rel) {
  // Flag shared by both detectors below; once set it is never cleared.
  final Pointer<Boolean> unsupportedSeen = new Pointer<>(false);

  // Detects unsupported scalar functions inside row expressions.
  final RexShuttle functionDetector = new RexShuttle() {
    @Override
    public RexNode visitCall(RexCall call) {
      if (!isUnsupportedScalarFunction(call.getOperator())) {
        return super.visitCall(call);
      }
      unsupportedSeen.value = true;
      return call;
    }
  };

  // Detects unsupported relational operators, and runs the function
  // detector over projects.
  final RelShuttle operatorDetector = new RelShuttleImpl() {
    @Override
    public RelNode visit(RelNode other) {
      if (other instanceof DrillUnionRelBase) {
        unsupportedSeen.value = true;
        return other;
      }
      if (other instanceof DrillProjectRelBase) {
        // No need to inspect expressions once something unsupported is known.
        if (!unsupportedSeen.value) {
          other.accept(functionDetector);
        }
        if (unsupportedSeen.value) {
          return other;
        }
      }
      return super.visit(other);
    }
  };

  rel.accept(operatorDetector);
  if (unsupportedSeen.value) {
    return rel;
  }

  // Adds LIMIT (0) above values, table scans and any other input-less node.
  final RelShuttle limitAdder = new RelShuttleImpl() {
    private RelNode wrapInZeroLimit(RelNode leaf) {
      final RexBuilder rexBuilder = leaf.getCluster().getRexBuilder();
      final RexLiteral zeroOffset = rexBuilder.makeExactLiteral(BigDecimal.ZERO);
      final RexLiteral zeroFetch = rexBuilder.makeExactLiteral(BigDecimal.ZERO);
      return new DrillLimitRel(leaf.getCluster(), leaf.getTraitSet(), leaf, zeroOffset, zeroFetch);
    }

    @Override
    public RelNode visit(LogicalValues values) {
      return wrapInZeroLimit(values);
    }

    @Override
    public RelNode visit(TableScan scan) {
      return wrapInZeroLimit(scan);
    }

    @Override
    public RelNode visit(RelNode other) {
      // A node without inputs is a leaf operator.
      return other.getInputs().isEmpty() ? wrapInZeroLimit(other) : super.visit(other);
    }
  };

  return (DrillRel) rel.accept(limitAdder);
}
use of org.apache.calcite.rel.core.TableScan in project drill by apache.
the class FileMetadataInfoCollector method init.
/**
 * Compares the files of the given selection with the last-modified times
 * returned by {@code basicRequests.filesLastModifiedTime} and, based on the
 * difference, fills in state declared outside this method:
 * {@code tableScan}, {@code metadataToRemove}, {@code filesInfo},
 * {@code rowGroupsInfo}, {@code allMetaToHandle} and {@code outdated}.
 *
 * <p>Three outcomes are possible:
 * <ul>
 * <li>the interesting columns or the metadata level do not match what is
 * stored: the scan from the supplier is used as is and the method returns
 * early;</li>
 * <li>there are new, updated or removed files: the scan is narrowed to the
 * new and updated files and the lists of metadata to handle / remove are
 * computed;</li>
 * <li>nothing changed: {@code outdated} is set to {@code false}.</li>
 * </ul>
 *
 * @param selection           selection whose file statuses are examined
 * @param settings            planner settings, passed on to {@code getTableScan}
 * @param tableScanSupplier   supplies the base table scan
 * @param interestingColumns  requested interesting columns, may be {@code null}
 * @param segmentColumnsCount number of segment columns; the last segment
 *                            index is this value minus one
 * @throws IOException declared by this method; NOTE(review): which callee
 *                     raises it is not visible here, presumably the file
 *                     status lookup, confirm
 */
private void init(FormatSelection selection, PlannerSettings settings, Supplier<TableScan> tableScanSupplier, List<SchemaPath> interestingColumns, int segmentColumnsCount) throws IOException {
// interesting columns recorded for the table, parsed into SchemaPath instances;
// null when none are recorded
List<SchemaPath> metastoreInterestingColumns = Optional.ofNullable(basicRequests.interestingColumnsAndPartitionKeys(tableInfo).interestingColumns()).map(metastoreInterestingColumnNames -> metastoreInterestingColumnNames.stream().map(SchemaPath::parseFromString).collect(Collectors.toList())).orElse(null);
Map<String, Long> filesNamesLastModifiedTime = basicRequests.filesLastModifiedTime(tableInfo, null, null);
List<String> newFiles = new ArrayList<>();
List<String> updatedFiles = new ArrayList<>();
// starts as every known file; each file still present in the selection is
// struck off in the loop below, so only removed files remain afterwards
List<String> removedFiles = new ArrayList<>(filesNamesLastModifiedTime.keySet());
List<String> allFiles = new ArrayList<>();
for (FileStatus fileStatus : getFileStatuses(selection)) {
String path = Path.getPathWithoutSchemeAndAuthority(fileStatus.getPath()).toUri().getPath();
Long lastModificationTime = filesNamesLastModifiedTime.get(path);
if (lastModificationTime == null) {
// no recorded modification time for this path: the file is new
newFiles.add(path);
} else if (lastModificationTime < fileStatus.getModificationTime()) {
// the file was modified after the recorded time
updatedFiles.add(path);
}
removedFiles.remove(path);
allFiles.add(path);
}
String selectionRoot = selection.getSelection().getSelectionRoot().toUri().getPath();
// '&&' binds tighter than '||', so this reads as:
//   (the stored interesting columns are non-null, differ from the requested
//    ones, and the requested ones are null or not all contained in the stored ones)
//   OR (the stored analyze metadata level differs from metadataLevel)
if (!Objects.equals(metastoreInterestingColumns, interestingColumns) && metastoreInterestingColumns != null && (interestingColumns == null || !metastoreInterestingColumns.containsAll(interestingColumns)) || TableStatisticsKind.ANALYZE_METADATA_LEVEL.getValue(basicRequests.tableMetadata(tableInfo)).compareTo(metadataLevel) != 0) {
// do not update table scan and lists of segments / files / row groups,
// metadata should be recalculated
tableScan = tableScanSupplier.get();
// only segment metadata at index 0 for the removed files is scheduled for removal here
metadataToRemove.addAll(getMetadataInfoList(selectionRoot, removedFiles, MetadataType.SEGMENT, 0));
return;
}
// proceeds with an incremental update if there are any new, updated or removed files
if (!newFiles.isEmpty() || !updatedFiles.isEmpty() || !removedFiles.isEmpty()) {
List<String> scanFiles = new ArrayList<>(newFiles);
scanFiles.addAll(updatedFiles);
// updates scan to read updated / new files
tableScan = getTableScan(settings, tableScanSupplier.get(), scanFiles);
// iterates from the end;
// takes deepest updated segments;
// finds their parents:
// - fetches all segments for parent level;
// - filters segments to leave parents only;
// obtains all child segments;
// filters child segments for filtered parent segments
int lastSegmentIndex = segmentColumnsCount - 1;
List<String> scanAndRemovedFiles = new ArrayList<>(scanFiles);
scanAndRemovedFiles.addAll(removedFiles);
// 1. Obtain files info for files from the same folder without removed files
// 2. Get segments for obtained files + segments for removed files
// 3. Get parent segments
// 4. Get other segments for the same parent segment
// 5. Remove segments which have only removed files (matched for removedFileInfo and don't match to filesInfo)
// 6. Do the same for parent segments
List<MetadataInfo> allFilesInfo = getMetadataInfoList(selectionRoot, allFiles, MetadataType.FILE, 0);
// first pass: collect updated segments even without files, they will be removed later
List<MetadataInfo> leafSegments = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.SEGMENT, lastSegmentIndex);
List<MetadataInfo> removedFilesMetadata = getMetadataInfoList(selectionRoot, removedFiles, MetadataType.FILE, 0);
List<MetadataInfo> scanFilesInfo = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.FILE, 0);
// files from scan + files from the same folder without removed files:
// keeps leaf segments that are a parent of some scanned / removed file, then
// takes every currently present file under those segments
filesInfo = leafSegments.stream().filter(parent -> scanFilesInfo.stream().anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).flatMap(parent -> allFilesInfo.stream().filter(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).collect(Collectors.toList());
Multimap<Integer, MetadataInfo> allSegments = populateSegments(removedFiles, allFiles, selectionRoot, lastSegmentIndex, leafSegments, removedFilesMetadata);
List<MetadataInfo> allRowGroupsInfo = getAllRowGroupsMetadataInfos(allFiles);
// row groups whose identifier has one of the files in filesInfo as parent
rowGroupsInfo = allRowGroupsInfo.stream().filter(child -> filesInfo.stream().map(MetadataInfo::identifier).anyMatch(parent -> MetadataIdentifierUtils.isMetadataKeyParent(parent, child.identifier()))).collect(Collectors.toList());
List<MetadataInfo> segmentsToUpdate = getMetadataInfoList(selectionRoot, scanAndRemovedFiles, MetadataType.SEGMENT, 0);
// all segments, files and row groups that sit under a segment to update, excluding
// entries that are a parent of removed files only (i.e. of no file in filesInfo)
allMetaToHandle = Streams.concat(allSegments.values().stream(), allFilesInfo.stream(), allRowGroupsInfo.stream()).filter(child -> segmentsToUpdate.stream().anyMatch(parent -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).filter(parent -> removedFilesMetadata.stream().noneMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())) || filesInfo.stream().anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).collect(Collectors.toList());
// removed top-level segments are handled separately since their metadata is not overridden when writing to the Metastore;
// a top-level segment counts as removed when it is a parent of some removed file and of no present file
List<MetadataInfo> removedTopSegments = getMetadataInfoList(selectionRoot, removedFiles, MetadataType.SEGMENT, 0).stream().filter(parent -> removedFilesMetadata.stream().anyMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier())) && allFilesInfo.stream().noneMatch(child -> MetadataIdentifierUtils.isMetadataKeyParent(parent.identifier(), child.identifier()))).collect(Collectors.toList());
metadataToRemove.addAll(removedTopSegments);
// the remaining (not removed) segments to update are handled along with the rest
segmentsToUpdate.stream().filter(segment -> !removedTopSegments.contains(segment)).forEach(allMetaToHandle::add);
} else {
// table metadata may still be actual
outdated = false;
}
}
Aggregations