use of org.apache.calcite.rel.logical.LogicalJoin in project samza by apache.
the class JoinTranslator method validateJoinQuery.
private void validateJoinQuery(LogicalJoin join, JoinInputNode.InputType inputTypeOnLeft, JoinInputNode.InputType inputTypeOnRight) {
JoinRelType joinRelType = join.getJoinType();
if (joinRelType.compareTo(JoinRelType.INNER) != 0 && joinRelType.compareTo(JoinRelType.LEFT) != 0 && joinRelType.compareTo(JoinRelType.RIGHT) != 0) {
throw new SamzaException("Query with only INNER and LEFT/RIGHT OUTER join are supported.");
}
boolean isTablePosOnLeft = inputTypeOnLeft != JoinInputNode.InputType.STREAM;
boolean isTablePosOnRight = inputTypeOnRight != JoinInputNode.InputType.STREAM;
if (!isTablePosOnLeft && !isTablePosOnRight) {
throw new SamzaException("Invalid query with both sides of join being denoted as 'stream'. " + "Stream-stream join is not yet supported. " + dumpRelPlanForNode(join));
}
if (isTablePosOnLeft && isTablePosOnRight) {
throw new SamzaException("Invalid query with both sides of join being denoted as 'table'. " + dumpRelPlanForNode(join));
}
if (joinRelType.compareTo(JoinRelType.LEFT) == 0 && isTablePosOnLeft) {
throw new SamzaException("Invalid query for outer left join. Left side of the join should be a 'stream' and " + "right side of join should be a 'table'. " + dumpRelPlanForNode(join));
}
if (joinRelType.compareTo(JoinRelType.RIGHT) == 0 && isTablePosOnRight) {
throw new SamzaException("Invalid query for outer right join. Left side of the join should be a 'table' and " + "right side of join should be a 'stream'. " + dumpRelPlanForNode(join));
}
final List<RexNode> conjunctionList = new ArrayList<>();
decomposeAndValidateConjunction(join.getCondition(), conjunctionList);
if (conjunctionList.isEmpty()) {
throw new SamzaException("Query results in a cross join, which is not supported. Please optimize the query." + " It is expected that the joins should include JOIN ON operator in the sql query.");
}
// TODO: It is unclear why a literal cannot be allowed as part of the join condition; revisit this in another scope.
conjunctionList.forEach(rexNode -> rexNode.accept(new RexShuttle() {
@Override
public RexNode visitLiteral(RexLiteral literal) {
throw new SamzaException("Join Condition can not allow literal " + literal.toString() + " join node" + join.getDigest());
}
}));
final JoinInputNode.InputType rootTableInput = isTablePosOnRight ? inputTypeOnRight : inputTypeOnLeft;
if (rootTableInput.compareTo(JoinInputNode.InputType.REMOTE_TABLE) != 0) {
// It is not a remote table, so all is good; we do not have to validate the project on the key column.
return;
}
/*
For a remote table we need to validate the join condition and the project that sits above the remote table scan.
- As of today the filter needs to be exactly one equi-join predicate using the __key__ column (see SAMZA-2554).
- The project on top of the remote table has to contain only simple input references to the columns used in the join.
*/
// First, collect the refs of the columns used by the join condition.
List<RexInputRef> refCollector = new ArrayList<>();
join.getCondition().accept(new RexShuttle() {
@Override
public RexNode visitInputRef(RexInputRef inputRef) {
refCollector.add(inputRef);
return inputRef;
}
});
// start index of the Remote table within the Join Row
final int tableStartIndex = isTablePosOnRight ? join.getLeft().getRowType().getFieldCount() : 0;
// end index of the Remote table within the Join Row
final int tableEndIndex = isTablePosOnRight ? join.getRowType().getFieldCount() : join.getLeft().getRowType().getFieldCount();
List<Integer> tableRefsIdx = refCollector.stream().map(x -> x.getIndex()).filter(// collect all the refs form table side
x -> tableStartIndex <= x && x < tableEndIndex).map(// re-adjust the offset
x -> x - tableStartIndex).sorted().collect(// we have a list with all the input from table side with 0 based index.
Collectors.toList());
if (conjunctionList.size() != 1 || tableRefsIdx.size() != 1) {
// TODO We can relax this by allowing another filter to be evaluated post lookup see SAMZA-2554
throw new SamzaException("Invalid query for join condition must contain exactly one predicate for remote table on __key__ column " + dumpRelPlanForNode(join));
}
// Validate the project: follow each input and ensure that it is a simple ref with no RexCall in the way.
if (!isValidRemoteJoinRef(tableRefsIdx.get(0), isTablePosOnRight ? join.getRight() : join.getLeft())) {
throw new SamzaException("Invalid query for join condition can not have an expression and must be reference " + SamzaSqlRelMessage.KEY_NAME + " column " + dumpRelPlanForNode(join));
}
}
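The helper decomposeAndValidateConjunction called above is not shown in this snippet. For a join condition that is a flat AND of predicates, Calcite's own RelOptUtil.conjunctions produces a comparable list of conjuncts. The following is a minimal, self-contained sketch of that decomposition; the type factory, operators, and column indices are illustrative assumptions, not taken from the Samza code.
import java.util.List;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
import org.apache.calcite.sql.type.SqlTypeName;

public class ConjunctionSketch {
  public static void main(String[] args) {
    SqlTypeFactoryImpl typeFactory = new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT);
    RexBuilder rexBuilder = new RexBuilder(typeFactory);
    RelDataType intType = typeFactory.createSqlType(SqlTypeName.INTEGER);
    // Build the condition =($0, $2) AND =($1, $3), as a planner would for a two-key equi-join.
    RexNode eq1 = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
        rexBuilder.makeInputRef(intType, 0), rexBuilder.makeInputRef(intType, 2));
    RexNode eq2 = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
        rexBuilder.makeInputRef(intType, 1), rexBuilder.makeInputRef(intType, 3));
    RexNode condition = rexBuilder.makeCall(SqlStdOperatorTable.AND, eq1, eq2);
    // Split the AND tree into its conjuncts; an empty list would indicate a cross join.
    List<RexNode> conjuncts = RelOptUtil.conjunctions(condition);
    System.out.println(conjuncts); // prints [=($0, $2), =($1, $3)]
  }
}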
use of org.apache.calcite.rel.logical.LogicalJoin in project samza by apache.
the class JoinTranslator method resolveSQlIOForTable.
static SqlIOConfig resolveSQlIOForTable(RelNode relNode, Map<String, SqlIOConfig> systemStreamConfigBySource) {
if (relNode instanceof HepRelVertex) {
return resolveSQlIOForTable(((HepRelVertex) relNode).getCurrentRel(), systemStreamConfigBySource);
}
if (relNode instanceof LogicalProject) {
return resolveSQlIOForTable(((LogicalProject) relNode).getInput(), systemStreamConfigBySource);
}
if (relNode instanceof LogicalFilter) {
return resolveSQlIOForTable(((LogicalFilter) relNode).getInput(), systemStreamConfigBySource);
}
// The output of a join is itself considered a stream. Hence, we return null for the table.
if (relNode instanceof LogicalJoin && relNode.getInputs().size() > 1) {
return null;
}
if (!(relNode instanceof TableScan)) {
throw new SamzaException(String.format("Unsupported query. relNode %s is not of type TableScan.", relNode.toString()));
}
String sourceName = SqlIOConfig.getSourceFromSourceParts(relNode.getTable().getQualifiedName());
SqlIOConfig sourceConfig = systemStreamConfigBySource.get(sourceName);
if (sourceConfig == null) {
throw new SamzaException("Unsupported source found in join statement: " + sourceName);
}
return sourceConfig;
}
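A hedged illustration of how this resolver could be used from the join translation; the call site below is illustrative, not the actual Samza code. The table side of the join is resolved to its SqlIOConfig, and a null result means the input is itself a join output, i.e. a stream rather than a table.
// Illustrative only: resolve the SqlIOConfig for the table side of a stream-table join.
SqlIOConfig tableConfig = isTablePosOnRight
    ? resolveSQlIOForTable(join.getRight(), systemStreamConfigBySource)
    : resolveSQlIOForTable(join.getLeft(), systemStreamConfigBySource);
if (tableConfig == null) {
  // The table-side input is itself a join output (a stream); there is no table source to resolve.
  throw new SamzaException("Expected a table source on the table side of the join.");
}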
use of org.apache.calcite.rel.logical.LogicalJoin in project samza by apache.
the class QueryTranslator method translate.
/**
* Translate Calcite plan to Samza stream operators.
* @param relRoot Calcite plan in the form of {@link RelRoot}. RelRoot should not include the sink ({@link TableModify})
* @param outputSystemStream Sink associated with the Calcite plan.
* @param translatorContext Context maintained across translations.
* @param queryId index of the SQL statement corresponding to the Calcite plan in a multi-statement scenario,
* starting at index 0.
*/
public void translate(RelRoot relRoot, String outputSystemStream, TranslatorContext translatorContext, int queryId) {
final RelNode node = relRoot.project();
ScanTranslator scanTranslator = new ScanTranslator(sqlConfig.getSamzaRelConverters(), sqlConfig.getInputSystemStreamConfigBySource(), queryId);
/* update input metrics */
String queryLogicalId = String.format(TranslatorConstants.LOGSQLID_TEMPLATE, queryId);
opId = 0;
node.accept(new RelShuttleImpl() {
@Override
public RelNode visit(RelNode relNode) {
// There should never be a TableModify in the calcite plan.
Validate.isTrue(!(relNode instanceof TableModify));
return super.visit(relNode);
}
@Override
public RelNode visit(TableScan scan) {
RelNode node = super.visit(scan);
String logicalOpId = String.format(TranslatorConstants.LOGOPID_TEMPLATE, queryId, "scan", opId++);
scanTranslator.translate(scan, queryLogicalId, logicalOpId, translatorContext, systemDescriptors, inputMsgStreams);
return node;
}
@Override
public RelNode visit(LogicalFilter filter) {
RelNode node = visitChild(filter, 0, filter.getInput());
String logicalOpId = String.format(TranslatorConstants.LOGOPID_TEMPLATE, queryId, "filter", opId++);
new FilterTranslator(queryId).translate(filter, logicalOpId, translatorContext);
return node;
}
@Override
public RelNode visit(LogicalProject project) {
RelNode node = super.visit(project);
String logicalOpId = String.format(TranslatorConstants.LOGOPID_TEMPLATE, queryId, "project", opId++);
new ProjectTranslator(queryId).translate(project, logicalOpId, translatorContext);
return node;
}
@Override
public RelNode visit(LogicalJoin join) {
RelNode node = super.visit(join);
String logicalOpId = String.format(TranslatorConstants.LOGOPID_TEMPLATE, queryId, "join", opId++);
new JoinTranslator(logicalOpId, sqlConfig.getMetadataTopicPrefix(), queryId).translate(join, translatorContext);
return node;
}
@Override
public RelNode visit(LogicalAggregate aggregate) {
RelNode node = super.visit(aggregate);
String logicalOpId = String.format(TranslatorConstants.LOGOPID_TEMPLATE, queryId, "window", opId++);
new LogicalAggregateTranslator(logicalOpId, sqlConfig.getMetadataTopicPrefix()).translate(aggregate, translatorContext);
return node;
}
});
String logicalOpId = String.format(TranslatorConstants.LOGOPID_TEMPLATE, queryId, "insert", opId);
sendToOutputStream(queryLogicalId, logicalOpId, outputSystemStream, streamAppDescriptor, translatorContext, node, queryId);
}
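The translation above is driven by Calcite's RelShuttleImpl visitor: each visit overload handles one logical node kind, while super.visit and visitChild recurse into the inputs so that children are translated before their parents. A minimal standalone sketch of the same traversal pattern, which only logs the nodes instead of translating them (illustrative, not part of Samza), is shown below.
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelShuttleImpl;
import org.apache.calcite.rel.core.TableScan;
import org.apache.calcite.rel.logical.LogicalFilter;
import org.apache.calcite.rel.logical.LogicalJoin;
import org.apache.calcite.rel.logical.LogicalProject;

public class LoggingShuttle extends RelShuttleImpl {
  @Override public RelNode visit(TableScan scan) {
    System.out.println("scan:    " + scan.getTable().getQualifiedName());
    return super.visit(scan);
  }
  @Override public RelNode visit(LogicalFilter filter) {
    System.out.println("filter:  " + filter.getCondition());
    return visitChild(filter, 0, filter.getInput());
  }
  @Override public RelNode visit(LogicalProject project) {
    System.out.println("project: " + project.getProjects());
    return super.visit(project);
  }
  @Override public RelNode visit(LogicalJoin join) {
    System.out.println("join:    " + join.getJoinType() + " on " + join.getCondition());
    return super.visit(join);
  }
}
// Usage against a plan root: relRoot.project().accept(new LoggingShuttle());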
use of org.apache.calcite.rel.logical.LogicalJoin in project calcite by apache.
the class EnumerableMergeJoinRule method convert.
@Override
public RelNode convert(RelNode rel) {
LogicalJoin join = (LogicalJoin) rel;
final JoinInfo info = JoinInfo.of(join.getLeft(), join.getRight(), join.getCondition());
if (join.getJoinType() != JoinRelType.INNER) {
// EnumerableMergeJoin only supports inner join. (It supports non-equi join, using a post-filter; see below.)
return null;
}
if (info.pairs().size() == 0) {
// EnumerableMergeJoin CAN support a cartesian join, but it is disabled for now.
return null;
}
final List<RelNode> newInputs = Lists.newArrayList();
final List<RelCollation> collations = Lists.newArrayList();
int offset = 0;
for (Ord<RelNode> ord : Ord.zip(join.getInputs())) {
RelTraitSet traits = ord.e.getTraitSet().replace(EnumerableConvention.INSTANCE);
if (!info.pairs().isEmpty()) {
final List<RelFieldCollation> fieldCollations = Lists.newArrayList();
for (int key : info.keys().get(ord.i)) {
fieldCollations.add(new RelFieldCollation(key, RelFieldCollation.Direction.ASCENDING, RelFieldCollation.NullDirection.LAST));
}
final RelCollation collation = RelCollations.of(fieldCollations);
collations.add(RelCollations.shift(collation, offset));
traits = traits.replace(collation);
}
newInputs.add(convert(ord.e, traits));
offset += ord.e.getRowType().getFieldCount();
}
final RelNode left = newInputs.get(0);
final RelNode right = newInputs.get(1);
final RelOptCluster cluster = join.getCluster();
RelNode newRel;
try {
RelTraitSet traits = join.getTraitSet().replace(EnumerableConvention.INSTANCE);
if (!collations.isEmpty()) {
traits = traits.replace(collations);
}
newRel = new EnumerableMergeJoin(cluster, traits, left, right, info.getEquiCondition(left, right, cluster.getRexBuilder()), info.leftKeys, info.rightKeys, join.getVariablesSet(), join.getJoinType());
} catch (InvalidRelException e) {
EnumerableRules.LOGGER.debug(e.toString());
return null;
}
if (!info.isEqui()) {
newRel = new EnumerableFilter(cluster, newRel.getTraitSet(), newRel, info.getRemaining(cluster.getRexBuilder()));
}
return newRel;
}
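JoinInfo.of is what splits the join condition into equi-join key pairs plus a non-equi remainder, which is exactly what the rule above inspects via info.pairs(), info.getEquiCondition and info.getRemaining. A short sketch of reading it back, assuming a LogicalJoin named join is in scope:
// Illustrative only: split a join condition into equi-join keys and a remainder.
JoinInfo info = JoinInfo.of(join.getLeft(), join.getRight(), join.getCondition());
ImmutableIntList leftKeys = info.leftKeys;    // key ordinals on the left input
ImmutableIntList rightKeys = info.rightKeys;  // key ordinals on the right input
boolean pureEqui = info.isEqui();             // true when there is no leftover predicate
RexNode remaining = info.getRemaining(join.getCluster().getRexBuilder()); // literal TRUE when pureEqui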
use of org.apache.calcite.rel.logical.LogicalJoin in project calcite by apache.
the class RelMetadataTest method checkAverageRowSize.
private void checkAverageRowSize(RelOptCluster cluster, RelOptTable empTable, RelOptTable deptTable) {
final RexBuilder rexBuilder = cluster.getRexBuilder();
final RelMetadataQuery mq = RelMetadataQuery.instance();
final LogicalTableScan empScan = LogicalTableScan.create(cluster, empTable);
Double rowSize = mq.getAverageRowSize(empScan);
List<Double> columnSizes = mq.getAverageColumnSizes(empScan);
assertThat(columnSizes.size(), equalTo(empScan.getRowType().getFieldCount()));
assertThat(columnSizes, equalTo(Arrays.asList(4.0, 40.0, 20.0, 4.0, 8.0, 4.0, 4.0, 4.0, 1.0)));
assertThat(rowSize, equalTo(89.0));
// Empty values
final LogicalValues emptyValues = LogicalValues.createEmpty(cluster, empTable.getRowType());
rowSize = mq.getAverageRowSize(emptyValues);
columnSizes = mq.getAverageColumnSizes(emptyValues);
assertThat(columnSizes.size(), equalTo(emptyValues.getRowType().getFieldCount()));
assertThat(columnSizes, equalTo(Arrays.asList(4.0, 40.0, 20.0, 4.0, 8.0, 4.0, 4.0, 4.0, 1.0)));
assertThat(rowSize, equalTo(89.0));
// Values
final RelDataType rowType = cluster.getTypeFactory().builder().add("a", SqlTypeName.INTEGER).add("b", SqlTypeName.VARCHAR).add("c", SqlTypeName.VARCHAR).build();
final ImmutableList.Builder<ImmutableList<RexLiteral>> tuples = ImmutableList.builder();
addRow(tuples, rexBuilder, 1, "1234567890", "ABC");
addRow(tuples, rexBuilder, 2, "1", "A");
addRow(tuples, rexBuilder, 3, "2", null);
final LogicalValues values = LogicalValues.create(cluster, rowType, tuples.build());
rowSize = mq.getAverageRowSize(values);
columnSizes = mq.getAverageColumnSizes(values);
assertThat(columnSizes.size(), equalTo(values.getRowType().getFieldCount()));
assertThat(columnSizes, equalTo(Arrays.asList(4.0, 8.0, 3.0)));
assertThat(rowSize, equalTo(15.0));
// Union
final LogicalUnion union = LogicalUnion.create(ImmutableList.<RelNode>of(empScan, emptyValues), true);
rowSize = mq.getAverageRowSize(union);
columnSizes = mq.getAverageColumnSizes(union);
assertThat(columnSizes.size(), equalTo(9));
assertThat(columnSizes, equalTo(Arrays.asList(4.0, 40.0, 20.0, 4.0, 8.0, 4.0, 4.0, 4.0, 1.0)));
assertThat(rowSize, equalTo(89.0));
// Filter
final LogicalTableScan deptScan = LogicalTableScan.create(cluster, deptTable);
final LogicalFilter filter = LogicalFilter.create(deptScan, rexBuilder.makeCall(SqlStdOperatorTable.LESS_THAN, rexBuilder.makeInputRef(deptScan, 0), rexBuilder.makeExactLiteral(BigDecimal.TEN)));
rowSize = mq.getAverageRowSize(filter);
columnSizes = mq.getAverageColumnSizes(filter);
assertThat(columnSizes.size(), equalTo(2));
assertThat(columnSizes, equalTo(Arrays.asList(4.0, 20.0)));
assertThat(rowSize, equalTo(24.0));
// Project
final LogicalProject deptProject = LogicalProject.create(filter, ImmutableList.of(rexBuilder.makeInputRef(filter, 0), rexBuilder.makeInputRef(filter, 1), rexBuilder.makeCall(SqlStdOperatorTable.PLUS, rexBuilder.makeInputRef(filter, 0), rexBuilder.makeExactLiteral(BigDecimal.ONE)), rexBuilder.makeCall(SqlStdOperatorTable.CHAR_LENGTH, rexBuilder.makeInputRef(filter, 1))), (List<String>) null);
rowSize = mq.getAverageRowSize(deptProject);
columnSizes = mq.getAverageColumnSizes(deptProject);
assertThat(columnSizes.size(), equalTo(4));
assertThat(columnSizes, equalTo(Arrays.asList(4.0, 20.0, 4.0, 4.0)));
assertThat(rowSize, equalTo(32.0));
// Join
final LogicalJoin join = LogicalJoin.create(empScan, deptProject, rexBuilder.makeLiteral(true), ImmutableSet.<CorrelationId>of(), JoinRelType.INNER);
rowSize = mq.getAverageRowSize(join);
columnSizes = mq.getAverageColumnSizes(join);
assertThat(columnSizes.size(), equalTo(13));
assertThat(columnSizes, equalTo(Arrays.asList(4.0, 40.0, 20.0, 4.0, 8.0, 4.0, 4.0, 4.0, 1.0, 4.0, 20.0, 4.0, 4.0)));
assertThat(rowSize, equalTo(121.0));
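// Note: the join's 13 column sizes are the concatenation of the scan's 9 and the project's 4, and its average row size 121.0 is the sum of the two inputs' row sizes, 89.0 + 32.0.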
// Aggregate
final LogicalAggregate aggregate = LogicalAggregate.create(join, ImmutableBitSet.of(2, 0), ImmutableList.<ImmutableBitSet>of(), ImmutableList.of(AggregateCall.create(SqlStdOperatorTable.COUNT, false, false, ImmutableIntList.of(), -1, 2, join, null, null)));
rowSize = mq.getAverageRowSize(aggregate);
columnSizes = mq.getAverageColumnSizes(aggregate);
assertThat(columnSizes.size(), equalTo(3));
assertThat(columnSizes, equalTo(Arrays.asList(4.0, 20.0, 8.0)));
assertThat(rowSize, equalTo(32.0));
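// Note: the aggregate keeps the two grouping columns (join positions 0 and 2, sizes 4.0 and 20.0) plus an 8.0-byte COUNT column, hence row size 32.0.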
// Smoke test Parallelism and Memory metadata providers
assertThat(mq.memory(aggregate), nullValue());
assertThat(mq.cumulativeMemoryWithinPhase(aggregate), nullValue());
assertThat(mq.cumulativeMemoryWithinPhaseSplit(aggregate), nullValue());
assertThat(mq.isPhaseTransition(aggregate), is(false));
assertThat(mq.splitCount(aggregate), is(1));
}