Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.SqlKind.AND in the Apache Beam project.
Class BeamIOPushDownRule, method onMatch.
// ~ Methods ----------------------------------------------------------------
/**
 * Attempts to push the projects and filters of the matched {@code Calc} down into the IO it
 * reads from.
 *
 * <p>Operand 0 of the call is the {@code Calc}; operand 1 is the {@code BeamIOSourceRel} below
 * it. The rule returns without transforming anything when:
 *
 * <ul>
 *   <li>the source is already a {@code BeamPushDownIOSourceRel};
 *   <li>any field of the source row type is a {@code RelRecordType} (nested rows);
 *   <li>the table supports neither project push-down nor filter push-down;
 *   <li>the Calc does not use any input field;
 *   <li>the plan is already in its most optimal shape.
 * </ul>
 *
 * <p>Otherwise it hands the planner either a lone push-down IO (when {@code canDropCalc} says
 * the Calc is no longer needed) or the nodes built by {@code constructNodesWithPushDown}.
 *
 * @param call the rule call holding the matched Calc and IO source
 */
@Override
public void onMatch(RelOptRuleCall call) {
final BeamIOSourceRel ioSourceRel = call.rel(1);
final BeamSqlTable beamSqlTable = ioSourceRel.getBeamSqlTable();
// A push-down IO has already been produced for this source; do not apply the rule again.
if (ioSourceRel instanceof BeamPushDownIOSourceRel) {
return;
}
// Nested rows are not supported at the moment
for (RelDataTypeField field : ioSourceRel.getRowType().getFieldList()) {
if (field.getType() instanceof RelRecordType) {
return;
}
}
final Calc calc = call.rel(0);
final RexProgram program = calc.getProgram();
// split() yields the project expressions on the left and the filter expressions on the right.
final Pair<ImmutableList<RexNode>, ImmutableList<RexNode>> projectFilter = program.split();
final RelDataType calcInputRowType = program.getInputRowType();
// When predicate push-down is not supported - all filters are unsupported.
final BeamSqlTableFilter tableFilter = beamSqlTable.constructFilter(projectFilter.right);
if (!beamSqlTable.supportsProjects().isSupported() && tableFilter instanceof DefaultTableFilter) {
// Either project or filter push-down must be supported by the IO.
return;
}
// Insertion-ordered set of the input field names the IO still has to produce.
Set<String> usedFields = new LinkedHashSet<>();
if (!(tableFilter instanceof DefaultTableFilter) && !beamSqlTable.supportsProjects().isSupported()) {
// When applying standalone filter push-down all fields must be projected by the IO.
// With a single exception: Calc projects all fields (in the same order) and does nothing
// else.
usedFields.addAll(calcInputRowType.getFieldNames());
} else {
// Find all input refs used by projects
for (RexNode project : projectFilter.left) {
findUtilizedInputRefs(calcInputRowType, project, usedFields);
}
// Find all input refs used by filters
for (RexNode filter : tableFilter.getNotSupported()) {
findUtilizedInputRefs(calcInputRowType, filter, usedFields);
}
}
if (usedFields.isEmpty()) {
// No need to do push-down for queries like this: "select UPPER('hello')".
return;
}
// Already the most optimal case:
// every filter of the Calc is among the filters the IO does not support, and
// the IO only projects fields utilized by the Calc.
if (tableFilter.getNotSupported().containsAll(projectFilter.right) && usedFields.containsAll(ioSourceRel.getRowType().getFieldNames())) {
return;
}
// Resolve the used field names against the table schema.
FieldAccessDescriptor resolved = FieldAccessDescriptor.withFieldNames(usedFields);
resolved = resolved.resolve(beamSqlTable.getSchema());
if (canDropCalc(program, beamSqlTable.supportsProjects(), tableFilter)) {
// The Calc is not needed any more: offer a push-down IO that carries the Calc's row type,
// the resolved field names and the table filter.
call.transformTo(ioSourceRel.createPushDownRel(calc.getRowType(), resolved.getFieldsAccessed().stream().map(FieldDescriptor::getFieldName).collect(Collectors.toList()), tableFilter));
return;
}
// Already the most optimal case:
// the Calc contains exactly the unsupported filters, and
// the IO only projects fields utilised by the Calc.
// NOTE(review): if getNotSupported() equals projectFilter.right it presumably also contains all
// of its elements, in which case the earlier containsAll guard has already returned and this
// branch cannot be taken - confirm before relying on it.
if (tableFilter.getNotSupported().equals(projectFilter.right) && usedFields.containsAll(ioSourceRel.getRowType().getFieldNames())) {
return;
}
RelNode result = constructNodesWithPushDown(resolved, call.builder(), ioSourceRel, tableFilter, calc.getRowType(), projectFilter.left);
if (tableFilter.getNotSupported().size() <= projectFilter.right.size() || usedFields.size() < calcInputRowType.getFieldCount()) {
// Smaller Calc programs are indisputably better, as well as IOs with less projected fields.
// We can consider something with the same number of filters.
call.transformTo(result);
}
}
Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.SqlKind.AND in the Apache Beam project.
Class BeamJoinRel, method extractJoinRexNodes.
/**
 * Breaks a join condition down into the operand pairs of its equality comparisons.
 *
 * <p>The condition must be either a single {@code =} call or an {@code AND} call. Each operand
 * of an {@code AND} is passed to {@code extractJoinPairOfRexNodes}; a lone {@code =} call is
 * passed to it directly.
 *
 * @param condition the join condition to decompose
 * @return one pair per comparison, in the order the comparisons appear in the condition
 * @throws UnsupportedOperationException if the condition is a literal, is not a call at all, uses
 *     an operator other than {@code AND} or {@code =}, or is an {@code AND} with an operand that
 *     is not a call
 */
static List<Pair<RexNode, RexNode>> extractJoinRexNodes(RexNode condition) {
  // A literal condition has no column pairs to extract; the message below names the two cases
  // (CROSS JOIN and JOIN ON FALSE).
  if (condition instanceof RexLiteral) {
    throw new UnsupportedOperationException("CROSS JOIN, JOIN ON FALSE is not supported!");
  }
  // Anything that is not a call (e.g. a bare column reference) cannot be decomposed either.
  // Reject it explicitly instead of letting the cast below fail with a ClassCastException.
  if (!(condition instanceof RexCall)) {
    throw new UnsupportedOperationException(
        "Join condition " + condition + " is not supported: expected '=' or an AND of '=' comparisons");
  }
  RexCall call = (RexCall) condition;
  String operatorName = call.getOperator().getName();
  List<Pair<RexNode, RexNode>> pairs = new ArrayList<>();
  if ("AND".equals(operatorName)) {
    for (RexNode operand : call.getOperands()) {
      // Same reasoning as above: every conjunct has to be a call before it can be cast.
      if (!(operand instanceof RexCall)) {
        throw new UnsupportedOperationException(
            "Operand " + operand + " is not supported in join condition");
      }
      pairs.add(extractJoinPairOfRexNodes((RexCall) operand));
    }
  } else if ("=".equals(operatorName)) {
    pairs.add(extractJoinPairOfRexNodes(call));
  } else {
    throw new UnsupportedOperationException(
        "Operator " + operatorName + " is not supported in join condition");
  }
  return pairs;
}
Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.SqlKind.AND in the Apache Beam project.
Class ExpressionConverter, method convertRexNodeFromComputedColumnWithFieldList.
/**
 * Converts a computed column into a {@link RexNode}, giving window start/end functions special
 * treatment.
 *
 * <p>Expressions that are not function calls, and function calls outside the
 * {@code PRE_DEFINED_WINDOW_FUNCTIONS} group, are delegated to
 * {@code convertRexNodeFromResolvedExpr}. Window start functions (and the session window end)
 * become an input reference to the window field; fixed and sliding window ends become
 * {@code ZETASQL_TIMESTAMP_ADD(windowField, interval)}.
 *
 * @param column the computed column to convert
 * @param columnList columns forwarded to the general expression conversion
 * @param fieldList fields of the input row; the entry at {@code windowFieldIndex} supplies the
 *     type of the window field reference
 * @param windowFieldIndex index of the window field in {@code fieldList}
 * @return the converted expression
 * @throws ZetaSqlException if the column is one of the window functions themselves
 * @throws UnsupportedOperationException if a pre-defined window function is not a known window
 *     start/end function
 */
private RexNode convertRexNodeFromComputedColumnWithFieldList(ResolvedComputedColumn column, List<ResolvedColumn> columnList, List<RelDataTypeField> fieldList, int windowFieldIndex) {
  if (column.getExpr().nodeKind() != RESOLVED_FUNCTION_CALL) {
    return convertRexNodeFromResolvedExpr(column.getExpr(), columnList, fieldList, ImmutableMap.of());
  }

  ResolvedFunctionCall functionCall = (ResolvedFunctionCall) column.getExpr();
  String functionName = functionCall.getFunction().getName();

  // TODO: is there any other illegal case?
  boolean isWindowFunction =
      functionName.equals(FIXED_WINDOW)
          || functionName.equals(SLIDING_WINDOW)
          || functionName.equals(SESSION_WINDOW);
  if (isWindowFunction) {
    throw new ZetaSqlException(functionName + " shouldn't appear in SELECT exprlist.");
  }

  if (!functionCall.getFunction().getGroup().equals(PRE_DEFINED_WINDOW_FUNCTIONS)) {
    // Non-window functions still take the regular function call conversion path.
    return convertRexNodeFromResolvedExpr(column.getExpr(), columnList, fieldList, ImmutableMap.of());
  }

  // ONLY window_start and window_end should arrive here.
  // TODO: Have extra verification here to make sure window start/end functions have the same
  // parameter with window function.

  // Position of the interval argument that is added to the window field for *_END functions.
  final int intervalArgIndex;
  switch (functionName) {
    case FIXED_WINDOW_START:
    case SLIDING_WINDOW_START:
    case SESSION_WINDOW_START:
    case SESSION_WINDOW_END:
      // The session window end is answered with the same window field reference as the starts.
      return rexBuilder().makeInputRef(fieldList.get(windowFieldIndex).getType(), windowFieldIndex);
    case FIXED_WINDOW_END:
      // TODO: check window_end 's duration is the same as it's aggregate window.
      intervalArgIndex = 0;
      break;
    case SLIDING_WINDOW_END:
      intervalArgIndex = 1;
      break;
    default:
      throw new UnsupportedOperationException("Does not support window start/end: " + functionName);
  }

  // window end = window field + interval argument
  List<RexNode> timestampAddOperands = new ArrayList<>();
  timestampAddOperands.add(
      rexBuilder().makeInputRef(fieldList.get(windowFieldIndex).getType(), windowFieldIndex));
  timestampAddOperands.add(
      convertIntervalToRexIntervalLiteral(
          (ResolvedLiteral) functionCall.getArgumentList().get(intervalArgIndex)));
  return rexBuilder().makeCall(SqlOperators.ZETASQL_TIMESTAMP_ADD, timestampAddOperands);
}
Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.SqlKind.AND in the Apache Beam project.
Class PubsubTableProviderIT, method testSQLSelectsPayloadContent.
/**
 * Selects the nested {@code payload.id} and {@code payload.name} fields from a pubsub-backed
 * table and waits until exactly the three published rows have been observed.
 */
@Test
public void testSQLSelectsPayloadContent() throws Exception {
  // Table definition with the message payload exposed as a nested ROW.
  String ddlTemplate =
      "CREATE EXTERNAL TABLE message (\n"
          + "event_timestamp TIMESTAMP, \n"
          + "attributes MAP<VARCHAR, VARCHAR>, \n"
          + "payload ROW< \n"
          + " id INTEGER, \n"
          + " name VARCHAR \n"
          + " > \n"
          + ") \n"
          + "TYPE '%s' \n"
          + "LOCATION '%s' \n"
          + "TBLPROPERTIES '{ "
          + "%s"
          + "\"protoClass\" : \"%s\", "
          + "\"timestampAttributeKey\" : \"ts\" }'";
  String ddl =
      String.format(
          ddlTemplate,
          tableProvider.getTableType(),
          eventsTopic.topicPath(),
          payloadFormatParam(),
          PayloadMessages.SimpleMessage.class.getName());

  // Set up the SQL environment and register the pubsub table.
  BeamSqlEnv env = BeamSqlEnv.inMemory(new PubsubTableProvider());
  env.executeDdl(ddl);

  // Build the query over the pubsub topic.
  PCollection<Row> rows =
      query(env, pipeline, "SELECT message.payload.id, message.payload.name from message");

  // Signal success once the observed rows are exactly the expected ones.
  rows.apply(
      "waitForSuccess",
      resultSignal.signalSuccessWhen(
          SchemaCoder.of(PAYLOAD_SCHEMA),
          seen ->
              seen.equals(
                  ImmutableSet.of(
                      row(PAYLOAD_SCHEMA, 3, "foo"),
                      row(PAYLOAD_SCHEMA, 5, "bar"),
                      row(PAYLOAD_SCHEMA, 7, "baz")))));

  pipeline.run();

  // Wait for the pipeline's subscription on the topic before publishing anything.
  String project = pipeline.getOptions().as(GcpOptions.class).getProject();
  eventsTopic.assertSubscriptionEventuallyCreated(project, Duration.standardMinutes(5));

  // Publish the input messages now that the pipeline is running and subscribed.
  eventsTopic.publish(
      ImmutableList.of(
          objectsProvider.messageIdName(ts(1), 3, "foo"),
          objectsProvider.messageIdName(ts(2), 5, "bar"),
          objectsProvider.messageIdName(ts(3), 7, "baz")));

  // Block until the success signal arrives or the timeout elapses.
  resultSignal.waitForSuccess(timeout);
}
Use of org.apache.beam.vendor.calcite.v1_28_0.org.apache.calcite.sql.SqlKind.AND in the Apache Beam project.
Class PubsubTableProviderIT, method testSQLSelectsPayloadContentFlat.
/**
 * Selects the top-level {@code id} and {@code name} fields from a pubsub-backed table declared
 * with a flat schema and waits until exactly the three published rows have been observed.
 */
@Test
public void testSQLSelectsPayloadContentFlat() throws Exception {
  // Table definition with the payload fields declared directly as top-level columns.
  String ddlTemplate =
      "CREATE EXTERNAL TABLE message (\n"
          + "event_timestamp TIMESTAMP, \n"
          + "id INTEGER, \n"
          + "name VARCHAR \n"
          + ") \n"
          + "TYPE '%s' \n"
          + "LOCATION '%s' \n"
          + "TBLPROPERTIES "
          + " '{ "
          + " %s"
          + " \"protoClass\" : \"%s\", "
          + " \"timestampAttributeKey\" : \"ts\" "
          + " }'";
  String ddl =
      String.format(
          ddlTemplate,
          tableProvider.getTableType(),
          eventsTopic.topicPath(),
          payloadFormatParam(),
          PayloadMessages.SimpleMessage.class.getName());

  // Set up the SQL environment and register the pubsub table.
  BeamSqlEnv env = BeamSqlEnv.inMemory(new PubsubTableProvider());
  env.executeDdl(ddl);

  // Build the query over the pubsub topic.
  PCollection<Row> rows = query(env, pipeline, "SELECT message.id, message.name from message");

  // Signal success once the observed rows are exactly the expected ones.
  rows.apply(
      "waitForSuccess",
      resultSignal.signalSuccessWhen(
          SchemaCoder.of(PAYLOAD_SCHEMA),
          seen ->
              seen.equals(
                  ImmutableSet.of(
                      row(PAYLOAD_SCHEMA, 3, "foo"),
                      row(PAYLOAD_SCHEMA, 5, "bar"),
                      row(PAYLOAD_SCHEMA, 7, "baz")))));

  pipeline.run();

  // Wait for the pipeline's subscription on the topic before publishing anything.
  String project = pipeline.getOptions().as(GcpOptions.class).getProject();
  eventsTopic.assertSubscriptionEventuallyCreated(project, Duration.standardMinutes(5));

  // Publish the input messages now that the pipeline is running and subscribed.
  eventsTopic.publish(
      ImmutableList.of(
          objectsProvider.messageIdName(ts(1), 3, "foo"),
          objectsProvider.messageIdName(ts(2), 5, "bar"),
          objectsProvider.messageIdName(ts(3), 7, "baz")));

  // Block until the success signal arrives or the timeout elapses.
  resultSignal.waitForSuccess(timeout);
}
Aggregations