Search in sources :

Example 6 with SqlIOConfig

use of org.apache.samza.sql.interfaces.SqlIOConfig in project samza by apache.

the class TestJoinTranslator method testTranslateStreamToTableJoin.

private void testTranslateStreamToTableJoin(boolean isRemoteTable) throws IOException, ClassNotFoundException {
    // setup mock values to the constructor of JoinTranslator
    final String logicalOpId = "sql0_join3";
    final int queryId = 0;
    LogicalJoin mockJoin = PowerMockito.mock(LogicalJoin.class);
    TranslatorContext mockTranslatorContext = mock(TranslatorContext.class);
    RelNode mockLeftInput = PowerMockito.mock(LogicalTableScan.class);
    RelNode mockRightInput = mock(RelNode.class);
    List<RelNode> inputs = new ArrayList<>();
    inputs.add(mockLeftInput);
    inputs.add(mockRightInput);
    RelOptTable mockLeftTable = mock(RelOptTable.class);
    when(mockLeftInput.getTable()).thenReturn(mockLeftTable);
    List<String> qualifiedTableName = Arrays.asList("test", "LeftTable");
    when(mockLeftTable.getQualifiedName()).thenReturn(qualifiedTableName);
    when(mockLeftInput.getId()).thenReturn(1);
    when(mockRightInput.getId()).thenReturn(2);
    when(mockJoin.getId()).thenReturn(3);
    when(mockJoin.getInputs()).thenReturn(inputs);
    when(mockJoin.getLeft()).thenReturn(mockLeftInput);
    when(mockJoin.getRight()).thenReturn(mockRightInput);
    RexCall mockJoinCondition = mock(RexCall.class);
    when(mockJoinCondition.isAlwaysTrue()).thenReturn(false);
    when(mockJoinCondition.getKind()).thenReturn(SqlKind.EQUALS);
    when(mockJoin.getCondition()).thenReturn(mockJoinCondition);
    RexInputRef mockLeftConditionInput = mock(RexInputRef.class);
    RexInputRef mockRightConditionInput = mock(RexInputRef.class);
    when(mockLeftConditionInput.getIndex()).thenReturn(0);
    when(mockRightConditionInput.getIndex()).thenReturn(0);
    List<RexNode> condOperands = new ArrayList<>();
    condOperands.add(mockLeftConditionInput);
    condOperands.add(mockRightConditionInput);
    when(mockJoinCondition.getOperands()).thenReturn(condOperands);
    RelDataType mockLeftCondDataType = mock(RelDataType.class);
    RelDataType mockRightCondDataType = mock(RelDataType.class);
    when(mockLeftCondDataType.getSqlTypeName()).thenReturn(SqlTypeName.BOOLEAN);
    when(mockRightCondDataType.getSqlTypeName()).thenReturn(SqlTypeName.BOOLEAN);
    when(mockLeftConditionInput.getType()).thenReturn(mockLeftCondDataType);
    when(mockRightConditionInput.getType()).thenReturn(mockRightCondDataType);
    RelDataType mockLeftRowType = mock(RelDataType.class);
    // ?? why ??
    when(mockLeftRowType.getFieldCount()).thenReturn(0);
    when(mockLeftInput.getRowType()).thenReturn(mockLeftRowType);
    List<String> leftFieldNames = Collections.singletonList("test_table_field1");
    List<String> rightStreamFieldNames = Collections.singletonList("test_stream_field1");
    when(mockLeftRowType.getFieldNames()).thenReturn(leftFieldNames);
    RelDataType mockRightRowType = mock(RelDataType.class);
    when(mockRightInput.getRowType()).thenReturn(mockRightRowType);
    when(mockRightRowType.getFieldNames()).thenReturn(rightStreamFieldNames);
    StreamApplicationDescriptorImpl mockAppDesc = mock(StreamApplicationDescriptorImpl.class);
    OperatorSpec<Object, SamzaSqlRelMessage> mockLeftInputOp = mock(OperatorSpec.class);
    MessageStream<SamzaSqlRelMessage> mockLeftInputStream = new MessageStreamImpl<>(mockAppDesc, mockLeftInputOp);
    when(mockTranslatorContext.getMessageStream(eq(mockLeftInput.getId()))).thenReturn(mockLeftInputStream);
    OperatorSpec<Object, SamzaSqlRelMessage> mockRightInputOp = mock(OperatorSpec.class);
    MessageStream<SamzaSqlRelMessage> mockRightInputStream = new MessageStreamImpl<>(mockAppDesc, mockRightInputOp);
    when(mockTranslatorContext.getMessageStream(eq(mockRightInput.getId()))).thenReturn(mockRightInputStream);
    when(mockTranslatorContext.getStreamAppDescriptor()).thenReturn(mockAppDesc);
    InputOperatorSpec mockInputOp = mock(InputOperatorSpec.class);
    OutputStreamImpl mockOutputStream = mock(OutputStreamImpl.class);
    when(mockInputOp.isKeyed()).thenReturn(true);
    when(mockOutputStream.isKeyed()).thenReturn(true);
    doAnswer(this.getRegisterMessageStreamAnswer()).when(mockTranslatorContext).registerMessageStream(eq(3), any(MessageStream.class));
    RexToJavaCompiler mockCompiler = mock(RexToJavaCompiler.class);
    when(mockTranslatorContext.getExpressionCompiler()).thenReturn(mockCompiler);
    Expression mockExpr = mock(Expression.class);
    when(mockCompiler.compile(any(), any())).thenReturn(mockExpr);
    if (isRemoteTable) {
        doAnswer(this.getRegisteredTableAnswer()).when(mockAppDesc).getTable(any(RemoteTableDescriptor.class));
    } else {
        IntermediateMessageStreamImpl mockPartitionedStream = new IntermediateMessageStreamImpl(mockAppDesc, mockInputOp, mockOutputStream);
        when(mockAppDesc.getIntermediateStream(any(String.class), any(Serde.class), eq(false))).thenReturn(mockPartitionedStream);
        doAnswer(this.getRegisteredTableAnswer()).when(mockAppDesc).getTable(any(RocksDbTableDescriptor.class));
    }
    when(mockJoin.getJoinType()).thenReturn(JoinRelType.INNER);
    SamzaSqlExecutionContext mockExecutionContext = mock(SamzaSqlExecutionContext.class);
    when(mockTranslatorContext.getExecutionContext()).thenReturn(mockExecutionContext);
    SamzaSqlApplicationConfig mockAppConfig = mock(SamzaSqlApplicationConfig.class);
    when(mockExecutionContext.getSamzaSqlApplicationConfig()).thenReturn(mockAppConfig);
    Map<String, SqlIOConfig> ssConfigBySource = mock(HashMap.class);
    when(mockAppConfig.getInputSystemStreamConfigBySource()).thenReturn(ssConfigBySource);
    SqlIOConfig mockIOConfig = mock(SqlIOConfig.class);
    TableDescriptor mockTableDesc;
    if (isRemoteTable) {
        mockTableDesc = mock(RemoteTableDescriptor.class);
    } else {
        mockTableDesc = mock(RocksDbTableDescriptor.class);
    }
    when(ssConfigBySource.get(String.join(".", qualifiedTableName))).thenReturn(mockIOConfig);
    when(mockIOConfig.getTableDescriptor()).thenReturn(Optional.of(mockTableDesc));
    JoinTranslator joinTranslator = new JoinTranslator(logicalOpId, "", queryId);
    // Verify Metrics Works with Join
    Context mockContext = mock(Context.class);
    ContainerContext mockContainerContext = mock(ContainerContext.class);
    TestMetricsRegistryImpl testMetricsRegistryImpl = new TestMetricsRegistryImpl();
    when(mockContext.getContainerContext()).thenReturn(mockContainerContext);
    when(mockContainerContext.getContainerMetricsRegistry()).thenReturn(testMetricsRegistryImpl);
    TranslatorInputMetricsMapFunction inputMetricsMF = joinTranslator.getInputMetricsMF();
    assertNotNull(inputMetricsMF);
    inputMetricsMF.init(mockContext);
    TranslatorOutputMetricsMapFunction outputMetricsMF = joinTranslator.getOutputMetricsMF();
    assertNotNull(outputMetricsMF);
    outputMetricsMF.init(mockContext);
    assertEquals(1, testMetricsRegistryImpl.getCounters().size());
    assertEquals(2, testMetricsRegistryImpl.getCounters().get(logicalOpId).size());
    assertEquals(0, testMetricsRegistryImpl.getCounters().get(logicalOpId).get(0).getCount());
    assertEquals(0, testMetricsRegistryImpl.getCounters().get(logicalOpId).get(1).getCount());
    assertEquals(1, testMetricsRegistryImpl.getGauges().size());
    // Apply translate() method to verify that we are getting the correct map operator constructed
    joinTranslator.translate(mockJoin, mockTranslatorContext);
    // make sure that context has been registered with LogicFilter and output message streams
    verify(mockTranslatorContext, times(1)).registerMessageStream(3, this.getRegisteredMessageStream(3));
    when(mockTranslatorContext.getRelNode(3)).thenReturn(mockJoin);
    when(mockTranslatorContext.getMessageStream(3)).thenReturn(this.getRegisteredMessageStream(3));
    StreamTableJoinOperatorSpec joinSpec = (StreamTableJoinOperatorSpec) Whitebox.getInternalState(this.getRegisteredMessageStream(3), "operatorSpec");
    assertNotNull(joinSpec);
    assertEquals(joinSpec.getOpCode(), OperatorSpec.OpCode.JOIN);
    // Verify joinSpec has the corresponding setup
    StreamTableJoinFunction joinFn = joinSpec.getJoinFn();
    assertNotNull(joinFn);
    if (isRemoteTable) {
        assertTrue(joinFn instanceof SamzaSqlRemoteTableJoinFunction);
    } else {
        assertTrue(joinFn instanceof SamzaSqlLocalTableJoinFunction);
    }
    assertTrue(Whitebox.getInternalState(joinFn, "isTablePosOnRight").equals(false));
    assertEquals(Collections.singletonList(0), Whitebox.getInternalState(joinFn, "streamFieldIds"));
    assertEquals(leftFieldNames, Whitebox.getInternalState(joinFn, "tableFieldNames"));
    List<String> outputFieldNames = new ArrayList<>();
    outputFieldNames.addAll(leftFieldNames);
    outputFieldNames.addAll(rightStreamFieldNames);
    assertEquals(outputFieldNames, Whitebox.getInternalState(joinFn, "outFieldNames"));
}
Also used : Serde(org.apache.samza.serializers.Serde) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) MessageStreamImpl(org.apache.samza.operators.MessageStreamImpl) OutputStreamImpl(org.apache.samza.operators.spec.OutputStreamImpl) ArrayList(java.util.ArrayList) RelDataType(org.apache.calcite.rel.type.RelDataType) RexCall(org.apache.calcite.rex.RexCall) ContainerContext(org.apache.samza.context.ContainerContext) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) RocksDbTableDescriptor(org.apache.samza.storage.kv.descriptors.RocksDbTableDescriptor) MessageStream(org.apache.samza.operators.MessageStream) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) SamzaSqlExecutionContext(org.apache.samza.sql.data.SamzaSqlExecutionContext) Context(org.apache.samza.context.Context) ContainerContext(org.apache.samza.context.ContainerContext) InputOperatorSpec(org.apache.samza.operators.spec.InputOperatorSpec) SamzaSqlApplicationConfig(org.apache.samza.sql.runner.SamzaSqlApplicationConfig) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) IntermediateMessageStreamImpl(org.apache.samza.operators.stream.IntermediateMessageStreamImpl) RexToJavaCompiler(org.apache.samza.sql.data.RexToJavaCompiler) TableDescriptor(org.apache.samza.table.descriptors.TableDescriptor) RocksDbTableDescriptor(org.apache.samza.storage.kv.descriptors.RocksDbTableDescriptor) RemoteTableDescriptor(org.apache.samza.table.descriptors.RemoteTableDescriptor) TestMetricsRegistryImpl(org.apache.samza.sql.util.TestMetricsRegistryImpl) RelNode(org.apache.calcite.rel.RelNode) Expression(org.apache.samza.sql.data.Expression) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) RexInputRef(org.apache.calcite.rex.RexInputRef) SamzaSqlExecutionContext(org.apache.samza.sql.data.SamzaSqlExecutionContext) RelOptTable(org.apache.calcite.plan.RelOptTable) StreamTableJoinOperatorSpec(org.apache.samza.operators.spec.StreamTableJoinOperatorSpec) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) RexNode(org.apache.calcite.rex.RexNode)

Example 7 with SqlIOConfig

use of org.apache.samza.sql.interfaces.SqlIOConfig in project samza by apache.

the class JoinTranslator method getTable.

private Table getTable(JoinInputNode tableNode, TranslatorContext context) {
    SqlIOConfig sourceTableConfig = resolveSQlIOForTable(tableNode.getRelNode(), context.getExecutionContext().getSamzaSqlApplicationConfig().getInputSystemStreamConfigBySource());
    if (sourceTableConfig == null || !sourceTableConfig.getTableDescriptor().isPresent()) {
        String errMsg = "Failed to resolve table source in join operation: node=" + tableNode.getRelNode();
        log.error(errMsg);
        throw new SamzaException(errMsg);
    }
    Table<KV<SamzaSqlRelRecord, SamzaSqlRelMessage>> table = context.getStreamAppDescriptor().getTable(sourceTableConfig.getTableDescriptor().get());
    if (tableNode.isRemoteTable()) {
        return table;
    }
    // If local table, load the table.
    // Load the local table with the fields in the join condition as composite key and relational message as the value.
    // Send the messages from the input stream denoted as 'table' to the created table store.
    MessageStream<SamzaSqlRelMessage> relOutputStream = context.getMessageStream(tableNode.getRelNode().getId());
    SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde keySerde = (SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde) new SamzaSqlRelRecordSerdeFactory().getSerde(null, null);
    SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde valueSerde = (SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde) new SamzaSqlRelMessageSerdeFactory().getSerde(null, null);
    List<Integer> tableKeyIds = tableNode.getKeyIds();
    // Let's always repartition by the join fields as key before sending the key and value to the table.
    // We need to repartition the stream denoted as table to ensure that both the stream and table that are joined
    // have the same partitioning scheme with the same partition key and number. Please note that bootstrap semantic is
    // not propagated to the intermediate streams. Please refer SAMZA-1613 for more details on this. Subsequently, the
    // results are consistent only after the local table is caught up.
    relOutputStream.partitionBy(m -> createSamzaSqlCompositeKey(m, tableKeyIds), m -> m, KVSerde.of(keySerde, valueSerde), intermediateStreamPrefix + "table_" + logicalOpId).sendTo(table);
    return table;
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) TableScan(org.apache.calcite.rel.core.TableScan) LogicalFilter(org.apache.calcite.rel.logical.LogicalFilter) LoggerFactory(org.slf4j.LoggerFactory) RelOptUtil(org.apache.calcite.plan.RelOptUtil) ArrayList(java.util.ArrayList) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) SamzaSqlRelRecordSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelRecordSerdeFactory) RexNode(org.apache.calcite.rex.RexNode) Map(java.util.Map) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) LinkedList(java.util.LinkedList) KV(org.apache.samza.operators.KV) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) SqlKind(org.apache.calcite.sql.SqlKind) Logger(org.slf4j.Logger) LogicalProject(org.apache.calcite.rel.logical.LogicalProject) SqlTypeName(org.apache.calcite.sql.type.SqlTypeName) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) RexLiteral(org.apache.calcite.rex.RexLiteral) SqlExplainLevel(org.apache.calcite.sql.SqlExplainLevel) SamzaSqlRelMessage.getSamzaSqlCompositeKeyFieldNames(org.apache.samza.sql.data.SamzaSqlRelMessage.getSamzaSqlCompositeKeyFieldNames) RelNode(org.apache.calcite.rel.RelNode) Collectors(java.util.stream.Collectors) SamzaSqlRelMessage.createSamzaSqlCompositeKey(org.apache.samza.sql.data.SamzaSqlRelMessage.createSamzaSqlCompositeKey) SamzaException(org.apache.samza.SamzaException) SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) RexInputRef(org.apache.calcite.rex.RexInputRef) List(java.util.List) Validate(org.apache.commons.lang3.Validate) SamzaSqlRelRecord(org.apache.samza.sql.SamzaSqlRelRecord) HepRelVertex(org.apache.calcite.plan.hep.HepRelVertex) JoinRelType(org.apache.calcite.rel.core.JoinRelType) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) KVSerde(org.apache.samza.serializers.KVSerde) RexShuttle(org.apache.calcite.rex.RexShuttle) SamzaSqlRelMessageSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory) Collections(java.util.Collections) SqlExplainFormat(org.apache.calcite.sql.SqlExplainFormat) RexCall(org.apache.calcite.rex.RexCall) SamzaSqlRelRecordSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelRecordSerdeFactory) KV(org.apache.samza.operators.KV) SamzaException(org.apache.samza.SamzaException) SamzaSqlRelMessageSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 8 with SqlIOConfig

use of org.apache.samza.sql.interfaces.SqlIOConfig in project samza by apache.

the class JoinTranslator method resolveSQlIOForTable.

static SqlIOConfig resolveSQlIOForTable(RelNode relNode, Map<String, SqlIOConfig> systemStreamConfigBySource) {
    if (relNode instanceof HepRelVertex) {
        return resolveSQlIOForTable(((HepRelVertex) relNode).getCurrentRel(), systemStreamConfigBySource);
    }
    if (relNode instanceof LogicalProject) {
        return resolveSQlIOForTable(((LogicalProject) relNode).getInput(), systemStreamConfigBySource);
    }
    if (relNode instanceof LogicalFilter) {
        return resolveSQlIOForTable(((LogicalFilter) relNode).getInput(), systemStreamConfigBySource);
    }
    // is considered a stream. Hence, we return null for the table.
    if (relNode instanceof LogicalJoin && relNode.getInputs().size() > 1) {
        return null;
    }
    if (!(relNode instanceof TableScan)) {
        throw new SamzaException(String.format("Unsupported query. relNode %s is not of type TableScan.", relNode.toString()));
    }
    String sourceName = SqlIOConfig.getSourceFromSourceParts(relNode.getTable().getQualifiedName());
    SqlIOConfig sourceConfig = systemStreamConfigBySource.get(sourceName);
    if (sourceConfig == null) {
        throw new SamzaException("Unsupported source found in join statement: " + sourceName);
    }
    return sourceConfig;
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) HepRelVertex(org.apache.calcite.plan.hep.HepRelVertex) TableScan(org.apache.calcite.rel.core.TableScan) LogicalFilter(org.apache.calcite.rel.logical.LogicalFilter) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) LogicalProject(org.apache.calcite.rel.logical.LogicalProject) SamzaException(org.apache.samza.SamzaException)

Aggregations

SqlIOConfig (org.apache.samza.sql.interfaces.SqlIOConfig)8 SamzaException (org.apache.samza.SamzaException)5 LogicalJoin (org.apache.calcite.rel.logical.LogicalJoin)4 SamzaSqlRelMessage (org.apache.samza.sql.data.SamzaSqlRelMessage)4 RelNode (org.apache.calcite.rel.RelNode)3 TableScan (org.apache.calcite.rel.core.TableScan)3 LogicalFilter (org.apache.calcite.rel.logical.LogicalFilter)3 LogicalProject (org.apache.calcite.rel.logical.LogicalProject)3 MessageStream (org.apache.samza.operators.MessageStream)3 SamzaSqlApplicationConfig (org.apache.samza.sql.runner.SamzaSqlApplicationConfig)3 VisibleForTesting (com.google.common.annotations.VisibleForTesting)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 LinkedList (java.util.LinkedList)2 Map (java.util.Map)2 HepRelVertex (org.apache.calcite.plan.hep.HepRelVertex)2 RelDataType (org.apache.calcite.rel.type.RelDataType)2 RexCall (org.apache.calcite.rex.RexCall)2 RexInputRef (org.apache.calcite.rex.RexInputRef)2 RexNode (org.apache.calcite.rex.RexNode)2