Search in sources :

Example 1 with SamzaSqlRelMessageSerdeFactory

use of org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory in project samza by apache.

In the class JoinTranslator, method getTable.

/**
 * Resolves the table used on the table side of a join.
 *
 * <p>A remote table is returned as soon as it has been looked up. For a local table, the input
 * stream registered under the table node's rel-node id is re-partitioned by the join key and
 * sent to the table before the table is returned.
 *
 * @param tableNode join input node describing the table side of the join
 * @param context   translator context supplying the application config, the stream application
 *                  descriptor and the already-translated message streams
 * @return the table obtained from the stream application descriptor
 * @throws SamzaException if no IO config can be resolved for the table node, or the resolved
 *                        config carries no table descriptor
 */
private Table getTable(JoinInputNode tableNode, TranslatorContext context) {
    SqlIOConfig tableIoConfig = resolveSQlIOForTable(
        tableNode.getRelNode(),
        context.getExecutionContext().getSamzaSqlApplicationConfig().getInputSystemStreamConfigBySource());

    boolean tableResolved = tableIoConfig != null && tableIoConfig.getTableDescriptor().isPresent();
    if (!tableResolved) {
        String failure = "Failed to resolve table source in join operation: node=" + tableNode.getRelNode();
        log.error(failure);
        throw new SamzaException(failure);
    }

    Table<KV<SamzaSqlRelRecord, SamzaSqlRelMessage>> joinTable =
        context.getStreamAppDescriptor().getTable(tableIoConfig.getTableDescriptor().get());

    if (!tableNode.isRemoteTable()) {
        // Local table: it has to be loaded. The fields of the join condition form the composite
        // key and the relational message is the value; the messages of the input stream denoted
        // as 'table' are sent to the created table store.
        MessageStream<SamzaSqlRelMessage> tableInput = context.getMessageStream(tableNode.getRelNode().getId());

        SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde recordSerde =
            (SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde) new SamzaSqlRelRecordSerdeFactory().getSerde(null, null);
        SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde messageSerde =
            (SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde) new SamzaSqlRelMessageSerdeFactory().getSerde(null, null);
        List<Integer> joinKeyIds = tableNode.getKeyIds();

        // Always repartition by the join fields before writing key and value to the table, so
        // that the stream and the table being joined share the same partitioning scheme (same
        // partition key and partition count). Bootstrap semantics are not propagated to the
        // intermediate streams (see SAMZA-1613), so results are consistent only once the local
        // table has caught up.
        KVSerde<SamzaSqlRelRecord, SamzaSqlRelMessage> repartitionSerde = KVSerde.of(recordSerde, messageSerde);
        String repartitionOpId = intermediateStreamPrefix + "table_" + logicalOpId;
        tableInput
            .partitionBy(msg -> createSamzaSqlCompositeKey(msg, joinKeyIds), msg -> msg, repartitionSerde, repartitionOpId)
            .sendTo(joinTable);
    }

    return joinTable;
}
Also used : SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) TableScan(org.apache.calcite.rel.core.TableScan) LogicalFilter(org.apache.calcite.rel.logical.LogicalFilter) LoggerFactory(org.slf4j.LoggerFactory) RelOptUtil(org.apache.calcite.plan.RelOptUtil) ArrayList(java.util.ArrayList) LogicalJoin(org.apache.calcite.rel.logical.LogicalJoin) SamzaSqlRelRecordSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelRecordSerdeFactory) RexNode(org.apache.calcite.rex.RexNode) Map(java.util.Map) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage) LinkedList(java.util.LinkedList) KV(org.apache.samza.operators.KV) MessageStream(org.apache.samza.operators.MessageStream) Table(org.apache.samza.table.Table) SqlKind(org.apache.calcite.sql.SqlKind) Logger(org.slf4j.Logger) LogicalProject(org.apache.calcite.rel.logical.LogicalProject) SqlTypeName(org.apache.calcite.sql.type.SqlTypeName) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) RexLiteral(org.apache.calcite.rex.RexLiteral) SqlExplainLevel(org.apache.calcite.sql.SqlExplainLevel) SamzaSqlRelMessage.getSamzaSqlCompositeKeyFieldNames(org.apache.samza.sql.data.SamzaSqlRelMessage.getSamzaSqlCompositeKeyFieldNames) RelNode(org.apache.calcite.rel.RelNode) Collectors(java.util.stream.Collectors) SamzaSqlRelMessage.createSamzaSqlCompositeKey(org.apache.samza.sql.data.SamzaSqlRelMessage.createSamzaSqlCompositeKey) SamzaException(org.apache.samza.SamzaException) SqlIOConfig(org.apache.samza.sql.interfaces.SqlIOConfig) RexInputRef(org.apache.calcite.rex.RexInputRef) List(java.util.List) Validate(org.apache.commons.lang3.Validate) SamzaSqlRelRecord(org.apache.samza.sql.SamzaSqlRelRecord) HepRelVertex(org.apache.calcite.plan.hep.HepRelVertex) JoinRelType(org.apache.calcite.rel.core.JoinRelType) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) KVSerde(org.apache.samza.serializers.KVSerde) 
RexShuttle(org.apache.calcite.rex.RexShuttle) SamzaSqlRelMessageSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory) Collections(java.util.Collections) SqlExplainFormat(org.apache.calcite.sql.SqlExplainFormat) RexCall(org.apache.calcite.rex.RexCall) SamzaSqlRelRecordSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelRecordSerdeFactory) KV(org.apache.samza.operators.KV) SamzaException(org.apache.samza.SamzaException) SamzaSqlRelMessageSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory) SamzaSqlRelMessage(org.apache.samza.sql.data.SamzaSqlRelMessage)

Example 2 with SamzaSqlRelMessageSerdeFactory

use of org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory in project samza by apache.

In the class JoinTranslator, method joinStreamWithTable.

/**
 * Joins the given input stream with a table.
 *
 * <p>For a remote table the stream is joined directly using a
 * {@code SamzaSqlRemoteTableJoinFunction}. For a local table the stream is first re-partitioned
 * by a composite join key and then joined using a {@code SamzaSqlLocalTableJoinFunction}.
 *
 * @param inputStream stream side of the join
 * @param table       table to join against
 * @param streamNode  join input node describing the stream side (key ids, field names)
 * @param tableNode   join input node describing the table side (key ids, field names, source)
 * @param join        logical join whose join type is passed to the join function
 * @param context     translator context; used to obtain the converters for a remote table
 * @return the stream produced by the stream-table join
 * @throws IllegalArgumentException if the stream and table sides have a different number of
 *                                  join keys
 */
private MessageStream<SamzaSqlRelMessage> joinStreamWithTable(MessageStream<SamzaSqlRelMessage> inputStream, Table table, JoinInputNode streamNode, JoinInputNode tableNode, LogicalJoin join, TranslatorContext context) {
    List<Integer> streamKeyIds = streamNode.getKeyIds();
    List<Integer> tableKeyIds = tableNode.getKeyIds();
    // Key ids are paired up by position below, so both sides must have the same number of keys.
    Validate.isTrue(streamKeyIds.size() == tableKeyIds.size(),
        "Stream and table join key counts differ: stream=%d, table=%d", streamKeyIds.size(), tableKeyIds.size());
    log.info("Joining on the following Stream and Table field(s): ");
    List<String> streamFieldNames = new ArrayList<>(streamNode.getFieldNames());
    List<String> tableFieldNames = new ArrayList<>(tableNode.getFieldNames());
    for (int i = 0; i < streamKeyIds.size(); i++) {
        // Parameterized logging: the message is only assembled when INFO is enabled.
        log.info("{} with {}", streamFieldNames.get(streamKeyIds.get(i)), tableFieldNames.get(tableKeyIds.get(i)));
    }
    if (tableNode.isRemoteTable()) {
        String remoteTableName = tableNode.getSourceName();
        StreamTableJoinFunction joinFn = new SamzaSqlRemoteTableJoinFunction(context.getMsgConverter(remoteTableName), context.getTableKeyConverter(remoteTableName), streamNode, tableNode, join.getJoinType(), queryId);
        return inputStream.map(inputMetricsMF).join(table, joinFn);
    }
    // Join with the local table
    StreamTableJoinFunction joinFn = new SamzaSqlLocalTableJoinFunction(streamNode, tableNode, join.getJoinType());
    SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde keySerde = (SamzaSqlRelRecordSerdeFactory.SamzaSqlRelRecordSerde) new SamzaSqlRelRecordSerdeFactory().getSerde(null, null);
    SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde valueSerde = (SamzaSqlRelMessageSerdeFactory.SamzaSqlRelMessageSerde) new SamzaSqlRelMessageSerdeFactory().getSerde(null, null);
    // Re-partition the messages of the input stream by the composite join key before joining them
    // with the table. The composite key is built from the stream's key ids, but it is given the
    // table's field names instead of the names from the stream, as the lookup needs to be done
    // based on what is stored in the local table.
    return inputStream.map(inputMetricsMF).partitionBy(m -> createSamzaSqlCompositeKey(m, streamKeyIds, getSamzaSqlCompositeKeyFieldNames(tableFieldNames, tableKeyIds)), m -> m, KVSerde.of(keySerde, valueSerde), intermediateStreamPrefix + "stream_" + logicalOpId).map(KV::getValue).join(table, joinFn);
}
Also used : ArrayList(java.util.ArrayList) SamzaSqlRelRecordSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelRecordSerdeFactory) StreamTableJoinFunction(org.apache.samza.operators.functions.StreamTableJoinFunction) SamzaSqlRelMessageSerdeFactory(org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory)

Aggregations

ArrayList (java.util.ArrayList)2 StreamTableJoinFunction (org.apache.samza.operators.functions.StreamTableJoinFunction)2 SamzaSqlRelMessageSerdeFactory (org.apache.samza.sql.serializers.SamzaSqlRelMessageSerdeFactory)2 SamzaSqlRelRecordSerdeFactory (org.apache.samza.sql.serializers.SamzaSqlRelRecordSerdeFactory)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Preconditions (com.google.common.base.Preconditions)1 Collections (java.util.Collections)1 LinkedList (java.util.LinkedList)1 List (java.util.List)1 Map (java.util.Map)1 Collectors (java.util.stream.Collectors)1 RelOptUtil (org.apache.calcite.plan.RelOptUtil)1 HepRelVertex (org.apache.calcite.plan.hep.HepRelVertex)1 RelNode (org.apache.calcite.rel.RelNode)1 JoinRelType (org.apache.calcite.rel.core.JoinRelType)1 TableScan (org.apache.calcite.rel.core.TableScan)1 LogicalFilter (org.apache.calcite.rel.logical.LogicalFilter)1 LogicalJoin (org.apache.calcite.rel.logical.LogicalJoin)1 LogicalProject (org.apache.calcite.rel.logical.LogicalProject)1 RexCall (org.apache.calcite.rex.RexCall)1