use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.
the class TestJoinTranslator method testTranslateStreamToTableJoin.
/**
 * Exercises {@code JoinTranslator.translate} for a join whose left input is a table scan and whose
 * right input is a stream, then inspects the join operator spec registered for the join node.
 *
 * @param isRemoteTable {@code true} to describe the table with a mocked {@code RemoteTableDescriptor},
 *                      {@code false} to describe it with a mocked {@code RocksDbTableDescriptor}
 * @throws IOException if raised by any of the calls made while exercising the translator
 * @throws ClassNotFoundException if raised by any of the calls made while exercising the translator
 */
private void testTranslateStreamToTableJoin(boolean isRemoteTable) throws IOException, ClassNotFoundException {
  // setup mock values to the constructor of JoinTranslator
  final String logicalOpId = "sql0_join3";
  final int queryId = 0;

  // Join node (id 3) with a table scan on the left (id 1) and a plain RelNode on the right (id 2).
  LogicalJoin mockJoin = PowerMockito.mock(LogicalJoin.class);
  TranslatorContext mockTranslatorContext = mock(TranslatorContext.class);
  RelNode mockLeftInput = PowerMockito.mock(LogicalTableScan.class);
  RelNode mockRightInput = mock(RelNode.class);
  List<RelNode> inputs = new ArrayList<>();
  inputs.add(mockLeftInput);
  inputs.add(mockRightInput);
  RelOptTable mockLeftTable = mock(RelOptTable.class);
  when(mockLeftInput.getTable()).thenReturn(mockLeftTable);
  List<String> qualifiedTableName = Arrays.asList("test", "LeftTable");
  when(mockLeftTable.getQualifiedName()).thenReturn(qualifiedTableName);
  when(mockLeftInput.getId()).thenReturn(1);
  when(mockRightInput.getId()).thenReturn(2);
  when(mockJoin.getId()).thenReturn(3);
  when(mockJoin.getInputs()).thenReturn(inputs);
  when(mockJoin.getLeft()).thenReturn(mockLeftInput);
  when(mockJoin.getRight()).thenReturn(mockRightInput);

  // Join condition: an EQUALS call over two input references, both pointing at index 0.
  RexCall mockJoinCondition = mock(RexCall.class);
  when(mockJoinCondition.isAlwaysTrue()).thenReturn(false);
  when(mockJoinCondition.getKind()).thenReturn(SqlKind.EQUALS);
  when(mockJoin.getCondition()).thenReturn(mockJoinCondition);
  RexInputRef mockLeftConditionInput = mock(RexInputRef.class);
  RexInputRef mockRightConditionInput = mock(RexInputRef.class);
  when(mockLeftConditionInput.getIndex()).thenReturn(0);
  when(mockRightConditionInput.getIndex()).thenReturn(0);
  List<RexNode> condOperands = new ArrayList<>();
  condOperands.add(mockLeftConditionInput);
  condOperands.add(mockRightConditionInput);
  when(mockJoinCondition.getOperands()).thenReturn(condOperands);
  RelDataType mockLeftCondDataType = mock(RelDataType.class);
  RelDataType mockRightCondDataType = mock(RelDataType.class);
  when(mockLeftCondDataType.getSqlTypeName()).thenReturn(SqlTypeName.BOOLEAN);
  when(mockRightCondDataType.getSqlTypeName()).thenReturn(SqlTypeName.BOOLEAN);
  when(mockLeftConditionInput.getType()).thenReturn(mockLeftCondDataType);
  when(mockRightConditionInput.getType()).thenReturn(mockRightCondDataType);

  // Row types of both inputs: one field name on each side.
  RelDataType mockLeftRowType = mock(RelDataType.class);
  // NOTE(review): the left field count is stubbed to 0 although one field name is returned below;
  // the reason is not evident from this test - confirm against how JoinTranslator uses getFieldCount().
  when(mockLeftRowType.getFieldCount()).thenReturn(0);
  when(mockLeftInput.getRowType()).thenReturn(mockLeftRowType);
  List<String> leftFieldNames = Collections.singletonList("test_table_field1");
  List<String> rightStreamFieldNames = Collections.singletonList("test_stream_field1");
  when(mockLeftRowType.getFieldNames()).thenReturn(leftFieldNames);
  RelDataType mockRightRowType = mock(RelDataType.class);
  when(mockRightInput.getRowType()).thenReturn(mockRightRowType);
  when(mockRightRowType.getFieldNames()).thenReturn(rightStreamFieldNames);

  // Message streams the translator context hands out for the two inputs.
  StreamApplicationDescriptorImpl mockAppDesc = mock(StreamApplicationDescriptorImpl.class);
  OperatorSpec<Object, SamzaSqlRelMessage> mockLeftInputOp = mock(OperatorSpec.class);
  MessageStream<SamzaSqlRelMessage> mockLeftInputStream = new MessageStreamImpl<>(mockAppDesc, mockLeftInputOp);
  when(mockTranslatorContext.getMessageStream(eq(mockLeftInput.getId()))).thenReturn(mockLeftInputStream);
  OperatorSpec<Object, SamzaSqlRelMessage> mockRightInputOp = mock(OperatorSpec.class);
  MessageStream<SamzaSqlRelMessage> mockRightInputStream = new MessageStreamImpl<>(mockAppDesc, mockRightInputOp);
  when(mockTranslatorContext.getMessageStream(eq(mockRightInput.getId()))).thenReturn(mockRightInputStream);
  when(mockTranslatorContext.getStreamAppDescriptor()).thenReturn(mockAppDesc);
  InputOperatorSpec mockInputOp = mock(InputOperatorSpec.class);
  OutputStreamImpl mockOutputStream = mock(OutputStreamImpl.class);
  when(mockInputOp.isKeyed()).thenReturn(true);
  when(mockOutputStream.isKeyed()).thenReturn(true);
  // Capture whatever stream the translator registers for the join node (id 3).
  doAnswer(this.getRegisterMessageStreamAnswer()).when(mockTranslatorContext).registerMessageStream(eq(3), any(MessageStream.class));
  RexToJavaCompiler mockCompiler = mock(RexToJavaCompiler.class);
  when(mockTranslatorContext.getExpressionCompiler()).thenReturn(mockCompiler);
  Expression mockExpr = mock(Expression.class);
  when(mockCompiler.compile(any(), any())).thenReturn(mockExpr);

  // Table lookup on the app descriptor; the non-remote case additionally stubs an intermediate stream.
  if (isRemoteTable) {
    doAnswer(this.getRegisteredTableAnswer()).when(mockAppDesc).getTable(any(RemoteTableDescriptor.class));
  } else {
    IntermediateMessageStreamImpl mockPartitionedStream = new IntermediateMessageStreamImpl(mockAppDesc, mockInputOp, mockOutputStream);
    when(mockAppDesc.getIntermediateStream(any(String.class), any(Serde.class), eq(false))).thenReturn(mockPartitionedStream);
    doAnswer(this.getRegisteredTableAnswer()).when(mockAppDesc).getTable(any(RocksDbTableDescriptor.class));
  }
  when(mockJoin.getJoinType()).thenReturn(JoinRelType.INNER);

  // SQL IO config for the left table, keyed by its dot-joined qualified name ("test.LeftTable").
  SamzaSqlExecutionContext mockExecutionContext = mock(SamzaSqlExecutionContext.class);
  when(mockTranslatorContext.getExecutionContext()).thenReturn(mockExecutionContext);
  SamzaSqlApplicationConfig mockAppConfig = mock(SamzaSqlApplicationConfig.class);
  when(mockExecutionContext.getSamzaSqlApplicationConfig()).thenReturn(mockAppConfig);
  Map<String, SqlIOConfig> ssConfigBySource = mock(HashMap.class);
  when(mockAppConfig.getInputSystemStreamConfigBySource()).thenReturn(ssConfigBySource);
  SqlIOConfig mockIOConfig = mock(SqlIOConfig.class);
  TableDescriptor mockTableDesc;
  if (isRemoteTable) {
    mockTableDesc = mock(RemoteTableDescriptor.class);
  } else {
    mockTableDesc = mock(RocksDbTableDescriptor.class);
  }
  when(ssConfigBySource.get(String.join(".", qualifiedTableName))).thenReturn(mockIOConfig);
  when(mockIOConfig.getTableDescriptor()).thenReturn(Optional.of(mockTableDesc));

  JoinTranslator joinTranslator = new JoinTranslator(logicalOpId, "", queryId);

  // Verify Metrics Works with Join
  Context mockContext = mock(Context.class);
  ContainerContext mockContainerContext = mock(ContainerContext.class);
  TestMetricsRegistryImpl testMetricsRegistryImpl = new TestMetricsRegistryImpl();
  when(mockContext.getContainerContext()).thenReturn(mockContainerContext);
  when(mockContainerContext.getContainerMetricsRegistry()).thenReturn(testMetricsRegistryImpl);
  TranslatorInputMetricsMapFunction inputMetricsMF = joinTranslator.getInputMetricsMF();
  assertNotNull(inputMetricsMF);
  inputMetricsMF.init(mockContext);
  TranslatorOutputMetricsMapFunction outputMetricsMF = joinTranslator.getOutputMetricsMF();
  assertNotNull(outputMetricsMF);
  outputMetricsMF.init(mockContext);
  // After both init() calls: one counter group (keyed by the logical op id) holding two zeroed
  // counters, and one gauge group.
  assertEquals(1, testMetricsRegistryImpl.getCounters().size());
  assertEquals(2, testMetricsRegistryImpl.getCounters().get(logicalOpId).size());
  assertEquals(0, testMetricsRegistryImpl.getCounters().get(logicalOpId).get(0).getCount());
  assertEquals(0, testMetricsRegistryImpl.getCounters().get(logicalOpId).get(1).getCount());
  assertEquals(1, testMetricsRegistryImpl.getGauges().size());

  // Apply translate() method to verify that we are getting the correct join operator constructed
  joinTranslator.translate(mockJoin, mockTranslatorContext);
  // make sure the output message stream has been registered with the context under the join's id
  verify(mockTranslatorContext, times(1)).registerMessageStream(3, this.getRegisteredMessageStream(3));
  when(mockTranslatorContext.getRelNode(3)).thenReturn(mockJoin);
  when(mockTranslatorContext.getMessageStream(3)).thenReturn(this.getRegisteredMessageStream(3));
  StreamTableJoinOperatorSpec joinSpec = (StreamTableJoinOperatorSpec) Whitebox.getInternalState(this.getRegisteredMessageStream(3), "operatorSpec");
  assertNotNull(joinSpec);
  // Expected value goes first so that a failure message reports expected/actual the right way round.
  assertEquals(OperatorSpec.OpCode.JOIN, joinSpec.getOpCode());

  // Verify joinSpec has the corresponding setup
  StreamTableJoinFunction joinFn = joinSpec.getJoinFn();
  assertNotNull(joinFn);
  if (isRemoteTable) {
    assertTrue(joinFn instanceof SamzaSqlRemoteTableJoinFunction);
  } else {
    assertTrue(joinFn instanceof SamzaSqlLocalTableJoinFunction);
  }
  // The table is the left input here, so the join function must not report it on the right.
  assertEquals(false, Whitebox.getInternalState(joinFn, "isTablePosOnRight"));
  assertEquals(Collections.singletonList(0), Whitebox.getInternalState(joinFn, "streamFieldIds"));
  assertEquals(leftFieldNames, Whitebox.getInternalState(joinFn, "tableFieldNames"));
  // Output fields are the left (table) field names followed by the right (stream) field names.
  List<String> outputFieldNames = new ArrayList<>();
  outputFieldNames.addAll(leftFieldNames);
  outputFieldNames.addAll(rightStreamFieldNames);
  assertEquals(outputFieldNames, Whitebox.getInternalState(joinFn, "outFieldNames"));
}
use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.
the class TaskApplicationExample method describe.
/**
 * Declares the example application's resources on the given descriptor: one Kafka input stream,
 * one Kafka output stream, one RocksDB-backed table, and the factory that creates the task.
 *
 * @param appDescriptor descriptor to which the streams, table and task factory are attached
 */
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
  // Both streams live on the same Kafka system and carry String messages.
  KafkaSystemDescriptor trackingSystem = new KafkaSystemDescriptor("tracking");
  KafkaInputDescriptor<String> inputDescriptor = trackingSystem.getInputDescriptor("myinput", new StringSerde());
  KafkaOutputDescriptor<String> outputDescriptor = trackingSystem.getOutputDescriptor("myout", new StringSerde());

  // Table whose key and value both use NoOpSerde.
  TableDescriptor tableDescriptor =
      new RocksDbTableDescriptor("mytable", new KVSerde(new NoOpSerde(), new NoOpSerde()));

  StreamTaskFactory taskFactory = MyStreamTask::new;

  appDescriptor
      .withInputStream(inputDescriptor)
      .withOutputStream(outputDescriptor)
      .withTable(tableDescriptor)
      .withTaskFactory(taskFactory);
}
use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.
the class TableConfigGenerator method generateSerdeConfig.
/**
 * Builds the serde configuration entries for the local tables among the given descriptors.
 *
 * <p>Descriptors that are not {@code LocalTableDescriptor}s are ignored. Every distinct key or
 * value serde gets a generated name of the form {@code <SimpleClassName>-<random UUID>} and is
 * stored Base64-encoded under its serialized-instance config key; each table's key and message
 * serde config entries then point at the corresponding generated name.
 *
 * @param tableDescriptors table descriptors, for which serde configuration to be generated
 * @return serde configuration for tables
 */
public static Map<String, String> generateSerdeConfig(List<TableDescriptor> tableDescriptors) {
  // Gather the key and message serde instance of every local table, indexed by table id.
  Map<String, Serde> keySerdeByTableId = new HashMap<>();
  Map<String, Serde> valueSerdeByTableId = new HashMap<>();
  for (TableDescriptor descriptor : tableDescriptors) {
    if (!(descriptor instanceof LocalTableDescriptor)) {
      continue;
    }
    LocalTableDescriptor localDescriptor = (LocalTableDescriptor) descriptor;
    keySerdeByTableId.put(localDescriptor.getTableId(), localDescriptor.getSerde().getKeySerde());
    valueSerdeByTableId.put(localDescriptor.getTableId(), localDescriptor.getSerde().getValueSerde());
  }

  // De-duplicate so that a serde shared by several tables is serialized only once.
  HashSet<Serde> distinctSerdes = new HashSet<>();
  distinctSerdes.addAll(keySerdeByTableId.values());
  distinctSerdes.addAll(valueSerdeByTableId.values());

  // Name each distinct serde and record its serialized form.
  Map<String, String> serdeConfigs = new HashMap<>();
  Map<Serde, String> nameBySerde = new HashMap<>();
  SerializableSerde<Serde> serdeSerializer = new SerializableSerde<>();
  Base64.Encoder encoder = Base64.getEncoder();
  for (Serde serde : distinctSerdes) {
    String generatedName = nameBySerde.computeIfAbsent(
        serde, key -> key.getClass().getSimpleName() + "-" + UUID.randomUUID());
    String instanceConfigKey = String.format(SerializerConfig.SERDE_SERIALIZED_INSTANCE, generatedName);
    String encodedSerde = encoder.encodeToString(serdeSerializer.toBytes(serde));
    serdeConfigs.putIfAbsent(instanceConfigKey, encodedSerde);
  }

  // Point each table's key / message serde setting at the generated serde name.
  for (Map.Entry<String, Serde> keyEntry : keySerdeByTableId.entrySet()) {
    String keySerdeConfigKey = String.format(JavaTableConfig.STORE_KEY_SERDE, keyEntry.getKey());
    serdeConfigs.put(keySerdeConfigKey, nameBySerde.get(keyEntry.getValue()));
  }
  for (Map.Entry<String, Serde> valueEntry : valueSerdeByTableId.entrySet()) {
    String valueSerdeConfigKey = String.format(JavaTableConfig.STORE_MSG_SERDE, valueEntry.getKey());
    serdeConfigs.put(valueSerdeConfigKey, nameBySerde.get(valueEntry.getValue()));
  }
  return serdeConfigs;
}
use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.
the class ExecutionPlanner method groupJoinedStreams.
/**
 * Groups streams participating in joins together.
 *
 * <p>One {@code StreamSet} is produced per join operator spec found in the job graph. For a
 * stream-table join whose table is described by a {@code LocalTableDescriptor}, the stream edges
 * of that table's side inputs (if any) are added to the join's set.
 *
 * @param jobGraph job graph supplying the input operators, tables and stream edges
 * @return an unmodifiable list with one stream set per join operator spec
 */
private static List<StreamSet> groupJoinedStreams(JobGraph jobGraph) {
  // Group input operator specs (input/intermediate streams) by the joins they participate in.
  Multimap<OperatorSpec, InputOperatorSpec> joinOpSpecToInputOpSpecs =
      OperatorSpecGraphAnalyzer.getJoinToInputOperatorSpecs(
          jobGraph.getApplicationDescriptorImpl().getInputOperators().values());

  Map<String, TableDescriptor> tableDescriptors = jobGraph.getTables().stream()
      .collect(Collectors.toMap(TableDescriptor::getTableId, Function.identity()));

  // Convert every group of input operator specs into a group of corresponding stream edges.
  List<StreamSet> streamSets = new ArrayList<>();
  for (OperatorSpec joinOpSpec : joinOpSpecToInputOpSpecs.keySet()) {
    Collection<InputOperatorSpec> joinedInputOpSpecs = joinOpSpecToInputOpSpecs.get(joinOpSpec);
    StreamSet streamSet = getStreamSet(joinOpSpec.getOpId(), joinedInputOpSpecs, jobGraph);

    // If the current join is a stream-table join, also add the stream edges of the side-input
    // streams associated with the joined table (if any).
    if (joinOpSpec instanceof StreamTableJoinOperatorSpec) {
      StreamTableJoinOperatorSpec streamTableJoinOperatorSpec = (StreamTableJoinOperatorSpec) joinOpSpec;
      TableDescriptor tableDescriptor = tableDescriptors.get(streamTableJoinOperatorSpec.getTableId());
      if (tableDescriptor instanceof LocalTableDescriptor) {
        LocalTableDescriptor localTableDescriptor = (LocalTableDescriptor) tableDescriptor;
        Collection<String> sideInputs = ListUtils.emptyIfNull(localTableDescriptor.getSideInputs());
        // Materialize the edges into a list. An Iterable built from `stream::iterator` can be
        // traversed only once, because a stream permits a single terminal operation; a second
        // traversal would throw IllegalStateException.
        List<StreamEdge> sideInputStreams = sideInputs.stream()
            .map(jobGraph::getStreamEdge)
            .collect(Collectors.toList());
        Iterable<StreamEdge> streams = streamSet.getStreamEdges();
        streamSet = new StreamSet(streamSet.getSetId(), Iterables.concat(streams, sideInputStreams));
      }
    }
    streamSets.add(streamSet);
  }
  return Collections.unmodifiableList(streamSets);
}
use of org.apache.samza.table.descriptors.TableDescriptor in project samza by apache.
the class TestExecutionPlanner method createStreamGraphWithStreamTableJoin.
/**
 * Builds the application descriptor for the stream-table join example: input2 is repartitioned
 * and written to a table, while input1 is repartitioned, joined with that table, joined with
 * input3, and finally sent to output1.
 *
 * @return the application descriptor describing the example graph
 */
private StreamApplicationDescriptorImpl createStreamGraphWithStreamTableJoin() {
  /*
   * Example stream-table join app. Expected partition counts of intermediate streams introduced
   * by partitionBy operations are enclosed in quotes.
   *
   *    input2 (16) -> partitionBy ("32") -> send-to-table t
   *
   *                                      join-table t ------
   *                                       |                 |
   *    input1 (64) -> partitionBy ("32") _|                 |
   *                                                        join -> output1 (8)
   *                                                         |
   *                                      input3 (32) -------
   */
  return new StreamApplicationDescriptorImpl(app -> {
    MessageStream<KV<Object, Object>> input1Stream = app.getInputStream(input1Descriptor);
    MessageStream<KV<Object, Object>> input2Stream = app.getInputStream(input2Descriptor);
    MessageStream<KV<Object, Object>> input3Stream = app.getInputStream(input3Descriptor);
    OutputStream<KV<Object, Object>> outputStream = app.getOutputStream(output1Descriptor);

    TableDescriptor joinTableDescriptor = new TestLocalTableDescriptor.MockLocalTableDescriptor(
        "table-id", new KVSerde(new StringSerde(), new StringSerde()));
    Table joinTable = app.getTable(joinTableDescriptor);

    // input2 -> partitionBy "p1" -> table
    input2Stream
        .partitionBy(kv -> kv.key, kv -> kv.value, mock(KVSerde.class), "p1")
        .sendTo(joinTable);

    // input1 -> partitionBy "p2" -> join with table -> join with input3 ("j2") -> output1
    input1Stream
        .partitionBy(kv -> kv.key, kv -> kv.value, mock(KVSerde.class), "p2")
        .join(joinTable, mock(StreamTableJoinFunction.class))
        .join(input3Stream, mock(JoinFunction.class), mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j2")
        .sendTo(outputStream);
  }, config);
}
Aggregations