use of org.apache.samza.operators.KV in project samza by apache.
the class KVSerde method fromBytes.
@Override
public KV<K, V> fromBytes(byte[] bytes) {
if (bytes != null) {
ByteBuffer byteBuffer = ByteBuffer.wrap(bytes);
int keyLength = byteBuffer.getInt();
byte[] keyBytes = new byte[keyLength];
byteBuffer.get(keyBytes);
int valueLength = byteBuffer.getInt();
byte[] valueBytes = new byte[valueLength];
byteBuffer.get(valueBytes);
K key = keySerde.fromBytes(keyBytes);
V value = valueSerde.fromBytes(valueBytes);
return KV.of(key, value);
} else {
return null;
}
}
use of org.apache.samza.operators.KV in project samza by apache.
the class TestOperatorImplGraph method testJoinChain.
@Test
public void testJoinChain() {
String inputStreamId1 = "input1";
String inputStreamId2 = "input2";
String inputSystem = "input-system";
String inputPhysicalName1 = "input-stream1";
String inputPhysicalName2 = "input-stream2";
HashMap<String, String> configs = new HashMap<>();
configs.put(JobConfig.JOB_NAME, "jobName");
configs.put(JobConfig.JOB_ID, "jobId");
StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputPhysicalName1);
StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputPhysicalName2);
Config config = new MapConfig(configs);
when(this.context.getJobContext().getConfig()).thenReturn(config);
Integer joinKey = new Integer(1);
Function<Object, Integer> keyFn = (Function & Serializable) m -> joinKey;
JoinFunction testJoinFunction = new TestJoinFunction("jobName-jobId-join-j1", (BiFunction & Serializable) (m1, m2) -> KV.of(m1, m2), keyFn, keyFn);
StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
GenericInputDescriptor inputDescriptor1 = sd.getInputDescriptor(inputStreamId1, mock(Serde.class));
GenericInputDescriptor inputDescriptor2 = sd.getInputDescriptor(inputStreamId2, mock(Serde.class));
MessageStream<Object> inputStream1 = appDesc.getInputStream(inputDescriptor1);
MessageStream<Object> inputStream2 = appDesc.getInputStream(inputDescriptor2);
inputStream1.join(inputStream2, testJoinFunction, mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j1");
}, config);
TaskName mockTaskName = mock(TaskName.class);
TaskModel taskModel = mock(TaskModel.class);
when(taskModel.getTaskName()).thenReturn(mockTaskName);
when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel);
KeyValueStore mockLeftStore = mock(KeyValueStore.class);
when(this.context.getTaskContext().getStore(eq("jobName-jobId-join-j1-L"))).thenReturn(mockLeftStore);
KeyValueStore mockRightStore = mock(KeyValueStore.class);
when(this.context.getTaskContext().getStore(eq("jobName-jobId-join-j1-R"))).thenReturn(mockRightStore);
OperatorImplGraph opImplGraph = new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));
// verify that join function is initialized once.
assertEquals(TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1").numInitCalled, 1);
InputOperatorImpl inputOpImpl1 = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName1));
InputOperatorImpl inputOpImpl2 = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName2));
PartialJoinOperatorImpl leftPartialJoinOpImpl = (PartialJoinOperatorImpl) inputOpImpl1.registeredOperators.iterator().next();
PartialJoinOperatorImpl rightPartialJoinOpImpl = (PartialJoinOperatorImpl) inputOpImpl2.registeredOperators.iterator().next();
assertEquals(leftPartialJoinOpImpl.getOperatorSpec(), rightPartialJoinOpImpl.getOperatorSpec());
assertNotSame(leftPartialJoinOpImpl, rightPartialJoinOpImpl);
// verify that left partial join operator calls getFirstKey
Object mockLeftMessage = mock(Object.class);
long currentTimeMillis = System.currentTimeMillis();
when(mockLeftStore.get(eq(joinKey))).thenReturn(new TimestampedValue<>(mockLeftMessage, currentTimeMillis));
IncomingMessageEnvelope leftMessage = new IncomingMessageEnvelope(mock(SystemStreamPartition.class), "", "", mockLeftMessage);
inputOpImpl1.onMessage(leftMessage, mock(MessageCollector.class), mock(TaskCoordinator.class));
// verify that right partial join operator calls getSecondKey
Object mockRightMessage = mock(Object.class);
when(mockRightStore.get(eq(joinKey))).thenReturn(new TimestampedValue<>(mockRightMessage, currentTimeMillis));
IncomingMessageEnvelope rightMessage = new IncomingMessageEnvelope(mock(SystemStreamPartition.class), "", "", mockRightMessage);
inputOpImpl2.onMessage(rightMessage, mock(MessageCollector.class), mock(TaskCoordinator.class));
// verify that the join function apply is called with the correct messages on match
assertEquals(((TestJoinFunction) TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1")).joinResults.size(), 1);
KV joinResult = (KV) ((TestJoinFunction) TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1")).joinResults.iterator().next();
assertEquals(joinResult.getKey(), mockLeftMessage);
assertEquals(joinResult.getValue(), mockRightMessage);
}
use of org.apache.samza.operators.KV in project samza by apache.
the class TestOperatorImplGraph method testPartitionByChain.
@Test
public void testPartitionByChain() {
String inputStreamId = "input";
String inputSystem = "input-system";
String inputPhysicalName = "input-stream";
String outputStreamId = "output";
String outputSystem = "output-system";
String outputPhysicalName = "output-stream";
String intermediateStreamId = "jobName-jobId-partition_by-p1";
String intermediateSystem = "intermediate-system";
HashMap<String, String> configs = new HashMap<>();
configs.put(JobConfig.JOB_NAME, "jobName");
configs.put(JobConfig.JOB_ID, "jobId");
configs.put(JobConfig.JOB_DEFAULT_SYSTEM, intermediateSystem);
StreamTestUtils.addStreamConfigs(configs, inputStreamId, inputSystem, inputPhysicalName);
StreamTestUtils.addStreamConfigs(configs, outputStreamId, outputSystem, outputPhysicalName);
Config config = new MapConfig(configs);
when(this.context.getJobContext().getConfig()).thenReturn(config);
StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
GenericSystemDescriptor isd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
GenericSystemDescriptor osd = new GenericSystemDescriptor(outputSystem, "mockFactoryClass");
GenericInputDescriptor inputDescriptor = isd.getInputDescriptor(inputStreamId, mock(Serde.class));
GenericOutputDescriptor outputDescriptor = osd.getOutputDescriptor(outputStreamId, KVSerde.of(mock(IntegerSerde.class), mock(StringSerde.class)));
MessageStream<Object> inputStream = appDesc.getInputStream(inputDescriptor);
OutputStream<KV<Integer, String>> outputStream = appDesc.getOutputStream(outputDescriptor);
inputStream.partitionBy(Object::hashCode, Object::toString, KVSerde.of(mock(IntegerSerde.class), mock(StringSerde.class)), "p1").sendTo(outputStream);
}, config);
JobModel jobModel = mock(JobModel.class);
ContainerModel containerModel = mock(ContainerModel.class);
TaskModel taskModel = mock(TaskModel.class);
when(jobModel.getContainers()).thenReturn(Collections.singletonMap("0", containerModel));
when(containerModel.getTasks()).thenReturn(Collections.singletonMap(new TaskName("task 0"), taskModel));
when(taskModel.getSystemStreamPartitions()).thenReturn(Collections.emptySet());
when(((TaskContextImpl) this.context.getTaskContext()).getJobModel()).thenReturn(jobModel);
OperatorImplGraph opImplGraph = new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));
InputOperatorImpl inputOpImpl = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName));
assertEquals(1, inputOpImpl.registeredOperators.size());
OperatorImpl partitionByOpImpl = (PartitionByOperatorImpl) inputOpImpl.registeredOperators.iterator().next();
// is terminal but paired with an input operator
assertEquals(0, partitionByOpImpl.registeredOperators.size());
assertEquals(OpCode.PARTITION_BY, partitionByOpImpl.getOperatorSpec().getOpCode());
InputOperatorImpl repartitionedInputOpImpl = opImplGraph.getInputOperator(new SystemStream(intermediateSystem, intermediateStreamId));
assertEquals(1, repartitionedInputOpImpl.registeredOperators.size());
OperatorImpl sendToOpImpl = (OutputOperatorImpl) repartitionedInputOpImpl.registeredOperators.iterator().next();
assertEquals(0, sendToOpImpl.registeredOperators.size());
assertEquals(OpCode.SEND_TO, sendToOpImpl.getOperatorSpec().getOpCode());
}
use of org.apache.samza.operators.KV in project samza by apache.
the class TestStreamTableJoinOperatorImpl method testJoinFunctionIsInvokedOnlyOnce.
/**
* Ensure join function is not invoked more than once when join function returns null on the first invocation
*/
@Test
public void testJoinFunctionIsInvokedOnlyOnce() {
final String tableId = "testTable";
final CountDownLatch joinInvokedLatch = new CountDownLatch(1);
StreamTableJoinOperatorSpec mockJoinOpSpec = mock(StreamTableJoinOperatorSpec.class);
when(mockJoinOpSpec.getTableId()).thenReturn(tableId);
when(mockJoinOpSpec.getArgs()).thenReturn(new Object[0]);
when(mockJoinOpSpec.getJoinFn()).thenReturn(new StreamTableJoinFunction<String, KV<String, String>, KV<String, String>, String>() {
@Override
public String apply(KV<String, String> message, KV<String, String> record) {
joinInvokedLatch.countDown();
return null;
}
@Override
public String getMessageKey(KV<String, String> message) {
return message.getKey();
}
@Override
public String getRecordKey(KV<String, String> record) {
return record.getKey();
}
});
ReadWriteUpdateTable table = mock(ReadWriteUpdateTable.class);
when(table.getAsync("1")).thenReturn(CompletableFuture.completedFuture("r1"));
Context context = new MockContext();
when(context.getTaskContext().getUpdatableTable(tableId)).thenReturn(table);
MessageCollector mockMessageCollector = mock(MessageCollector.class);
TaskCoordinator mockTaskCoordinator = mock(TaskCoordinator.class);
StreamTableJoinOperatorImpl streamTableJoinOperator = new StreamTableJoinOperatorImpl(mockJoinOpSpec, context);
// Table has the key
streamTableJoinOperator.handleMessage(KV.of("1", "m1"), mockMessageCollector, mockTaskCoordinator);
assertEquals("Join function should only be invoked once", 0, joinInvokedLatch.getCount());
}
use of org.apache.samza.operators.KV in project samza by apache.
the class TestOperatorSpec method testOutputOperatorSpec.
@Test
public void testOutputOperatorSpec() {
Serde<Object> objSerde = new Serde<Object>() {
@Override
public Object fromBytes(byte[] bytes) {
return null;
}
@Override
public byte[] toBytes(Object object) {
return new byte[0];
}
};
OutputStreamImpl<KV<String, Object>> outputStrmImpl = new OutputStreamImpl<>("mockStreamId", new StringSerde("UTF-8"), objSerde, true);
OutputOperatorSpec<KV<String, Object>> outputOperatorSpec = new OutputOperatorSpec<>(outputStrmImpl, "op0");
OutputOperatorSpec<KV<String, Object>> outputOpCopy = (OutputOperatorSpec<KV<String, Object>>) OperatorSpecTestUtils.copyOpSpec(outputOperatorSpec);
assertNotEquals("Expected deserialized copy of operator spec should not be the same as the original operator spec", outputOperatorSpec, outputOpCopy);
assertTrue(outputOperatorSpec.isClone(outputOpCopy));
}
Aggregations