use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.
the class TestRemoteTableEndToEnd method testSendToWithDefaultsAndUpdateOnly.
// Test fails with the following exception:
// org.apache.samza.SamzaException: Put default failed for update as the UpdateOptions was set to UPDATE_ONLY.
// Please use UpdateOptions.UPDATE_WITH_DEFAULTS instead.
@Test(expected = SamzaException.class)
public void testSendToWithDefaultsAndUpdateOnly() throws Exception {
String testName = "testSendToWithDefaultsAndUpdateOnly";
final String profiles = Base64Serializer.serialize(generateProfiles(30));
final RateLimiter readRateLimiter = mock(RateLimiter.class, withSettings().serializable());
final TableRateLimiter.CreditFunction creditFunction = (k, v, args) -> 1;
final StreamApplication app = appDesc -> {
final RemoteTableDescriptor joinTableDesc = new RemoteTableDescriptor<Integer, TestTableData.Profile, Void>("profile-table-1").withReadFunction(InMemoryProfileReadFunction.getInMemoryReadFunction(profiles)).withRateLimiter(readRateLimiter, creditFunction, null);
final RemoteTableDescriptor outputTableDesc = new RemoteTableDescriptor<Integer, EnrichedPageView, EnrichedPageView>("enriched-page-view-table-1").withReadFunction(new NoOpTableReadFunction<>()).withReadRateLimiterDisabled().withWriteFunction(new InMemoryEnrichedPageViewWriteFunction2(testName, false)).withWriteRateLimit(1000);
// counters to count puts and updates
COUNTERS.put(testName + "-put", new AtomicInteger());
COUNTERS.put(testName + "-update", new AtomicInteger());
final Table<KV<Integer, Profile>> outputTable = appDesc.getTable(outputTableDesc);
final Table<KV<Integer, Profile>> joinTable = appDesc.getTable(joinTableDesc);
final DelegatingSystemDescriptor ksd = new DelegatingSystemDescriptor("test");
final GenericInputDescriptor<PageView> isd = ksd.getInputDescriptor("PageView", new NoOpSerde<>());
appDesc.getInputStream(isd).map(pv -> new KV<>(pv.getMemberId(), pv)).join(joinTable, new PageViewToProfileJoinFunction()).map(m -> new KV(m.getMemberId(), UpdateMessage.of(m, m))).sendTo(outputTable, UpdateOptions.UPDATE_ONLY);
};
int numPageViews = 15;
InMemorySystemDescriptor isd = new InMemorySystemDescriptor("test");
InMemoryInputDescriptor<PageView> inputDescriptor = isd.getInputDescriptor("PageView", new NoOpSerde<>());
Map<Integer, List<PageView>> integerListMap = TestTableData.generatePartitionedPageViews(numPageViews, 1);
TestRunner.of(app).addInputStream(inputDescriptor, integerListMap).run(Duration.ofSeconds(10));
}
use of org.apache.samza.system.descriptors.GenericInputDescriptor in project beam by apache.
the class TranslationContext method createDummyStreamDescriptor.
/**
* The dummy stream created will only be used in Beam tests.
*/
private static InputDescriptor<OpMessage<String>, ?> createDummyStreamDescriptor(String id) {
final GenericSystemDescriptor dummySystem = new GenericSystemDescriptor(id, InMemorySystemFactory.class.getName());
final GenericInputDescriptor<OpMessage<String>> dummyInput = dummySystem.getInputDescriptor(id, new NoOpSerde<>());
dummyInput.withOffsetDefault(SystemStreamMetadata.OffsetType.OLDEST);
final Config config = new MapConfig(dummyInput.toConfig(), dummySystem.toConfig());
final SystemFactory factory = new InMemorySystemFactory();
final StreamSpec dummyStreamSpec = new StreamSpec(id, id, id, 1);
factory.getAdmin(id, config).createStream(dummyStreamSpec);
final SystemProducer producer = factory.getProducer(id, config, null);
final SystemStream sysStream = new SystemStream(id, id);
final Consumer<Object> sendFn = (msg) -> {
producer.send(id, new OutgoingMessageEnvelope(sysStream, 0, null, msg));
};
final WindowedValue<String> windowedValue = WindowedValue.timestampedValueInGlobalWindow("dummy", new Instant());
sendFn.accept(OpMessage.ofElement(windowedValue));
sendFn.accept(new WatermarkMessage(BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis()));
sendFn.accept(new EndOfStreamMessage(null));
return dummyInput;
}
use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.
the class TestOperatorImplGraph method testJoinChain.
@Test
public void testJoinChain() {
String inputStreamId1 = "input1";
String inputStreamId2 = "input2";
String inputSystem = "input-system";
String inputPhysicalName1 = "input-stream1";
String inputPhysicalName2 = "input-stream2";
HashMap<String, String> configs = new HashMap<>();
configs.put(JobConfig.JOB_NAME, "jobName");
configs.put(JobConfig.JOB_ID, "jobId");
StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputPhysicalName1);
StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputPhysicalName2);
Config config = new MapConfig(configs);
when(this.context.getJobContext().getConfig()).thenReturn(config);
Integer joinKey = new Integer(1);
Function<Object, Integer> keyFn = (Function & Serializable) m -> joinKey;
JoinFunction testJoinFunction = new TestJoinFunction("jobName-jobId-join-j1", (BiFunction & Serializable) (m1, m2) -> KV.of(m1, m2), keyFn, keyFn);
StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
GenericInputDescriptor inputDescriptor1 = sd.getInputDescriptor(inputStreamId1, mock(Serde.class));
GenericInputDescriptor inputDescriptor2 = sd.getInputDescriptor(inputStreamId2, mock(Serde.class));
MessageStream<Object> inputStream1 = appDesc.getInputStream(inputDescriptor1);
MessageStream<Object> inputStream2 = appDesc.getInputStream(inputDescriptor2);
inputStream1.join(inputStream2, testJoinFunction, mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j1");
}, config);
TaskName mockTaskName = mock(TaskName.class);
TaskModel taskModel = mock(TaskModel.class);
when(taskModel.getTaskName()).thenReturn(mockTaskName);
when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel);
KeyValueStore mockLeftStore = mock(KeyValueStore.class);
when(this.context.getTaskContext().getStore(eq("jobName-jobId-join-j1-L"))).thenReturn(mockLeftStore);
KeyValueStore mockRightStore = mock(KeyValueStore.class);
when(this.context.getTaskContext().getStore(eq("jobName-jobId-join-j1-R"))).thenReturn(mockRightStore);
OperatorImplGraph opImplGraph = new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));
// verify that join function is initialized once.
assertEquals(TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1").numInitCalled, 1);
InputOperatorImpl inputOpImpl1 = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName1));
InputOperatorImpl inputOpImpl2 = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName2));
PartialJoinOperatorImpl leftPartialJoinOpImpl = (PartialJoinOperatorImpl) inputOpImpl1.registeredOperators.iterator().next();
PartialJoinOperatorImpl rightPartialJoinOpImpl = (PartialJoinOperatorImpl) inputOpImpl2.registeredOperators.iterator().next();
assertEquals(leftPartialJoinOpImpl.getOperatorSpec(), rightPartialJoinOpImpl.getOperatorSpec());
assertNotSame(leftPartialJoinOpImpl, rightPartialJoinOpImpl);
// verify that left partial join operator calls getFirstKey
Object mockLeftMessage = mock(Object.class);
long currentTimeMillis = System.currentTimeMillis();
when(mockLeftStore.get(eq(joinKey))).thenReturn(new TimestampedValue<>(mockLeftMessage, currentTimeMillis));
IncomingMessageEnvelope leftMessage = new IncomingMessageEnvelope(mock(SystemStreamPartition.class), "", "", mockLeftMessage);
inputOpImpl1.onMessage(leftMessage, mock(MessageCollector.class), mock(TaskCoordinator.class));
// verify that right partial join operator calls getSecondKey
Object mockRightMessage = mock(Object.class);
when(mockRightStore.get(eq(joinKey))).thenReturn(new TimestampedValue<>(mockRightMessage, currentTimeMillis));
IncomingMessageEnvelope rightMessage = new IncomingMessageEnvelope(mock(SystemStreamPartition.class), "", "", mockRightMessage);
inputOpImpl2.onMessage(rightMessage, mock(MessageCollector.class), mock(TaskCoordinator.class));
// verify that the join function apply is called with the correct messages on match
assertEquals(((TestJoinFunction) TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1")).joinResults.size(), 1);
KV joinResult = (KV) ((TestJoinFunction) TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1")).joinResults.iterator().next();
assertEquals(joinResult.getKey(), mockLeftMessage);
assertEquals(joinResult.getValue(), mockRightMessage);
}
use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.
the class TestOperatorImplGraph method testPartitionByChain.
@Test
public void testPartitionByChain() {
String inputStreamId = "input";
String inputSystem = "input-system";
String inputPhysicalName = "input-stream";
String outputStreamId = "output";
String outputSystem = "output-system";
String outputPhysicalName = "output-stream";
String intermediateStreamId = "jobName-jobId-partition_by-p1";
String intermediateSystem = "intermediate-system";
HashMap<String, String> configs = new HashMap<>();
configs.put(JobConfig.JOB_NAME, "jobName");
configs.put(JobConfig.JOB_ID, "jobId");
configs.put(JobConfig.JOB_DEFAULT_SYSTEM, intermediateSystem);
StreamTestUtils.addStreamConfigs(configs, inputStreamId, inputSystem, inputPhysicalName);
StreamTestUtils.addStreamConfigs(configs, outputStreamId, outputSystem, outputPhysicalName);
Config config = new MapConfig(configs);
when(this.context.getJobContext().getConfig()).thenReturn(config);
StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
GenericSystemDescriptor isd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
GenericSystemDescriptor osd = new GenericSystemDescriptor(outputSystem, "mockFactoryClass");
GenericInputDescriptor inputDescriptor = isd.getInputDescriptor(inputStreamId, mock(Serde.class));
GenericOutputDescriptor outputDescriptor = osd.getOutputDescriptor(outputStreamId, KVSerde.of(mock(IntegerSerde.class), mock(StringSerde.class)));
MessageStream<Object> inputStream = appDesc.getInputStream(inputDescriptor);
OutputStream<KV<Integer, String>> outputStream = appDesc.getOutputStream(outputDescriptor);
inputStream.partitionBy(Object::hashCode, Object::toString, KVSerde.of(mock(IntegerSerde.class), mock(StringSerde.class)), "p1").sendTo(outputStream);
}, config);
JobModel jobModel = mock(JobModel.class);
ContainerModel containerModel = mock(ContainerModel.class);
TaskModel taskModel = mock(TaskModel.class);
when(jobModel.getContainers()).thenReturn(Collections.singletonMap("0", containerModel));
when(containerModel.getTasks()).thenReturn(Collections.singletonMap(new TaskName("task 0"), taskModel));
when(taskModel.getSystemStreamPartitions()).thenReturn(Collections.emptySet());
when(((TaskContextImpl) this.context.getTaskContext()).getJobModel()).thenReturn(jobModel);
OperatorImplGraph opImplGraph = new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));
InputOperatorImpl inputOpImpl = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName));
assertEquals(1, inputOpImpl.registeredOperators.size());
OperatorImpl partitionByOpImpl = (PartitionByOperatorImpl) inputOpImpl.registeredOperators.iterator().next();
// is terminal but paired with an input operator
assertEquals(0, partitionByOpImpl.registeredOperators.size());
assertEquals(OpCode.PARTITION_BY, partitionByOpImpl.getOperatorSpec().getOpCode());
InputOperatorImpl repartitionedInputOpImpl = opImplGraph.getInputOperator(new SystemStream(intermediateSystem, intermediateStreamId));
assertEquals(1, repartitionedInputOpImpl.registeredOperators.size());
OperatorImpl sendToOpImpl = (OutputOperatorImpl) repartitionedInputOpImpl.registeredOperators.iterator().next();
assertEquals(0, sendToOpImpl.registeredOperators.size());
assertEquals(OpCode.SEND_TO, sendToOpImpl.getOperatorSpec().getOpCode());
}
use of org.apache.samza.system.descriptors.GenericInputDescriptor in project samza by apache.
the class TestOperatorImplGraph method testMergeChain.
@Test
public void testMergeChain() {
String inputStreamId = "input";
String inputSystem = "input-system";
StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
GenericInputDescriptor inputDescriptor = sd.getInputDescriptor(inputStreamId, mock(Serde.class));
MessageStream<Object> inputStream = appDesc.getInputStream(inputDescriptor);
MessageStream<Object> stream1 = inputStream.filter(mock(FilterFunction.class));
MessageStream<Object> stream2 = inputStream.map(mock(MapFunction.class));
stream1.merge(Collections.singleton(stream2)).map(new TestMapFunction<Object, Object>("test-map-1", (Function & Serializable) m -> m));
}, getConfig());
TaskName mockTaskName = mock(TaskName.class);
TaskModel taskModel = mock(TaskModel.class);
when(taskModel.getTaskName()).thenReturn(mockTaskName);
when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel);
OperatorImplGraph opImplGraph = new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));
Set<OperatorImpl> opSet = opImplGraph.getAllInputOperators().stream().collect(HashSet::new, (s, op) -> addOperatorRecursively(s, op), HashSet::addAll);
Object[] mergeOps = opSet.stream().filter(op -> op.getOperatorSpec().getOpCode() == OpCode.MERGE).toArray();
assertEquals(1, mergeOps.length);
assertEquals(1, ((OperatorImpl) mergeOps[0]).registeredOperators.size());
OperatorImpl mapOp = (OperatorImpl) ((OperatorImpl) mergeOps[0]).registeredOperators.iterator().next();
assertEquals(mapOp.getOperatorSpec().getOpCode(), OpCode.MAP);
// verify that the DAG after merge is only traversed & initialized once
assertEquals(TestMapFunction.getInstanceByTaskName(mockTaskName, "test-map-1").numInitCalled, 1);
}
Aggregations