use of org.apache.samza.system.EndOfStreamMessage in project samza by apache.
the class StreamOperatorTask method processAsync.
/**
 * Hands an incoming message envelope to the {@link InputOperatorImpl} registered for the envelope's
 * {@link SystemStream}. The call is non-blocking: when a container thread pool is available (its size is
 * configured through job.container.thread.pool.size) the work is submitted to that pool; otherwise the DAG is
 * executed directly on the run loop thread.
 * <p>
 * Past the input operator, every {@link org.apache.samza.operators.impl.OperatorImpl} forwards its own transformed
 * output to the {@link org.apache.samza.operators.impl.OperatorImpl}s chained after it.
 * <p>
 * The callback is completed when the operator's future finishes normally. It is failed when the future finishes
 * exceptionally, when no input operator is registered for the stream, or when dispatching throws an
 * {@link Exception}.
 *
 * @param ime incoming message envelope to process
 * @param collector the collector to send messages with
 * @param coordinator the coordinator to request commits or shutdown
 * @param callback the task callback handle
 */
@Override
public final void processAsync(IncomingMessageEnvelope ime, MessageCollector collector, TaskCoordinator coordinator, TaskCallback callback) {
  Runnable dispatchTask = () -> {
    try {
      SystemStream stream = ime.getSystemStreamPartition().getSystemStream();
      InputOperatorImpl inputOperator = operatorImplGraph.getInputOperator(stream);

      if (inputOperator == null) {
        // Fail the callback right away when no InputOperator is registered for this SystemStream; otherwise the
        // job would only fail later through the async task callback timeout (TaskCallbackTimeoutException).
        final String errMessage = String.format(
            "InputOperator not found in OperatorGraph for %s. The available input operators are: %s."
                + " Please check SystemStream configuration for the `SystemConsumer` and/or task.inputs"
                + " task configuration.",
            stream, operatorImplGraph.getAllInputOperators());
        LOG.error(errMessage);
        callback.failure(new SamzaException(errMessage));
        return;
      }

      CompletionStage<Void> pending = dispatchByMessageType(inputOperator, ime, collector, coordinator);
      // Translate the outcome of the operator future into the task callback.
      pending.whenComplete((ignored, error) -> {
        if (error == null) {
          callback.complete();
        } else {
          callback.failure(error);
        }
      });
    } catch (Exception e) {
      LOG.error("Failed to process the incoming message due to ", e);
      callback.failure(e);
    }
  };

  if (taskThreadPool == null) {
    LOG.debug("Processing message on the run loop thread.");
    dispatchTask.run();
  } else {
    LOG.debug("Processing message using thread pool.");
    taskThreadPool.submit(dispatchTask);
  }
}

/**
 * Invokes the entry point of the input operator that corresponds to the type of the envelope's message.
 *
 * @param inputOperator the input operator that receives the message
 * @param ime incoming message envelope to process
 * @param collector the collector to send messages with
 * @param coordinator the coordinator to request commits or shutdown
 * @return the future returned by the operator, or an already-failed future for an unrecognised message type
 */
private CompletionStage<Void> dispatchByMessageType(InputOperatorImpl inputOperator, IncomingMessageEnvelope ime, MessageCollector collector, TaskCoordinator coordinator) {
  MessageType kind = MessageType.of(ime.getMessage());
  switch (kind) {
    case USER_MESSAGE:
      return inputOperator.onMessageAsync(ime, collector, coordinator);
    case END_OF_STREAM:
      EndOfStreamMessage endOfStream = (EndOfStreamMessage) ime.getMessage();
      return inputOperator.aggregateEndOfStream(endOfStream, ime.getSystemStreamPartition(), collector, coordinator);
    case WATERMARK:
      WatermarkMessage watermark = (WatermarkMessage) ime.getMessage();
      return inputOperator.aggregateWatermark(watermark, ime.getSystemStreamPartition(), collector, coordinator);
    default:
      return failedFuture(new SamzaException("Unknown message type " + kind + " encountered."));
  }
}
use of org.apache.samza.system.EndOfStreamMessage in project samza by apache.
the class IntermediateMessageSerde method toBytes.
/**
 * Serializes a message into a byte array whose first byte identifies the message type and whose remaining bytes
 * are the payload produced by the serde for that type.
 *
 * @param object the user message, {@link WatermarkMessage} or {@link EndOfStreamMessage} to serialize
 * @return a one-byte type tag (the {@link MessageType} ordinal) followed by the serialized payload
 * @throws SamzaException if the message type is not one of the handled types
 */
@Override
public byte[] toBytes(Object object) {
  final MessageType messageType = MessageType.of(object);

  // Pick the serde that matches the message type.
  final byte[] payload;
  switch (messageType) {
    case END_OF_STREAM:
      payload = eosSerde.toBytes((EndOfStreamMessage) object);
      break;
    case WATERMARK:
      payload = watermarkSerde.toBytes((WatermarkMessage) object);
      break;
    case USER_MESSAGE:
      payload = userMessageSerde.toBytes(object);
      break;
    default:
      throw new SamzaException("Unknown message type: " + messageType.name());
  }

  // Frame layout: [type ordinal (1 byte)][payload bytes].
  final byte[] framed = new byte[1 + payload.length];
  framed[0] = (byte) messageType.ordinal();
  System.arraycopy(payload, 0, framed, 1, payload.length);
  return framed;
}
use of org.apache.samza.system.EndOfStreamMessage in project samza by apache.
the class TestInMemorySystem method testEndOfStreamMessageWithoutTask.
/**
 * Produces an {@link EndOfStreamMessage} created without a task name and checks that exactly one envelope is
 * consumed, that its message carries no task name, and that the envelope reports end-of-stream.
 */
@Test
public void testEndOfStreamMessageWithoutTask() {
  produceMessages(new EndOfStreamMessage());

  // Poll every partition of the test stream.
  Set<SystemStreamPartition> allPartitions = IntStream.range(0, PARTITION_COUNT)
      .mapToObj(Partition::new)
      .map(partition -> new SystemStreamPartition(SYSTEM_STREAM, partition))
      .collect(Collectors.toSet());
  List<IncomingMessageEnvelope> consumed = consumeRawMessages(allPartitions);

  assertEquals(1, consumed.size());
  IncomingMessageEnvelope envelope = consumed.get(0);
  EndOfStreamMessage received = (EndOfStreamMessage) envelope.getMessage();
  assertNull(received.getTaskName());
  assertTrue(envelope.isEndOfStream());
}
use of org.apache.samza.system.EndOfStreamMessage in project samza by apache.
the class TestInMemorySystem method testEndOfStreamMessageWithTask.
/**
 * Produces an {@link EndOfStreamMessage} created with the task name "test-task" and checks that exactly one
 * envelope is consumed, that its message keeps the task name, and that the envelope does not report
 * end-of-stream.
 */
@Test
public void testEndOfStreamMessageWithTask() {
  produceMessages(new EndOfStreamMessage("test-task"));

  // Poll every partition of the test stream.
  Set<SystemStreamPartition> allPartitions = IntStream.range(0, PARTITION_COUNT)
      .mapToObj(Partition::new)
      .map(partition -> new SystemStreamPartition(SYSTEM_STREAM, partition))
      .collect(Collectors.toSet());
  List<IncomingMessageEnvelope> consumed = consumeRawMessages(allPartitions);

  assertEquals(1, consumed.size());
  IncomingMessageEnvelope envelope = consumed.get(0);
  EndOfStreamMessage received = (EndOfStreamMessage) envelope.getMessage();
  assertEquals("test-task", received.getTaskName());
  assertFalse(envelope.isEndOfStream());
}
use of org.apache.samza.system.EndOfStreamMessage in project samza by apache.
the class TestRunner method initializeInMemoryInputStream.
/**
 * Creates an in memory stream with {@link InMemorySystemFactory} and fills each of its partitions with the
 * supplied messages, followed by an {@link EndOfStreamMessage} per partition.
 * <p>
 * A {@link KV} element is split into the key and message of the produced envelope; any other element is sent
 * with a null key. A message that is already an {@link IncomingMessageEnvelope} is handed to the producer as is.
 *
 * @param descriptor describes a stream to initialize with the in memory system
 * @param partitionData key of the map represents partitionId and value represents messages in the partition
 */
private <StreamMessageType> void initializeInMemoryInputStream(InMemoryInputDescriptor<?> descriptor, Map<Integer, Iterable<StreamMessageType>> partitionData) {
  String systemName = descriptor.getSystemName();
  String streamName = (String) descriptor.getPhysicalName().orElse(descriptor.getStreamId());

  if (this.app instanceof LegacyTaskApplication) {
    // for legacy applications that only specify task.class.
    String qualifiedStream = systemName + "." + streamName;
    String inputStreams = configs.containsKey(TaskConfig.INPUT_STREAMS)
        ? configs.get(TaskConfig.INPUT_STREAMS).concat("," + qualifiedStream)
        : qualifiedStream;
    configs.put(TaskConfig.INPUT_STREAMS, inputStreams);
  }

  // Set the in-memory scope on the system descriptor before its config is generated below.
  InMemorySystemDescriptor imsd = (InMemorySystemDescriptor) descriptor.getSystemDescriptor();
  imsd.withInMemoryScope(this.inMemoryScope);

  addConfig(descriptor.toConfig());
  addConfig(descriptor.getSystemDescriptor().toConfig());
  addSerdeConfigs(descriptor);

  // Create the stream with one partition per entry of partitionData.
  StreamSpec spec = new StreamSpec(descriptor.getStreamId(), streamName, systemName, partitionData.size());
  SystemFactory factory = new InMemorySystemFactory();
  Config config = new MapConfig(descriptor.toConfig(), descriptor.getSystemDescriptor().toConfig());
  factory.getAdmin(systemName, config).createStream(spec);

  InMemorySystemProducer producer = (InMemorySystemProducer) factory.getProducer(systemName, config, null);
  SystemStream sysStream = new SystemStream(systemName, streamName);

  for (Map.Entry<Integer, Iterable<StreamMessageType>> partitionEntry : partitionData.entrySet()) {
    Integer partitionId = partitionEntry.getKey();

    for (StreamMessageType element : partitionEntry.getValue()) {
      Object key = null;
      Object value = element;
      if (element instanceof KV) {
        KV<?, ?> pair = (KV<?, ?>) element;
        key = pair.getKey();
        value = pair.getValue();
      }

      if (value instanceof IncomingMessageEnvelope) {
        producer.send((IncomingMessageEnvelope) value);
      } else {
        producer.send(systemName, new OutgoingMessageEnvelope(sysStream, Integer.valueOf(partitionId), key, value));
      }
    }

    // Terminate the partition with an end-of-stream marker once its messages have been sent.
    producer.send(systemName, new OutgoingMessageEnvelope(sysStream, Integer.valueOf(partitionId), null, new EndOfStreamMessage(null)));
  }
}
Aggregations