use of org.apache.samza.storage.kv.KeyValueStore in project samza by apache.
the class TaskSideInputHandler method process.
/**
 * Runs one side input envelope through the processor of every store registered for the envelope's SSP,
 * applies the entries each processor returns to its store, and then records the envelope's offset as the
 * last processed offset for that SSP.
 * Synchronized in order to be exclusive with flush().
 *
 * @param envelope incoming envelope to be processed
 */
public synchronized void process(IncomingMessageEnvelope envelope) {
  final SystemStreamPartition ssp = envelope.getSystemStreamPartition();
  final String offset = envelope.getOffset();

  for (String storeName : this.sspToStores.get(ssp)) {
    SideInputsProcessor processor = this.storeToProcessor.get(storeName);
    KeyValueStore targetStore = (KeyValueStore) this.taskSideInputStorageManager.getStore(storeName);
    Collection<Entry<?, ?>> pendingWrites = processor.process(envelope, targetStore);

    // TODO: SAMZA-2255: optimize writes to side input stores
    for (Entry pendingWrite : pendingWrites) {
      Object key = pendingWrite.getKey();
      if (key == null) {
        // Entries without a key are ignored.
        continue;
      }

      Object value = pendingWrite.getValue();
      if (value == null) {
        // A null value means the key should be removed from the store.
        targetStore.delete(key);
      } else {
        targetStore.put(key, value);
      }
    }
  }

  this.lastProcessedOffsets.put(ssp, offset);
  checkCaughtUp(ssp, offset, SystemStreamMetadata.OffsetType.NEWEST);
}
use of org.apache.samza.storage.kv.KeyValueStore in project samza by apache.
the class TestTaskContextImpl method testGetMissingStore.
/**
 * Given that there is not a store corresponding to the storeName, getStore should throw an
 * {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testGetMissingStore() {
  when(keyValueStoreProvider.apply("myStore")).thenReturn(null);
  // The call itself is expected to throw, so there is no return value to assert on.
  taskContext.getStore("myStore");
}
use of org.apache.samza.storage.kv.KeyValueStore in project samza by apache.
the class TestTaskContextImpl method testGetStore.
/**
 * When the store provider is stubbed to return a store for the requested name, getStore should
 * return a store equal to it.
 */
@Test
public void testGetStore() {
  final String storeName = "myStore";
  KeyValueStore expectedStore = mock(KeyValueStore.class);
  when(keyValueStoreProvider.apply(storeName)).thenReturn(expectedStore);

  Object actualStore = taskContext.getStore(storeName);

  assertEquals(expectedStore, actualStore);
}
use of org.apache.samza.storage.kv.KeyValueStore in project samza by apache.
the class TestOperatorImplGraph method testJoinChain.
/**
 * Builds an application that joins two input streams, creates the {@link OperatorImplGraph} for it, and checks that:
 * the join function is initialized once; the two inputs are wired to distinct partial join operators that share
 * one operator spec; and sending one message on each input yields a single join result pairing the left message
 * with the right message.
 */
@Test
public void testJoinChain() {
  String inputStreamId1 = "input1";
  String inputStreamId2 = "input2";
  String inputSystem = "input-system";
  String inputPhysicalName1 = "input-stream1";
  String inputPhysicalName2 = "input-stream2";
  // Operator id of the join; the left/right store names below are this id suffixed with "-L" / "-R".
  String joinOpId = "jobName-jobId-join-j1";

  HashMap<String, String> configs = new HashMap<>();
  configs.put(JobConfig.JOB_NAME, "jobName");
  configs.put(JobConfig.JOB_ID, "jobId");
  StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputPhysicalName1);
  StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputPhysicalName2);
  Config config = new MapConfig(configs);
  when(this.context.getJobContext().getConfig()).thenReturn(config);

  // Every message maps to the same join key, so the left and right messages always match.
  Integer joinKey = 1;
  Function<Object, Integer> keyFn = (Function & Serializable) m -> joinKey;
  JoinFunction testJoinFunction = new TestJoinFunction(joinOpId, (BiFunction & Serializable) (m1, m2) -> KV.of(m1, m2), keyFn, keyFn);

  StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
    GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
    GenericInputDescriptor inputDescriptor1 = sd.getInputDescriptor(inputStreamId1, mock(Serde.class));
    GenericInputDescriptor inputDescriptor2 = sd.getInputDescriptor(inputStreamId2, mock(Serde.class));
    MessageStream<Object> inputStream1 = appDesc.getInputStream(inputDescriptor1);
    MessageStream<Object> inputStream2 = appDesc.getInputStream(inputDescriptor2);
    inputStream1.join(inputStream2, testJoinFunction, mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j1");
  }, config);

  TaskName mockTaskName = mock(TaskName.class);
  TaskModel taskModel = mock(TaskModel.class);
  when(taskModel.getTaskName()).thenReturn(mockTaskName);
  when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel);

  KeyValueStore mockLeftStore = mock(KeyValueStore.class);
  when(this.context.getTaskContext().getStore(eq(joinOpId + "-L"))).thenReturn(mockLeftStore);
  KeyValueStore mockRightStore = mock(KeyValueStore.class);
  when(this.context.getTaskContext().getStore(eq(joinOpId + "-R"))).thenReturn(mockRightStore);

  OperatorImplGraph opImplGraph = new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));

  // verify that join function is initialized once.
  assertEquals(1, TestJoinFunction.getInstanceByTaskName(mockTaskName, joinOpId).numInitCalled);

  InputOperatorImpl inputOpImpl1 = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName1));
  InputOperatorImpl inputOpImpl2 = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName2));
  PartialJoinOperatorImpl leftPartialJoinOpImpl = (PartialJoinOperatorImpl) inputOpImpl1.registeredOperators.iterator().next();
  PartialJoinOperatorImpl rightPartialJoinOpImpl = (PartialJoinOperatorImpl) inputOpImpl2.registeredOperators.iterator().next();

  // Both sides of the join share one operator spec but must be separate operator instances.
  assertEquals(leftPartialJoinOpImpl.getOperatorSpec(), rightPartialJoinOpImpl.getOperatorSpec());
  assertNotSame(leftPartialJoinOpImpl, rightPartialJoinOpImpl);

  // Send a message through the left input; the left store is stubbed to return it for the join key.
  Object mockLeftMessage = mock(Object.class);
  long currentTimeMillis = System.currentTimeMillis();
  when(mockLeftStore.get(eq(joinKey))).thenReturn(new TimestampedValue<>(mockLeftMessage, currentTimeMillis));
  IncomingMessageEnvelope leftMessage = new IncomingMessageEnvelope(mock(SystemStreamPartition.class), "", "", mockLeftMessage);
  inputOpImpl1.onMessage(leftMessage, mock(MessageCollector.class), mock(TaskCoordinator.class));

  // Send a message through the right input; the right store is stubbed to return it for the join key.
  Object mockRightMessage = mock(Object.class);
  when(mockRightStore.get(eq(joinKey))).thenReturn(new TimestampedValue<>(mockRightMessage, currentTimeMillis));
  IncomingMessageEnvelope rightMessage = new IncomingMessageEnvelope(mock(SystemStreamPartition.class), "", "", mockRightMessage);
  inputOpImpl2.onMessage(rightMessage, mock(MessageCollector.class), mock(TaskCoordinator.class));

  // verify that the join function apply is called with the correct messages on match
  TestJoinFunction joinFunctionInstance = (TestJoinFunction) TestJoinFunction.getInstanceByTaskName(mockTaskName, joinOpId);
  assertEquals(1, joinFunctionInstance.joinResults.size());
  KV joinResult = (KV) joinFunctionInstance.joinResults.iterator().next();
  assertEquals(mockLeftMessage, joinResult.getKey());
  assertEquals(mockRightMessage, joinResult.getValue());
}
use of org.apache.samza.storage.kv.KeyValueStore in project samza by apache.
the class ContainerStorageManager method createSideInputProcessors.
/**
 * Creates the side-input store processors, one per store per task.
 *
 * <p>For every store listed in {@code taskSideInputStoreSSPs} for a task, the processor is chosen in this order:
 * <ol>
 *   <li>the serialized processor instance from config, if one is present;</li>
 *   <li>otherwise a processor built by the configured side-inputs-processor factory, if one is present;</li>
 *   <li>otherwise an identity processor that deserializes the incoming key and message with the store's
 *       configured key and msg serdes.</li>
 * </ol>
 *
 * @param config storage config used to look up serialized processors, processor factories and serde names
 * @param containerModel container model whose tasks are iterated
 * @param taskInstanceMetrics per-task metrics; the task's registry is passed to the processor factory
 * @return map from task name to a map from store name to that store's processor
 * @throws SamzaException if the identity processor is needed and no key or msg serde is configured for the store
 */
private Map<TaskName, Map<String, SideInputsProcessor>> createSideInputProcessors(StorageConfig config, ContainerModel containerModel, Map<TaskName, TaskInstanceMetrics> taskInstanceMetrics) {
  Map<TaskName, Map<String, SideInputsProcessor>> sideInputStoresToProcessors = new HashMap<>();
  containerModel.getTasks().forEach((taskName, taskModel) -> {
    Map<String, SideInputsProcessor> storeToProcessor = new HashMap<>();
    sideInputStoresToProcessors.put(taskName, storeToProcessor);

    for (String storeName : this.taskSideInputStoreSSPs.get(taskName).keySet()) {
      SideInputsProcessor sideInputsProcessor;
      Optional<String> sideInputsProcessorSerializedInstance = config.getSideInputsProcessorSerializedInstance(storeName);

      if (sideInputsProcessorSerializedInstance.isPresent()) {
        sideInputsProcessor = SerdeUtils.deserialize("Side Inputs Processor", sideInputsProcessorSerializedInstance.get());
        LOG.info("Using serialized side-inputs-processor for store: {}, task: {}", storeName, taskName);
      } else {
        // Resolve the factory class name once; it is only consulted when no serialized instance exists.
        Optional<String> sideInputsProcessorFactoryClassName = config.getSideInputsProcessorFactory(storeName);

        if (sideInputsProcessorFactoryClassName.isPresent()) {
          String factoryClassName = sideInputsProcessorFactoryClassName.get();
          SideInputsProcessorFactory sideInputsProcessorFactory = ReflectionUtil.getObj(factoryClassName, SideInputsProcessorFactory.class);
          sideInputsProcessor = sideInputsProcessorFactory.getSideInputsProcessor(config, taskInstanceMetrics.get(taskName).registry());
          LOG.info("Using side-inputs-processor from factory: {} for store: {}, task: {}", factoryClassName, storeName, taskName);
        } else {
          // Neither a serialized processor nor a factory is configured for this store.
          // - For an active task with a side-input store, upstream validations are relied upon to fail the deploy.
          // - For a standby task whose store is a non-side-input changelog store, an identity processor is
          //   created here. It has to use the store's own serdes because the store is created as a
          //   side-input store.
          Serde keySerde = serdes.get(config.getStorageKeySerde(storeName).orElseThrow(() -> new SamzaException("Could not find storage key serde for store: " + storeName)));
          Serde msgSerde = serdes.get(config.getStorageMsgSerde(storeName).orElseThrow(() -> new SamzaException("Could not find storage msg serde for store: " + storeName)));
          sideInputsProcessor = new SideInputsProcessor() {
            @Override
            public Collection<Entry<?, ?>> process(IncomingMessageEnvelope message, KeyValueStore store) {
              // Ignore message if the key is null
              if (message.getKey() == null) {
                return ImmutableList.of();
              } else {
                // Skip serde if the message is null
                return ImmutableList.of(new Entry<>(keySerde.fromBytes((byte[]) message.getKey()), message.getMessage() == null ? null : msgSerde.fromBytes((byte[]) message.getMessage())));
              }
            }
          };
          LOG.info("Using identity side-inputs-processor for store: {}, task: {}", storeName, taskName);
        }
      }

      storeToProcessor.put(storeName, sideInputsProcessor);
    }
  });
  return sideInputStoresToProcessors;
}
Aggregations