Search in sources :

Example 1 with KeyValueStore

use of org.apache.samza.storage.kv.KeyValueStore in project samza by apache.

the class TaskSideInputHandler method process.

/**
 * Processes the incoming side input message envelope and updates the last processed offset for its SSP.
 * Synchronized inorder to be exclusive with flush().
 *
 * @param envelope incoming envelope to be processed
 */
public synchronized void process(IncomingMessageEnvelope envelope) {
    SystemStreamPartition envelopeSSP = envelope.getSystemStreamPartition();
    String envelopeOffset = envelope.getOffset();
    for (String store : this.sspToStores.get(envelopeSSP)) {
        SideInputsProcessor storeProcessor = this.storeToProcessor.get(store);
        KeyValueStore keyValueStore = (KeyValueStore) this.taskSideInputStorageManager.getStore(store);
        Collection<Entry<?, ?>> entriesToBeWritten = storeProcessor.process(envelope, keyValueStore);
        // TODO: SAMZA-2255: optimize writes to side input stores
        for (Entry entry : entriesToBeWritten) {
            // If the key is null we ignore, if the value is null, we issue a delete, else we issue a put
            if (entry.getKey() != null) {
                if (entry.getValue() != null) {
                    keyValueStore.put(entry.getKey(), entry.getValue());
                } else {
                    keyValueStore.delete(entry.getKey());
                }
            }
        }
    }
    this.lastProcessedOffsets.put(envelopeSSP, envelopeOffset);
    checkCaughtUp(envelopeSSP, envelopeOffset, SystemStreamMetadata.OffsetType.NEWEST);
}
Also used : Entry(org.apache.samza.storage.kv.Entry) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition)

Example 2 with KeyValueStore

use of org.apache.samza.storage.kv.KeyValueStore in project samza by apache.

the class TestTaskContextImpl method testGetMissingStore.

/**
 * Given that there is not a store corresponding to the storeName, getStore should throw an exception.
 */
@Test(expected = IllegalArgumentException.class)
public void testGetMissingStore() {
    KeyValueStore store = mock(KeyValueStore.class);
    when(keyValueStoreProvider.apply("myStore")).thenReturn(null);
    assertEquals(store, taskContext.getStore("myStore"));
}
Also used : KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Test(org.junit.Test)

Example 3 with KeyValueStore

use of org.apache.samza.storage.kv.KeyValueStore in project samza by apache.

the class TestTaskContextImpl method testGetStore.

/**
 * Given that there is a store corresponding to the storeName, getStore should return the store.
 */
@Test
public void testGetStore() {
    KeyValueStore store = mock(KeyValueStore.class);
    when(keyValueStoreProvider.apply("myStore")).thenReturn(store);
    assertEquals(store, taskContext.getStore("myStore"));
}
Also used : KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Test(org.junit.Test)

Example 4 with KeyValueStore

use of org.apache.samza.storage.kv.KeyValueStore in project samza by apache.

the class TestOperatorImplGraph method testJoinChain.

@Test
public void testJoinChain() {
    String inputStreamId1 = "input1";
    String inputStreamId2 = "input2";
    String inputSystem = "input-system";
    String inputPhysicalName1 = "input-stream1";
    String inputPhysicalName2 = "input-stream2";
    HashMap<String, String> configs = new HashMap<>();
    configs.put(JobConfig.JOB_NAME, "jobName");
    configs.put(JobConfig.JOB_ID, "jobId");
    StreamTestUtils.addStreamConfigs(configs, inputStreamId1, inputSystem, inputPhysicalName1);
    StreamTestUtils.addStreamConfigs(configs, inputStreamId2, inputSystem, inputPhysicalName2);
    Config config = new MapConfig(configs);
    when(this.context.getJobContext().getConfig()).thenReturn(config);
    Integer joinKey = new Integer(1);
    Function<Object, Integer> keyFn = (Function & Serializable) m -> joinKey;
    JoinFunction testJoinFunction = new TestJoinFunction("jobName-jobId-join-j1", (BiFunction & Serializable) (m1, m2) -> KV.of(m1, m2), keyFn, keyFn);
    StreamApplicationDescriptorImpl graphSpec = new StreamApplicationDescriptorImpl(appDesc -> {
        GenericSystemDescriptor sd = new GenericSystemDescriptor(inputSystem, "mockFactoryClass");
        GenericInputDescriptor inputDescriptor1 = sd.getInputDescriptor(inputStreamId1, mock(Serde.class));
        GenericInputDescriptor inputDescriptor2 = sd.getInputDescriptor(inputStreamId2, mock(Serde.class));
        MessageStream<Object> inputStream1 = appDesc.getInputStream(inputDescriptor1);
        MessageStream<Object> inputStream2 = appDesc.getInputStream(inputDescriptor2);
        inputStream1.join(inputStream2, testJoinFunction, mock(Serde.class), mock(Serde.class), mock(Serde.class), Duration.ofHours(1), "j1");
    }, config);
    TaskName mockTaskName = mock(TaskName.class);
    TaskModel taskModel = mock(TaskModel.class);
    when(taskModel.getTaskName()).thenReturn(mockTaskName);
    when(this.context.getTaskContext().getTaskModel()).thenReturn(taskModel);
    KeyValueStore mockLeftStore = mock(KeyValueStore.class);
    when(this.context.getTaskContext().getStore(eq("jobName-jobId-join-j1-L"))).thenReturn(mockLeftStore);
    KeyValueStore mockRightStore = mock(KeyValueStore.class);
    when(this.context.getTaskContext().getStore(eq("jobName-jobId-join-j1-R"))).thenReturn(mockRightStore);
    OperatorImplGraph opImplGraph = new OperatorImplGraph(graphSpec.getOperatorSpecGraph(), this.context, mock(Clock.class));
    // verify that join function is initialized once.
    assertEquals(TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1").numInitCalled, 1);
    InputOperatorImpl inputOpImpl1 = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName1));
    InputOperatorImpl inputOpImpl2 = opImplGraph.getInputOperator(new SystemStream(inputSystem, inputPhysicalName2));
    PartialJoinOperatorImpl leftPartialJoinOpImpl = (PartialJoinOperatorImpl) inputOpImpl1.registeredOperators.iterator().next();
    PartialJoinOperatorImpl rightPartialJoinOpImpl = (PartialJoinOperatorImpl) inputOpImpl2.registeredOperators.iterator().next();
    assertEquals(leftPartialJoinOpImpl.getOperatorSpec(), rightPartialJoinOpImpl.getOperatorSpec());
    assertNotSame(leftPartialJoinOpImpl, rightPartialJoinOpImpl);
    // verify that left partial join operator calls getFirstKey
    Object mockLeftMessage = mock(Object.class);
    long currentTimeMillis = System.currentTimeMillis();
    when(mockLeftStore.get(eq(joinKey))).thenReturn(new TimestampedValue<>(mockLeftMessage, currentTimeMillis));
    IncomingMessageEnvelope leftMessage = new IncomingMessageEnvelope(mock(SystemStreamPartition.class), "", "", mockLeftMessage);
    inputOpImpl1.onMessage(leftMessage, mock(MessageCollector.class), mock(TaskCoordinator.class));
    // verify that right partial join operator calls getSecondKey
    Object mockRightMessage = mock(Object.class);
    when(mockRightStore.get(eq(joinKey))).thenReturn(new TimestampedValue<>(mockRightMessage, currentTimeMillis));
    IncomingMessageEnvelope rightMessage = new IncomingMessageEnvelope(mock(SystemStreamPartition.class), "", "", mockRightMessage);
    inputOpImpl2.onMessage(rightMessage, mock(MessageCollector.class), mock(TaskCoordinator.class));
    // verify that the join function apply is called with the correct messages on match
    assertEquals(((TestJoinFunction) TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1")).joinResults.size(), 1);
    KV joinResult = (KV) ((TestJoinFunction) TestJoinFunction.getInstanceByTaskName(mockTaskName, "jobName-jobId-join-j1")).joinResults.iterator().next();
    assertEquals(joinResult.getKey(), mockLeftMessage);
    assertEquals(joinResult.getValue(), mockRightMessage);
}
Also used : StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) BiFunction(java.util.function.BiFunction) Assert.assertNotSame(org.junit.Assert.assertNotSame) TaskModel(org.apache.samza.job.model.TaskModel) TimestampedValue(org.apache.samza.util.TimestampedValue) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StringSerde(org.apache.samza.serializers.StringSerde) HashMultimap(com.google.common.collect.HashMultimap) Matchers.eq(org.mockito.Matchers.eq) After(org.junit.After) Duration(java.time.Duration) Map(java.util.Map) MapConfig(org.apache.samza.config.MapConfig) KV(org.apache.samza.operators.KV) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) Collection(java.util.Collection) Set(java.util.Set) Serializable(java.io.Serializable) Context(org.apache.samza.context.Context) List(java.util.List) SystemClock(org.apache.samza.util.SystemClock) Config(org.apache.samza.config.Config) KVSerde(org.apache.samza.serializers.KVSerde) OutputStream(org.apache.samza.operators.OutputStream) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) Mockito.mock(org.mockito.Mockito.mock) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) JobConfig(org.apache.samza.config.JobConfig) ClosableFunction(org.apache.samza.operators.functions.ClosableFunction) Serde(org.apache.samza.serializers.Serde) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Multimap(com.google.common.collect.Multimap) Function(java.util.function.Function) StreamConfig(org.apache.samza.config.StreamConfig) MapFunction(org.apache.samza.operators.functions.MapFunction) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) StreamTestUtils(org.apache.samza.testUtils.StreamTestUtils) MessageCollector(org.apache.samza.task.MessageCollector) SystemStream(org.apache.samza.system.SystemStream) MockContext(org.apache.samza.context.MockContext) IntegerSerde(org.apache.samza.serializers.IntegerSerde) JobModel(org.apache.samza.job.model.JobModel) MessageStream(org.apache.samza.operators.MessageStream) Before(org.junit.Before) OpCode(org.apache.samza.operators.spec.OperatorSpec.OpCode) FilterFunction(org.apache.samza.operators.functions.FilterFunction) GenericOutputDescriptor(org.apache.samza.system.descriptors.GenericOutputDescriptor) Partition(org.apache.samza.Partition) Assert.assertTrue(org.junit.Assert.assertTrue) InitableFunction(org.apache.samza.operators.functions.InitableFunction) Clock(org.apache.samza.util.Clock) Test(org.junit.Test) Mockito.when(org.mockito.Mockito.when) JoinFunction(org.apache.samza.operators.functions.JoinFunction) TaskCoordinator(org.apache.samza.task.TaskCoordinator) ContainerModel(org.apache.samza.job.model.ContainerModel) TaskContextImpl(org.apache.samza.context.TaskContextImpl) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) StringSerde(org.apache.samza.serializers.StringSerde) KVSerde(org.apache.samza.serializers.KVSerde) Serde(org.apache.samza.serializers.Serde) IntegerSerde(org.apache.samza.serializers.IntegerSerde) Serializable(java.io.Serializable) HashMap(java.util.HashMap) MapConfig(org.apache.samza.config.MapConfig) Config(org.apache.samza.config.Config) JobConfig(org.apache.samza.config.JobConfig) StreamConfig(org.apache.samza.config.StreamConfig) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) SystemClock(org.apache.samza.util.SystemClock) Clock(org.apache.samza.util.Clock) GenericInputDescriptor(org.apache.samza.system.descriptors.GenericInputDescriptor) StreamApplicationDescriptorImpl(org.apache.samza.application.descriptors.StreamApplicationDescriptorImpl) MessageCollector(org.apache.samza.task.MessageCollector) MapConfig(org.apache.samza.config.MapConfig) SystemStream(org.apache.samza.system.SystemStream) TaskCoordinator(org.apache.samza.task.TaskCoordinator) KV(org.apache.samza.operators.KV) BiFunction(java.util.function.BiFunction) TaskName(org.apache.samza.container.TaskName) JoinFunction(org.apache.samza.operators.functions.JoinFunction) GenericSystemDescriptor(org.apache.samza.system.descriptors.GenericSystemDescriptor) TaskModel(org.apache.samza.job.model.TaskModel) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 5 with KeyValueStore

use of org.apache.samza.storage.kv.KeyValueStore in project samza by apache.

the class ContainerStorageManager method createSideInputProcessors.

// Create sideInput store processors, one per store per task
private Map<TaskName, Map<String, SideInputsProcessor>> createSideInputProcessors(StorageConfig config, ContainerModel containerModel, Map<TaskName, TaskInstanceMetrics> taskInstanceMetrics) {
    Map<TaskName, Map<String, SideInputsProcessor>> sideInputStoresToProcessors = new HashMap<>();
    containerModel.getTasks().forEach((taskName, taskModel) -> {
        sideInputStoresToProcessors.put(taskName, new HashMap<>());
        TaskMode taskMode = taskModel.getTaskMode();
        for (String storeName : this.taskSideInputStoreSSPs.get(taskName).keySet()) {
            SideInputsProcessor sideInputsProcessor;
            Optional<String> sideInputsProcessorSerializedInstance = config.getSideInputsProcessorSerializedInstance(storeName);
            if (sideInputsProcessorSerializedInstance.isPresent()) {
                sideInputsProcessor = SerdeUtils.deserialize("Side Inputs Processor", sideInputsProcessorSerializedInstance.get());
                LOG.info("Using serialized side-inputs-processor for store: {}, task: {}", storeName, taskName);
            } else if (config.getSideInputsProcessorFactory(storeName).isPresent()) {
                String sideInputsProcessorFactoryClassName = config.getSideInputsProcessorFactory(storeName).get();
                SideInputsProcessorFactory sideInputsProcessorFactory = ReflectionUtil.getObj(sideInputsProcessorFactoryClassName, SideInputsProcessorFactory.class);
                sideInputsProcessor = sideInputsProcessorFactory.getSideInputsProcessor(config, taskInstanceMetrics.get(taskName).registry());
                LOG.info("Using side-inputs-processor from factory: {} for store: {}, task: {}", config.getSideInputsProcessorFactory(storeName).get(), storeName, taskName);
            } else {
                // if this is a active-task with a side-input store but no sideinput-processor-factory defined in config, we rely on upstream validations to fail the deploy
                // if this is a standby-task and the store is a non-side-input changelog store
                // we creating identity sideInputProcessor for stores of standbyTasks
                // have to use the right serde because the sideInput stores are created
                Serde keySerde = serdes.get(config.getStorageKeySerde(storeName).orElseThrow(() -> new SamzaException("Could not find storage key serde for store: " + storeName)));
                Serde msgSerde = serdes.get(config.getStorageMsgSerde(storeName).orElseThrow(() -> new SamzaException("Could not find storage msg serde for store: " + storeName)));
                sideInputsProcessor = new SideInputsProcessor() {

                    @Override
                    public Collection<Entry<?, ?>> process(IncomingMessageEnvelope message, KeyValueStore store) {
                        // Ignore message if the key is null
                        if (message.getKey() == null) {
                            return ImmutableList.of();
                        } else {
                            // Skip serde if the message is null
                            return ImmutableList.of(new Entry<>(keySerde.fromBytes((byte[]) message.getKey()), message.getMessage() == null ? null : msgSerde.fromBytes((byte[]) message.getMessage())));
                        }
                    }
                };
                LOG.info("Using identity side-inputs-processor for store: {}, task: {}", storeName, taskName);
            }
            sideInputStoresToProcessors.get(taskName).put(storeName, sideInputsProcessor);
        }
    });
    return sideInputStoresToProcessors;
}
Also used : Serde(org.apache.samza.serializers.Serde) HashMap(java.util.HashMap) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) KeyValueStore(org.apache.samza.storage.kv.KeyValueStore) TaskMode(org.apache.samza.job.model.TaskMode) SamzaException(org.apache.samza.SamzaException) Entry(org.apache.samza.storage.kv.Entry) TaskName(org.apache.samza.container.TaskName) Map(java.util.Map) MetricsRegistryMap(org.apache.samza.metrics.MetricsRegistryMap) HashMap(java.util.HashMap)

Aggregations

KeyValueStore (org.apache.samza.storage.kv.KeyValueStore)6 HashMap (java.util.HashMap)2 Map (java.util.Map)2 TaskName (org.apache.samza.container.TaskName)2 MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap)2 Serde (org.apache.samza.serializers.Serde)2 IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope)2 Test (org.junit.Test)2 HashMultimap (com.google.common.collect.HashMultimap)1 Multimap (com.google.common.collect.Multimap)1 Serializable (java.io.Serializable)1 Duration (java.time.Duration)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Set (java.util.Set)1 BiFunction (java.util.function.BiFunction)1 Function (java.util.function.Function)1