Example 6 with Partition

Use of org.apache.samza.Partition in project samza by apache.

From class TestCoordinatorStreamSystemConsumer, method testOrderKeyRewrite.

/**
   * Verify that if a particular key-value is written, then another, then the original again,
   * that the original occurs last in the set.
   */
@Test
public void testOrderKeyRewrite() throws InterruptedException {
    final SystemStream systemStream = new SystemStream("system", "stream");
    final SystemStreamPartition ssp = new SystemStreamPartition(systemStream, new Partition(0));
    final SystemConsumer systemConsumer = mock(SystemConsumer.class);
    final List<IncomingMessageEnvelope> list = new ArrayList<>();
    SetConfig setConfig1 = new SetConfig("source", "key1", "value1");
    SetConfig setConfig2 = new SetConfig("source", "key1", "value2");
    SetConfig setConfig3 = new SetConfig("source", "key1", "value1");
    list.add(createIncomingMessageEnvelope(setConfig1, ssp));
    list.add(createIncomingMessageEnvelope(setConfig2, ssp));
    list.add(createIncomingMessageEnvelope(setConfig3, ssp));
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> messages = new HashMap<SystemStreamPartition, List<IncomingMessageEnvelope>>() {

        {
            put(ssp, list);
        }
    };
    when(systemConsumer.poll(anySet(), anyLong())).thenReturn(messages, Collections.<SystemStreamPartition, List<IncomingMessageEnvelope>>emptyMap());
    CoordinatorStreamSystemConsumer consumer = new CoordinatorStreamSystemConsumer(systemStream, systemConsumer, new SinglePartitionWithoutOffsetsSystemAdmin());
    consumer.bootstrap();
    Set<CoordinatorStreamMessage> bootstrappedMessages = consumer.getBoostrappedStream();
    // First message should have been removed as a duplicate
    assertEquals(2, bootstrappedMessages.size());
    CoordinatorStreamMessage[] coordinatorStreamMessages = bootstrappedMessages.toArray(new CoordinatorStreamMessage[2]);
    assertEquals(setConfig2, coordinatorStreamMessages[0]);
    // Config 3 MUST be the last message, not config 2
    assertEquals(setConfig3, coordinatorStreamMessages[1]);
}
Also used: SystemStreamPartition (org.apache.samza.system.SystemStreamPartition), Partition (org.apache.samza.Partition), SystemConsumer (org.apache.samza.system.SystemConsumer), HashMap (java.util.HashMap), LinkedHashMap (java.util.LinkedHashMap), SystemStream (org.apache.samza.system.SystemStream), IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope), ArrayList (java.util.ArrayList), CoordinatorStreamMessage (org.apache.samza.coordinator.stream.messages.CoordinatorStreamMessage), SetConfig (org.apache.samza.coordinator.stream.messages.SetConfig), SinglePartitionWithoutOffsetsSystemAdmin (org.apache.samza.util.SinglePartitionWithoutOffsetsSystemAdmin), List (java.util.List), Test (org.junit.Test)
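
The test calls a createIncomingMessageEnvelope helper that is not shown in this snippet. A minimal sketch of what such a helper could look like, assuming the coordinator stream's JSON serde (org.apache.samza.serializers.JsonSerde) and the getKeyArray()/getMessageMap() accessors on CoordinatorStreamMessage; the offset literal is a placeholder:

private IncomingMessageEnvelope createIncomingMessageEnvelope(CoordinatorStreamMessage message, SystemStreamPartition ssp) {
    // Assumption: key and value are serialized the same way the coordinator stream
    // producer writes them, so the consumer under test can deserialize them again.
    Serde<List<?>> keySerde = new JsonSerde<>();
    Serde<Map<String, Object>> messageSerde = new JsonSerde<>();
    return new IncomingMessageEnvelope(ssp, "0",
        keySerde.toBytes(Arrays.asList(message.getKeyArray())),
        messageSerde.toBytes(message.getMessageMap()));
}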

Example 7 with Partition

Use of org.apache.samza.Partition in project samza by apache.

From class MockCoordinatorStreamSystemFactory, method getConsumer.

/**
   * Returns a consumer that sends all configs to the coordinator stream.
   *
   * @param config Along with normal configs, checkpoint and changelog stream messages can be passed into the stream.
   *               The expected patterns are:
   *               cp:source:taskname -> ssp,offset for a checkpoint (use the sspToString util),
   *               ch:source:taskname -> changelogPartition for a changelog.
   *               Everything else is processed as normal config.
   */
public SystemConsumer getConsumer(String systemName, Config config, MetricsRegistry registry) {
    if (useCachedConsumer && mockConsumer != null) {
        return mockConsumer;
    }
    String jobName = config.get("job.name");
    String jobId = config.get("job.id");
    if (jobName == null) {
        throw new ConfigException("Must define job.name.");
    }
    if (jobId == null) {
        jobId = "1";
    }
    String streamName = Util.getCoordinatorStreamName(jobName, jobId);
    SystemStreamPartition systemStreamPartition = new SystemStreamPartition(systemName, streamName, new Partition(0));
    mockConsumer = new MockCoordinatorStreamWrappedConsumer(systemStreamPartition, config);
    return mockConsumer;
}
Also used: Partition (org.apache.samza.Partition), SystemStreamPartition (org.apache.samza.system.SystemStreamPartition), ConfigException (org.apache.samza.config.ConfigException)
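
For illustration only, a config exercising the patterns described in the Javadoc above could be assembled as below. Aside from job.name and job.id, the key names and value formats are assumptions based on that Javadoc (the real prefixes and the ssp string format come from MockCoordinatorStreamWrappedConsumer and the sspToString util), not verified constants:

Map<String, String> map = new HashMap<>();
// required; getConsumer throws a ConfigException without it
map.put("job.name", "test-job");
// optional; defaults to "1"
map.put("job.id", "1");
// checkpoint entry, following the cp:source:taskname -> ssp,offset pattern (hypothetical values)
map.put("cp:checkpoint-source:Partition 0", "kafka.checkpoint-topic.0,42");
// changelog entry, following the ch:source:taskname -> changelogPartition pattern (hypothetical values)
map.put("ch:changelog-source:Partition 0", "7");
// everything else is treated as normal config
map.put("task.commit.ms", "60000");
SystemConsumer consumer = new MockCoordinatorStreamSystemFactory()
    .getConsumer("coordinator-system", new MapConfig(map), new NoOpMetricsRegistry());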

Example 8 with Partition

Use of org.apache.samza.Partition in project samza by apache.

From class TestHdfsSystemConsumer, method testHdfsSystemConsumerE2E.

/*
   * A simple end-to-end test that covers the workflow from system admin to
   * partitioner to system consumer, making sure the basic functionality
   * works as expected.
   */
@Test
public void testHdfsSystemConsumerE2E() throws Exception {
    Config config = generateDefaultConfig();
    HdfsSystemFactory systemFactory = new HdfsSystemFactory();
    // create admin and do partitioning
    HdfsSystemAdmin systemAdmin = systemFactory.getAdmin(SYSTEM_NAME, config);
    String streamName = WORKING_DIRECTORY;
    Set<String> streamNames = new HashSet<>();
    streamNames.add(streamName);
    generateAvroDataFiles();
    Map<String, SystemStreamMetadata> streamMetadataMap = systemAdmin.getSystemStreamMetadata(streamNames);
    SystemStreamMetadata systemStreamMetadata = streamMetadataMap.get(streamName);
    Assert.assertEquals(NUM_FILES, systemStreamMetadata.getSystemStreamPartitionMetadata().size());
    // create consumer and read from files
    HdfsSystemConsumer systemConsumer = systemFactory.getConsumer(SYSTEM_NAME, config, new NoOpMetricsRegistry());
    Map<Partition, SystemStreamMetadata.SystemStreamPartitionMetadata> metadataMap = systemStreamMetadata.getSystemStreamPartitionMetadata();
    Set<SystemStreamPartition> systemStreamPartitionSet = new HashSet<>();
    metadataMap.forEach((partition, metadata) -> {
        SystemStreamPartition ssp = new SystemStreamPartition(SYSTEM_NAME, streamName, partition);
        systemStreamPartitionSet.add(ssp);
        String offset = metadata.getOldestOffset();
        systemConsumer.register(ssp, offset);
    });
    systemConsumer.start();
    // verify events read from consumer
    int eventsReceived = 0;
    // one "End of Stream" event in the end
    int totalEvents = (NUM_EVENTS + 1) * NUM_FILES;
    int remainingRetires = 100;
    Map<SystemStreamPartition, List<IncomingMessageEnvelope>> overallResults = new HashMap<>();
    while (eventsReceived < totalEvents && remainingRetires > 0) {
        remainingRetires--;
        Map<SystemStreamPartition, List<IncomingMessageEnvelope>> result = systemConsumer.poll(systemStreamPartitionSet, 200);
        for (SystemStreamPartition ssp : result.keySet()) {
            List<IncomingMessageEnvelope> messageEnvelopeList = result.get(ssp);
            overallResults.putIfAbsent(ssp, new ArrayList<>());
            overallResults.get(ssp).addAll(messageEnvelopeList);
            if (overallResults.get(ssp).size() >= NUM_EVENTS + 1) {
                systemStreamPartitionSet.remove(ssp);
            }
            eventsReceived += messageEnvelopeList.size();
        }
    }
    Assert.assertEquals(eventsReceived, totalEvents);
    Assert.assertEquals(NUM_FILES, overallResults.size());
    overallResults.values().forEach(messages -> {
        Assert.assertEquals(NUM_EVENTS + 1, messages.size());
        for (int index = 0; index < NUM_EVENTS; index++) {
            GenericRecord record = (GenericRecord) messages.get(index).getMessage();
            Assert.assertEquals(index % NUM_EVENTS, record.get(FIELD_1));
            Assert.assertEquals("string_" + (index % NUM_EVENTS), record.get(FIELD_2).toString());
        }
        Assert.assertEquals(messages.get(NUM_EVENTS).getOffset(), IncomingMessageEnvelope.END_OF_STREAM_OFFSET);
    });
}
Also used: Partition (org.apache.samza.Partition), SystemStreamPartition (org.apache.samza.system.SystemStreamPartition), HashMap (java.util.HashMap), Config (org.apache.samza.config.Config), MapConfig (org.apache.samza.config.MapConfig), IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope), SystemStreamMetadata (org.apache.samza.system.SystemStreamMetadata), NoOpMetricsRegistry (org.apache.samza.util.NoOpMetricsRegistry), ArrayList (java.util.ArrayList), List (java.util.List), GenericRecord (org.apache.avro.generic.GenericRecord), HashSet (java.util.HashSet), Test (org.junit.Test)
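
As the assertions above show, the HDFS consumer emits one "End of Stream" envelope per file, marked with IncomingMessageEnvelope.END_OF_STREAM_OFFSET. A minimal sketch of how polling code can detect the marker and stop polling that partition, mirroring what the test does with its poll set:

for (IncomingMessageEnvelope envelope : messageEnvelopeList) {
    // The last envelope of each partition carries the special end-of-stream offset;
    // once it is seen, the partition can be dropped from the set passed to poll().
    if (IncomingMessageEnvelope.END_OF_STREAM_OFFSET.equals(envelope.getOffset())) {
        systemStreamPartitionSet.remove(envelope.getSystemStreamPartition());
    }
}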

Example 9 with Partition

Use of org.apache.samza.Partition in project samza by apache.

From class TestTaskCallbackManager, method testUpdateCallbackWithCoordinatorRequests.

@Test
public void testUpdateCallbackWithCoordinatorRequests() {
    TaskName taskName = new TaskName("Partition 0");
    SystemStreamPartition ssp = new SystemStreamPartition("kafka", "topic", new Partition(0));
    // simulate out-of-order completion: the highest sequence number completes first
    IncomingMessageEnvelope envelope2 = new IncomingMessageEnvelope(ssp, "2", null, null);
    ReadableCoordinator coordinator2 = new ReadableCoordinator(taskName);
    coordinator2.shutdown(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
    TaskCallbackImpl callback2 = new TaskCallbackImpl(listener, taskName, envelope2, coordinator2, 2, 0);
    List<TaskCallbackImpl> callbacksToUpdate = callbackManager.updateCallback(callback2);
    assertTrue(callbacksToUpdate.isEmpty());
    IncomingMessageEnvelope envelope1 = new IncomingMessageEnvelope(ssp, "1", null, null);
    ReadableCoordinator coordinator1 = new ReadableCoordinator(taskName);
    coordinator1.commit(TaskCoordinator.RequestScope.CURRENT_TASK);
    TaskCallbackImpl callback1 = new TaskCallbackImpl(listener, taskName, envelope1, coordinator1, 1, 0);
    callbacksToUpdate = callbackManager.updateCallback(callback1);
    assertTrue(callbacksToUpdate.isEmpty());
    IncomingMessageEnvelope envelope0 = new IncomingMessageEnvelope(ssp, "0", null, null);
    ReadableCoordinator coordinator = new ReadableCoordinator(taskName);
    TaskCallbackImpl callback0 = new TaskCallbackImpl(listener, taskName, envelope0, coordinator, 0, 0);
    callbacksToUpdate = callbackManager.updateCallback(callback0);
    assertEquals(2, callbacksToUpdate.size());
    // Check for envelope0
    TaskCallbackImpl taskCallback = callbacksToUpdate.get(0);
    assertTrue(taskCallback.matchSeqNum(0));
    assertEquals(ssp, taskCallback.envelope.getSystemStreamPartition());
    assertEquals("0", taskCallback.envelope.getOffset());
    // Check for envelope1
    taskCallback = callbacksToUpdate.get(1);
    assertTrue(taskCallback.matchSeqNum(1));
    assertEquals(ssp, taskCallback.envelope.getSystemStreamPartition());
    assertEquals("1", taskCallback.envelope.getOffset());
}
Also used: Partition (org.apache.samza.Partition), SystemStreamPartition (org.apache.samza.system.SystemStreamPartition), TaskName (org.apache.samza.container.TaskName), IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope), Test (org.junit.Test)
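
The rule these assertions exercise, written as a standalone sketch rather than Samza's actual implementation: completed callbacks are buffered by sequence number, and only the contiguous prefix starting at the lowest outstanding sequence is released, truncated at (and including) the first callback whose coordinator carries a commit or shutdown request. CompletionBuffer and its method names are hypothetical:

final class CompletionBuffer {
    private final Map<Long, TaskCallbackImpl> completed = new HashMap<>();
    private final Set<Long> withCoordinatorRequest = new HashSet<>();
    private long nextSeq = 0;

    List<TaskCallbackImpl> complete(long seq, TaskCallbackImpl callback, boolean hasCoordinatorRequest) {
        completed.put(seq, callback);
        if (hasCoordinatorRequest) {
            withCoordinatorRequest.add(seq);
        }
        List<TaskCallbackImpl> released = new ArrayList<>();
        while (completed.containsKey(nextSeq)) {
            released.add(completed.remove(nextSeq));
            boolean stopHere = withCoordinatorRequest.remove(nextSeq);
            nextSeq++;
            if (stopHere) {
                // stop at the first commit/shutdown request, matching the assertions above
                break;
            }
        }
        // empty until the lowest outstanding sequence number completes
        return released;
    }
}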

Example 10 with Partition

Use of org.apache.samza.Partition in project samza by apache.

From class TestTaskCallbackManager, method testUpdateShouldReturnAllCompletedCallbacksTillTheCommitRequestDefined.

@Test
public void testUpdateShouldReturnAllCompletedCallbacksTillTheCommitRequestDefined() {
    TaskName taskName = new TaskName("Partition 0");
    SystemStreamPartition ssp1 = new SystemStreamPartition("kafka", "topic", new Partition(0));
    SystemStreamPartition ssp2 = new SystemStreamPartition("kafka", "topic", new Partition(0));
    // Callback for Envelope3 contains commit request.
    IncomingMessageEnvelope envelope3 = new IncomingMessageEnvelope(ssp2, "0", null, null);
    ReadableCoordinator coordinator3 = new ReadableCoordinator(taskName);
    coordinator3.commit(TaskCoordinator.RequestScope.CURRENT_TASK);
    TaskCallbackImpl callback3 = new TaskCallbackImpl(listener, taskName, envelope3, coordinator3, 3, 0);
    List<TaskCallbackImpl> callbacksToUpdate = callbackManager.updateCallback(callback3);
    assertTrue(callbacksToUpdate.isEmpty());
    IncomingMessageEnvelope envelope2 = new IncomingMessageEnvelope(ssp1, "2", null, null);
    ReadableCoordinator coordinator2 = new ReadableCoordinator(taskName);
    coordinator2.shutdown(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
    TaskCallbackImpl callback2 = new TaskCallbackImpl(listener, taskName, envelope2, coordinator2, 2, 0);
    callbacksToUpdate = callbackManager.updateCallback(callback2);
    assertTrue(callbacksToUpdate.isEmpty());
    IncomingMessageEnvelope envelope1 = new IncomingMessageEnvelope(ssp1, "1", null, null);
    ReadableCoordinator coordinator1 = new ReadableCoordinator(taskName);
    coordinator1.commit(TaskCoordinator.RequestScope.CURRENT_TASK);
    TaskCallbackImpl callback1 = new TaskCallbackImpl(listener, taskName, envelope1, coordinator1, 1, 0);
    callbacksToUpdate = callbackManager.updateCallback(callback1);
    assertTrue(callbacksToUpdate.isEmpty());
    // Callback for Envelope0 carries no coordinator request.
    IncomingMessageEnvelope envelope0 = new IncomingMessageEnvelope(ssp1, "0", null, null);
    ReadableCoordinator coordinator = new ReadableCoordinator(taskName);
    TaskCallbackImpl callback0 = new TaskCallbackImpl(listener, taskName, envelope0, coordinator, 0, 0);
    // Completing callback0 makes sequence numbers 0-3 contiguous; the manager releases the
    // prefix up to and including the first callback with a coordinator request (Envelope1's
    // commit), so exactly two callbacks (Envelope0 and Envelope1) are returned.
    callbacksToUpdate = callbackManager.updateCallback(callback0);
    assertEquals(2, callbacksToUpdate.size());
    TaskCallbackImpl callback = callbacksToUpdate.get(0);
    assertTrue(callback.matchSeqNum(0));
    assertEquals(envelope0.getSystemStreamPartition(), callback.envelope.getSystemStreamPartition());
    assertEquals(envelope0.getOffset(), callback.envelope.getOffset());
    callback = callbacksToUpdate.get(1);
    assertTrue(callback.matchSeqNum(1));
    assertEquals(envelope1.getSystemStreamPartition(), callback.envelope.getSystemStreamPartition());
    assertEquals(envelope1.getOffset(), callback.envelope.getOffset());
}
Also used: Partition (org.apache.samza.Partition), SystemStreamPartition (org.apache.samza.system.SystemStreamPartition), TaskName (org.apache.samza.container.TaskName), IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope), Test (org.junit.Test)
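
For context, the coordinator requests these two tests simulate are the ones a task issues from its process method in the low-level API. A minimal StreamTask sketch (MyStreamTask is a hypothetical class name):

public class MyStreamTask implements StreamTask {
    @Override
    public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) {
        // ... handle the message ...
        // Request a commit of the current task's state and offsets:
        coordinator.commit(TaskCoordinator.RequestScope.CURRENT_TASK);
        // Or request that all tasks in the container shut down:
        // coordinator.shutdown(TaskCoordinator.RequestScope.ALL_TASKS_IN_CONTAINER);
    }
}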

Aggregations

Partition (org.apache.samza.Partition): 42
Test (org.junit.Test): 31
SystemStreamPartition (org.apache.samza.system.SystemStreamPartition): 30
List (java.util.List): 15
HashMap (java.util.HashMap): 13
IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope): 11
ArrayList (java.util.ArrayList): 10
SystemStreamPartitionMetadata (org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata): 8
HashSet (java.util.HashSet): 7
FileMetadata (org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata): 7
GenericRecord (org.apache.avro.generic.GenericRecord): 6
TaskName (org.apache.samza.container.TaskName): 6
SamzaException (org.apache.samza.SamzaException): 5
Config (org.apache.samza.config.Config): 5
SystemStreamMetadata (org.apache.samza.system.SystemStreamMetadata): 5
SystemStream (org.apache.samza.system.SystemStream): 4
LinkedHashMap (java.util.LinkedHashMap): 3
MapConfig (org.apache.samza.config.MapConfig): 3
SinglePartitionWithoutOffsetsSystemAdmin (org.apache.samza.util.SinglePartitionWithoutOffsetsSystemAdmin): 3
MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap): 2