Search in sources :

Example 26 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class TestTaskConfigJava method testGetBroadcastSystemStreamPartitions.

/**
 * Verifies how {@code task.broadcast.inputs} is turned into system stream partitions:
 * single partitions ({@code #4}) and bracketed ranges ({@code #[12-14]}) yield one SSP per
 * partition, while an entry with no partition part or with an unbracketed range is expected
 * to raise an {@link IllegalArgumentException}.
 */
@Test
public void testGetBroadcastSystemStreamPartitions() {
    HashMap<String, String> rawConfig = new HashMap<>();
    rawConfig.put("task.broadcast.inputs", "kafka.foo#4, kafka.boo#5, kafka.z-o-o#[12-14], kafka.foo.bar#[3-4]");
    TaskConfigJava taskConfig = new TaskConfigJava(new MapConfig(rawConfig));
    Set<SystemStreamPartition> actual = taskConfig.getBroadcastSystemStreamPartitions();

    // Every partition named in the config above, with ranges written out one by one.
    HashSet<SystemStreamPartition> expected = new HashSet<>();
    expected.add(new SystemStreamPartition("kafka", "foo", new Partition(4)));
    expected.add(new SystemStreamPartition("kafka", "boo", new Partition(5)));
    for (int id = 12; id <= 14; id++) {
        expected.add(new SystemStreamPartition("kafka", "z-o-o", new Partition(id)));
    }
    for (int id = 3; id <= 4; id++) {
        expected.add(new SystemStreamPartition("kafka", "foo.bar", new Partition(id)));
    }
    assertEquals(expected, actual);

    // An entry without any partition part must be rejected.
    rawConfig.put("task.broadcast.inputs", "kafka.foo");
    taskConfig = new TaskConfigJava(new MapConfig(rawConfig));
    boolean rejectedMissingPartition = false;
    try {
        taskConfig.getBroadcastSystemStreamPartitions();
    } catch (IllegalArgumentException e) {
        rejectedMissingPartition = true;
    }
    assertTrue(rejectedMissingPartition);

    // A range that is not wrapped in brackets must be rejected as well.
    rawConfig.put("task.broadcast.inputs", "kafka.org.apache.events.WhitelistedIps#1-2");
    taskConfig = new TaskConfigJava(new MapConfig(rawConfig));
    boolean rejectedUnbracketedRange = false;
    try {
        taskConfig.getBroadcastSystemStreamPartitions();
    } catch (IllegalArgumentException e) {
        rejectedUnbracketedRange = true;
    }
    assertTrue(rejectedUnbracketedRange);
}
Also used : Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashMap(java.util.HashMap) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 27 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class PartitionDescriptorUtil method getDescriptorMapFromJson.

/**
 * Builds a partition descriptor map from its JSON form.
 *
 * <p>The JSON is read as a map; each key is parsed as an integer partition id and each value
 * is converted to a list of paths via {@code getPathsFromString}.
 *
 * @param json JSON text to convert
 * @return map from partition to the list of paths produced for it
 * @throws SamzaException if the JSON cannot be read or a key is not a valid integer
 */
public static Map<Partition, List<String>> getDescriptorMapFromJson(String json) {
    Map<Partition, List<String>> result = new HashMap<>();
    try {
        @SuppressWarnings("unchecked") Map<String, String> parsed = new ObjectMapper().readValue(json, HashMap.class);
        for (Map.Entry<String, String> entry : parsed.entrySet()) {
            // Read both sides before parsing so cast and parse failures surface in a fixed order.
            String partitionId = entry.getKey();
            String paths = entry.getValue();
            result.put(new Partition(Integer.parseInt(partitionId)), getPathsFromString(paths));
        }
    } catch (IOException | NumberFormatException e) {
        throw new SamzaException("Failed to convert json: " + json, e);
    }
    return result;
}
Also used : Partition(org.apache.samza.Partition) HashMap(java.util.HashMap) List(java.util.List) IOException(java.io.IOException) SamzaException(org.apache.samza.SamzaException) ObjectMapper(org.codehaus.jackson.map.ObjectMapper)

Example 28 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class DirectoryPartitioner method getPartitionMetadataMap.

/**
 * Get partition metadata for a stream.
 *
 * <p>The filtered files of the stream are grouped, and each group becomes one partition with
 * ids assigned from 0 in group order. As a side effect, the path list of every partition is
 * recorded in {@code partitionDescriptorMap} under {@code streamName}.
 *
 * @param streamName name of the stream; should contain the information about the path of the
 *                   root directory
 * @param existingPartitionDescriptorMap map of the existing partition descriptor; when it is
 *                   non-empty the filtered files are first passed through
 *                   {@code validateAndGetOriginalFilteredFiles}
 * @return map of SSP metadata keyed by partition
 */
public Map<Partition, SystemStreamPartitionMetadata> getPartitionMetadataMap(String streamName, @Nullable Map<Partition, List<String>> existingPartitionDescriptorMap) {
    LOG.info("Trying to obtain metadata for " + streamName);
    Object existingForLog = MapUtils.isEmpty(existingPartitionDescriptorMap) ? "empty" : existingPartitionDescriptorMap;
    LOG.info("Existing partition descriptor: " + existingForLog);

    Map<Partition, SystemStreamPartitionMetadata> metadataByPartition = new HashMap<>();
    partitionDescriptorMap.putIfAbsent(streamName, new HashMap<>());

    List<FileMetadata> candidates = getFilteredFiles(streamName);
    if (!MapUtils.isEmpty(existingPartitionDescriptorMap)) {
        candidates = validateAndGetOriginalFilteredFiles(candidates, existingPartitionDescriptorMap);
    }

    int nextId = 0;
    for (List<FileMetadata> group : generatePartitionGroups(candidates)) {
        Partition partition = new Partition(nextId);
        List<String> paths = new ArrayList<>();
        List<String> lengths = new ArrayList<>();
        for (FileMetadata file : group) {
            paths.add(file.getPath());
            lengths.add(String.valueOf(file.getLen()));
        }

        // Oldest offset: first file at position "0"; newest: last file at its recorded length.
        String oldest = MultiFileHdfsReader.generateOffset(0, "0");
        int lastIndex = lengths.size() - 1;
        String newest = MultiFileHdfsReader.generateOffset(lastIndex, lengths.get(lastIndex));

        metadataByPartition.put(partition, new SystemStreamPartitionMetadata(oldest, newest, null));
        partitionDescriptorMap.get(streamName).put(partition, paths);
        nextId++;
    }

    LOG.info("Obtained metadata map as: " + metadataByPartition);
    LOG.info("Computed partition description as: " + partitionDescriptorMap);
    return metadataByPartition;
}
Also used : Partition(org.apache.samza.Partition) HashMap(java.util.HashMap) FileMetadata(org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata) ArrayList(java.util.ArrayList) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) ArrayList(java.util.ArrayList) List(java.util.List)

Example 29 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class TestHdfsSystemConsumer method testEmptyStagingDirectory.

/*
 * Ensure that an empty staging directory does not break the system admin,
 * but does make the system consumer fail when a partition is registered.
 */
@Test
public void testEmptyStagingDirectory() throws Exception {
    Map<String, String> settings = new HashMap<>();
    settings.put(String.format(HdfsConfig.CONSUMER_PARTITIONER_WHITELIST(), SYSTEM_NAME), ".*avro");
    Config config = new MapConfig(settings);
    HdfsSystemFactory factory = new HdfsSystemFactory();

    // Admin side: partitioning the generated files must still work.
    HdfsSystemAdmin admin = factory.getAdmin(SYSTEM_NAME, config);
    Set<String> requestedStreams = new HashSet<>();
    requestedStreams.add(WORKING_DIRECTORY);
    generateAvroDataFiles();
    SystemStreamMetadata metadata = admin.getSystemStreamMetadata(requestedStreams).get(WORKING_DIRECTORY);
    Assert.assertEquals(NUM_FILES, metadata.getSystemStreamPartitionMetadata().size());

    // Consumer side: registering a partition is expected to fail with a SamzaException cause.
    HdfsSystemConsumer consumer = factory.getConsumer(SYSTEM_NAME, config, new NoOpMetricsRegistry());
    SystemStreamPartition ssp = new SystemStreamPartition(SYSTEM_NAME, WORKING_DIRECTORY, new Partition(0));
    try {
        consumer.register(ssp, "0");
        Assert.fail("Empty staging directory should fail system consumer");
    } catch (UncheckedExecutionException e) {
        Assert.assertTrue(e.getCause() instanceof SamzaException);
    }
}
Also used : Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) HashMap(java.util.HashMap) Config(org.apache.samza.config.Config) MapConfig(org.apache.samza.config.MapConfig) SystemStreamMetadata(org.apache.samza.system.SystemStreamMetadata) SamzaException(org.apache.samza.SamzaException) NoOpMetricsRegistry(org.apache.samza.util.NoOpMetricsRegistry) MapConfig(org.apache.samza.config.MapConfig) HashSet(java.util.HashSet) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 30 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class TestTaskCallbackManager method testUpdateCallbackOutofOrder.

/**
 * Feeds callbacks to the manager in the order 2, 1, 0 and checks that nothing is handed back
 * until sequence number 0 arrives, at which point all three are returned in sequence order.
 */
@Test
public void testUpdateCallbackOutofOrder() {
    TaskName task = new TaskName("Partition 0");
    SystemStreamPartition ssp = new SystemStreamPartition("kafka", "topic", new Partition(0));
    ReadableCoordinator coordinator = new ReadableCoordinator(task);

    // simulate out of order: sequence 2 first, then 1 - neither may be handed back yet
    TaskCallbackImpl third = new TaskCallbackImpl(listener, task, new IncomingMessageEnvelope(ssp, "2", null, null), coordinator, 2, 0);
    assertTrue(callbackManager.updateCallback(third).isEmpty());

    TaskCallbackImpl second = new TaskCallbackImpl(listener, task, new IncomingMessageEnvelope(ssp, "1", null, null), coordinator, 1, 0);
    assertTrue(callbackManager.updateCallback(second).isEmpty());

    // sequence 0 arrives last and yields the whole run
    TaskCallbackImpl first = new TaskCallbackImpl(listener, task, new IncomingMessageEnvelope(ssp, "0", null, null), coordinator, 0, 0);
    List<TaskCallbackImpl> ready = callbackManager.updateCallback(first);
    assertEquals(3, ready.size());

    String[] expectedOffsets = {"0", "1", "2"};
    for (int seq = 0; seq < expectedOffsets.length; seq++) {
        TaskCallbackImpl current = ready.get(seq);
        assertTrue(current.matchSeqNum(seq));
        assertEquals(ssp, current.envelope.getSystemStreamPartition());
        assertEquals(expectedOffsets[seq], current.envelope.getOffset());
    }
}
Also used : Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) TaskName(org.apache.samza.container.TaskName) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Aggregations

Partition (org.apache.samza.Partition): 42, Test (org.junit.Test): 31, SystemStreamPartition (org.apache.samza.system.SystemStreamPartition): 30, List (java.util.List): 15, HashMap (java.util.HashMap): 13, IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope): 11, ArrayList (java.util.ArrayList): 10, SystemStreamPartitionMetadata (org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata): 8, HashSet (java.util.HashSet): 7, FileMetadata (org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata): 7, GenericRecord (org.apache.avro.generic.GenericRecord): 6, TaskName (org.apache.samza.container.TaskName): 6, SamzaException (org.apache.samza.SamzaException): 5, Config (org.apache.samza.config.Config): 5, SystemStreamMetadata (org.apache.samza.system.SystemStreamMetadata): 5, SystemStream (org.apache.samza.system.SystemStream): 4, LinkedHashMap (java.util.LinkedHashMap): 3, MapConfig (org.apache.samza.config.MapConfig): 3, SinglePartitionWithoutOffsetsSystemAdmin (org.apache.samza.util.SinglePartitionWithoutOffsetsSystemAdmin): 3, MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap): 2