Use of org.apache.samza.Partition in project samza by apache.
The class TestTaskConfigJava, method testGetBroadcastSystemStreamPartitions.
@Test
public void testGetBroadcastSystemStreamPartitions() {
  HashMap<String, String> map = new HashMap<String, String>();
  map.put("task.broadcast.inputs", "kafka.foo#4, kafka.boo#5, kafka.z-o-o#[12-14], kafka.foo.bar#[3-4]");
  Config config = new MapConfig(map);
  TaskConfigJava taskConfig = new TaskConfigJava(config);
  Set<SystemStreamPartition> systemStreamPartitionSet = taskConfig.getBroadcastSystemStreamPartitions();
  HashSet<SystemStreamPartition> expected = new HashSet<SystemStreamPartition>();
  expected.add(new SystemStreamPartition("kafka", "foo", new Partition(4)));
  expected.add(new SystemStreamPartition("kafka", "boo", new Partition(5)));
  expected.add(new SystemStreamPartition("kafka", "z-o-o", new Partition(12)));
  expected.add(new SystemStreamPartition("kafka", "z-o-o", new Partition(13)));
  expected.add(new SystemStreamPartition("kafka", "z-o-o", new Partition(14)));
  expected.add(new SystemStreamPartition("kafka", "foo.bar", new Partition(3)));
  expected.add(new SystemStreamPartition("kafka", "foo.bar", new Partition(4)));
  assertEquals(expected, systemStreamPartitionSet);

  map.put("task.broadcast.inputs", "kafka.foo");
  taskConfig = new TaskConfigJava(new MapConfig(map));
  boolean catchCorrectException = false;
  try {
    taskConfig.getBroadcastSystemStreamPartitions();
  } catch (IllegalArgumentException e) {
    catchCorrectException = true;
  }
  assertTrue(catchCorrectException);

  map.put("task.broadcast.inputs", "kafka.org.apache.events.WhitelistedIps#1-2");
  taskConfig = new TaskConfigJava(new MapConfig(map));
  boolean invalidFormatException = false;
  try {
    taskConfig.getBroadcastSystemStreamPartitions();
  } catch (IllegalArgumentException e) {
    invalidFormatException = true;
  }
  assertTrue(invalidFormatException);
}
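The accepted formats for task.broadcast.inputs exercised by this test are system.stream#partition and system.stream#[start-end]; an entry with no partition ("kafka.foo") or with a bare, unbracketed range ("...#1-2") is rejected. The following is a rough, self-contained sketch of how such values can be expanded into SystemStreamPartition objects. It is written for illustration only; the class and method names below are made up and this is not the actual TaskConfigJava parsing code.

import java.util.HashSet;
import java.util.Set;
import org.apache.samza.Partition;
import org.apache.samza.system.SystemStreamPartition;

public class BroadcastInputSketch {
  /**
   * Expands entries like "kafka.foo#4" or "kafka.z-o-o#[12-14]" into SSPs.
   * The system name is everything before the first '.'; the stream name may
   * itself contain dots (e.g. "kafka.foo.bar#[3-4]" -> stream "foo.bar").
   */
  public static Set<SystemStreamPartition> parse(String configValue) {
    Set<SystemStreamPartition> ssps = new HashSet<>();
    for (String entry : configValue.split(",")) {
      String[] streamAndPartitions = entry.trim().split("#");
      if (streamAndPartitions.length != 2) {
        // No partition spec at all, e.g. "kafka.foo".
        throw new IllegalArgumentException("Missing partition spec: " + entry);
      }
      int firstDot = streamAndPartitions[0].indexOf('.');
      String system = streamAndPartitions[0].substring(0, firstDot);
      String stream = streamAndPartitions[0].substring(firstDot + 1);
      String partitionSpec = streamAndPartitions[1];
      if (partitionSpec.startsWith("[") && partitionSpec.endsWith("]")) {
        // Bracketed range, e.g. "[12-14]" -> partitions 12, 13 and 14.
        String[] range = partitionSpec.substring(1, partitionSpec.length() - 1).split("-");
        int start = Integer.parseInt(range[0]);
        int end = Integer.parseInt(range[1]);
        for (int p = start; p <= end; p++) {
          ssps.add(new SystemStreamPartition(system, stream, new Partition(p)));
        }
      } else {
        // Single partition, e.g. "4". An unbracketed range like "1-2" fails here
        // with a NumberFormatException, which is an IllegalArgumentException.
        ssps.add(new SystemStreamPartition(system, stream, new Partition(Integer.parseInt(partitionSpec))));
      }
    }
    return ssps;
  }
}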
Use of org.apache.samza.Partition in project samza by apache.
The class PartitionDescriptorUtil, method getDescriptorMapFromJson.
public static Map<Partition, List<String>> getDescriptorMapFromJson(String json) {
  try {
    @SuppressWarnings("unchecked")
    Map<String, String> rawMap = new ObjectMapper().readValue(json, HashMap.class);
    Map<Partition, List<String>> descriptorMap = new HashMap<>();
    rawMap.forEach((key, value) -> descriptorMap.put(new Partition(Integer.valueOf(key)), getPathsFromString(value)));
    return descriptorMap;
  } catch (IOException | NumberFormatException e) {
    throw new SamzaException("Failed to convert json: " + json, e);
  }
}
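The JSON handled above maps partition ids (as string keys) to a single string per partition that getPathsFromString splits into a path list. A hedged usage sketch follows; the comma delimiter inside each value and the file paths are assumptions for illustration, and the import for PartitionDescriptorUtil itself is omitted here.

import java.util.List;
import java.util.Map;
import org.apache.samza.Partition;

public class DescriptorJsonExample {
  public static void main(String[] args) {
    // Hypothetical descriptor JSON: partition "0" backed by two files, "1" by one.
    // The comma delimiter is an assumption about what getPathsFromString expects.
    String json = "{\"0\":\"hdfs://logs/part-00000.avro,hdfs://logs/part-00001.avro\","
        + "\"1\":\"hdfs://logs/part-00002.avro\"}";
    Map<Partition, List<String>> descriptors = PartitionDescriptorUtil.getDescriptorMapFromJson(json);
    // A non-numeric key or malformed JSON would surface as a SamzaException instead.
    System.out.println(descriptors.get(new Partition(0)));
  }
}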
Use of org.apache.samza.Partition in project samza by apache.
The class DirectoryPartitioner, method getPartitionMetadataMap.
/**
 * Get partition metadata for a stream
 * @param streamName name of the stream; should contain the information about the path of the
 *                   root directory
 * @param existingPartitionDescriptorMap map of the existing partition descriptor
 * @return map of SSP metadata
 */
public Map<Partition, SystemStreamPartitionMetadata> getPartitionMetadataMap(String streamName,
    @Nullable Map<Partition, List<String>> existingPartitionDescriptorMap) {
  LOG.info("Trying to obtain metadata for " + streamName);
  LOG.info("Existing partition descriptor: " + (MapUtils.isEmpty(existingPartitionDescriptorMap) ? "empty" : existingPartitionDescriptorMap));
  Map<Partition, SystemStreamPartitionMetadata> partitionMetadataMap = new HashMap<>();
  partitionDescriptorMap.putIfAbsent(streamName, new HashMap<>());
  List<FileMetadata> filteredFiles = getFilteredFiles(streamName);
  if (!MapUtils.isEmpty(existingPartitionDescriptorMap)) {
    filteredFiles = validateAndGetOriginalFilteredFiles(filteredFiles, existingPartitionDescriptorMap);
  }
  List<List<FileMetadata>> groupedPartitions = generatePartitionGroups(filteredFiles);
  int partitionId = 0;
  for (List<FileMetadata> fileGroup : groupedPartitions) {
    Partition partition = new Partition(partitionId);
    List<String> pathList = new ArrayList<>();
    List<String> lengthList = new ArrayList<>();
    fileGroup.forEach(fileMetadata -> {
      pathList.add(fileMetadata.getPath());
      lengthList.add(String.valueOf(fileMetadata.getLen()));
    });
    String oldestOffset = MultiFileHdfsReader.generateOffset(0, "0");
    String newestOffset = MultiFileHdfsReader.generateOffset(lengthList.size() - 1, String.valueOf(lengthList.get(lengthList.size() - 1)));
    SystemStreamPartitionMetadata metadata = new SystemStreamPartitionMetadata(oldestOffset, newestOffset, null);
    partitionMetadataMap.put(partition, metadata);
    partitionDescriptorMap.get(streamName).put(partition, pathList);
    partitionId++;
  }
  LOG.info("Obtained metadata map as: " + partitionMetadataMap);
  LOG.info("Computed partition description as: " + partitionDescriptorMap);
  return partitionMetadataMap;
}
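The two generateOffset calls above anchor each partition's offset range: the oldest offset points at byte 0 of the first file in the group, and the newest pairs the index of the last file with that file's length. Below is a minimal sketch of such a composite (file index, byte offset) encoding; the colon-separated format is an assumption for illustration and need not match MultiFileHdfsReader's actual encoding.

// Sketch only: encodes a position as "<fileIndex>:<byteOffsetWithinFile>".
public class CompositeOffset {

  public static String generate(int fileIndex, String offsetWithinFile) {
    return fileIndex + ":" + offsetWithinFile;
  }

  public static int fileIndex(String offset) {
    return Integer.parseInt(offset.split(":")[0]);
  }

  public static String offsetWithinFile(String offset) {
    return offset.split(":")[1];
  }
}

With this encoding, generate(0, "0") would mirror the oldest-offset call above, and generate(fileCount - 1, lastFileLength) the newest-offset call.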
Use of org.apache.samza.Partition in project samza by apache.
The class TestHdfsSystemConsumer, method testEmptyStagingDirectory.
/*
 * Ensure that empty staging directory will not break system admin,
 * but should fail system consumer
 */
@Test
public void testEmptyStagingDirectory() throws Exception {
  Map<String, String> configMap = new HashMap<>();
  configMap.put(String.format(HdfsConfig.CONSUMER_PARTITIONER_WHITELIST(), SYSTEM_NAME), ".*avro");
  Config config = new MapConfig(configMap);
  HdfsSystemFactory systemFactory = new HdfsSystemFactory();
  // create admin and do partitioning
  HdfsSystemAdmin systemAdmin = systemFactory.getAdmin(SYSTEM_NAME, config);
  String stream = WORKING_DIRECTORY;
  Set<String> streamNames = new HashSet<>();
  streamNames.add(stream);
  generateAvroDataFiles();
  Map<String, SystemStreamMetadata> streamMetadataMap = systemAdmin.getSystemStreamMetadata(streamNames);
  SystemStreamMetadata systemStreamMetadata = streamMetadataMap.get(stream);
  Assert.assertEquals(NUM_FILES, systemStreamMetadata.getSystemStreamPartitionMetadata().size());
  // create consumer and read from files
  HdfsSystemConsumer systemConsumer = systemFactory.getConsumer(SYSTEM_NAME, config, new NoOpMetricsRegistry());
  Partition partition = new Partition(0);
  SystemStreamPartition ssp = new SystemStreamPartition(SYSTEM_NAME, stream, partition);
  try {
    systemConsumer.register(ssp, "0");
    Assert.fail("Empty staging directory should fail system consumer");
  } catch (UncheckedExecutionException e) {
    Assert.assertTrue(e.getCause() instanceof SamzaException);
  }
}
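For contrast with the failure case above, the fragment below sketches how the happy path could look once the staging directory holds the partition descriptors: every partition reported by the admin is registered with the consumer from its oldest offset. It reuses the local variables from the test, assumes the staging directory has been populated, and is illustrative only rather than part of the actual test class.

// Hedged sketch: registration is expected to succeed when descriptors exist.
SystemStreamMetadata healthyMetadata = streamMetadataMap.get(stream);
healthyMetadata.getSystemStreamPartitionMetadata().forEach((p, partitionMetadata) -> {
  SystemStreamPartition sspToRead = new SystemStreamPartition(SYSTEM_NAME, stream, p);
  systemConsumer.register(sspToRead, partitionMetadata.getOldestOffset());
});
systemConsumer.start();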
Use of org.apache.samza.Partition in project samza by apache.
The class TestTaskCallbackManager, method testUpdateCallbackOutofOrder.
@Test
public void testUpdateCallbackOutofOrder() {
  TaskName taskName = new TaskName("Partition 0");
  SystemStreamPartition ssp = new SystemStreamPartition("kafka", "topic", new Partition(0));
  ReadableCoordinator coordinator = new ReadableCoordinator(taskName);
  // simulate out of order
  IncomingMessageEnvelope envelope2 = new IncomingMessageEnvelope(ssp, "2", null, null);
  TaskCallbackImpl callback2 = new TaskCallbackImpl(listener, taskName, envelope2, coordinator, 2, 0);
  List<TaskCallbackImpl> callbacksToUpdate = callbackManager.updateCallback(callback2);
  assertTrue(callbacksToUpdate.isEmpty());
  IncomingMessageEnvelope envelope1 = new IncomingMessageEnvelope(ssp, "1", null, null);
  TaskCallbackImpl callback1 = new TaskCallbackImpl(listener, taskName, envelope1, coordinator, 1, 0);
  callbacksToUpdate = callbackManager.updateCallback(callback1);
  assertTrue(callbacksToUpdate.isEmpty());
  IncomingMessageEnvelope envelope0 = new IncomingMessageEnvelope(ssp, "0", null, null);
  TaskCallbackImpl callback0 = new TaskCallbackImpl(listener, taskName, envelope0, coordinator, 0, 0);
  callbacksToUpdate = callbackManager.updateCallback(callback0);
  assertEquals(3, callbacksToUpdate.size());
  TaskCallbackImpl callback = callbacksToUpdate.get(0);
  assertTrue(callback.matchSeqNum(0));
  assertEquals(ssp, callback.envelope.getSystemStreamPartition());
  assertEquals("0", callback.envelope.getOffset());
  callback = callbacksToUpdate.get(1);
  assertTrue(callback.matchSeqNum(1));
  assertEquals(ssp, callback.envelope.getSystemStreamPartition());
  assertEquals("1", callback.envelope.getOffset());
  callback = callbacksToUpdate.get(2);
  assertTrue(callback.matchSeqNum(2));
  assertEquals(ssp, callback.envelope.getSystemStreamPartition());
  assertEquals("2", callback.envelope.getOffset());
}
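The assertions above capture the contract: completing callbacks 2 and 1 first releases nothing, and completing 0 releases all three in sequence order. The sketch below shows that ordering rule in isolation, buffering completions by sequence number and releasing only the contiguous prefix; it is a simplified illustration, not the actual TaskCallbackManager implementation.

import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap;

// Simplified illustration of the release rule asserted above.
public class SequenceGate<T> {

  private final TreeMap<Long, T> pending = new TreeMap<>();
  private long nextSeq = 0;

  /** Buffers a completed item and returns whatever contiguous run is now releasable. */
  public List<T> complete(long seqNum, T item) {
    pending.put(seqNum, item);
    List<T> releasable = new ArrayList<>();
    while (!pending.isEmpty() && pending.firstKey() == nextSeq) {
      releasable.add(pending.pollFirstEntry().getValue());
      nextSeq++;
    }
    return releasable;
  }
}

Completing 2, then 1, then 0 returns an empty list twice and then the run [0, 1, 2], matching the three updateCallback calls in the test.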