Example usage of org.apache.samza.Partition from the Apache Samza project.
Class TestDirectoryPartitioner, method testValidDirectoryUpdating:
@Test
public void testValidDirectoryUpdating() {
  // A directory update is valid when files are only appended to the directory;
  // none of the previously observed files may change or disappear.
  final String[] originalFiles =
      {"part-001.avro", "part-002.avro", "part-003.avro", "part-005.avro", "part-004.avro", "part-006.avro"};
  final long[] originalLengths = {150582, 138132, 214005, 205738, 158273, 982345};
  List<FileMetadata> fileList = new ArrayList<>();
  for (int i = 0; i < originalFiles.length; i++) {
    fileList.add(new FileMetadata(originalFiles[i], originalLengths[i]));
  }
  String whiteList = ".*";
  String blackList = "";
  String groupPattern = "";
  int expectedPartitionCount = 6;
  int[][] expectedPartitioning = {{0}, {1}, {2}, {3}, {4}, {5}};
  DirectoryPartitioner partitioner =
      new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(fileList));
  Map<Partition, SystemStreamPartitionMetadata> metadataMap = partitioner.getPartitionMetadataMap("hdfs", null);
  Assert.assertEquals(expectedPartitionCount, metadataMap.size());
  Map<Partition, List<String>> descriptorMap = partitioner.getPartitionDescriptor("hdfs");
  verifyPartitionDescriptor(originalFiles, expectedPartitioning, expectedPartitionCount, descriptorMap);

  // Re-list the directory with one extra file (part-007) added; everything else untouched.
  final String[] updatedFiles =
      {"part-001.avro", "part-002.avro", "part-003.avro", "part-005.avro", "part-004.avro",
          "part-007.avro", "part-006.avro"};
  final long[] updatedLengths = {150582, 138132, 214005, 205738, 158273, 2513454, 982345};
  fileList.clear();
  for (int i = 0; i < updatedFiles.length; i++) {
    fileList.add(new FileMetadata(updatedFiles[i], updatedLengths[i]));
  }
  partitioner = new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(fileList));
  metadataMap = partitioner.getPartitionMetadataMap("hdfs", descriptorMap);
  // Still expect 6 partitions, not 7: the pre-existing descriptors pin the layout,
  // so the verification below deliberately uses the ORIGINAL file set.
  Assert.assertEquals(expectedPartitionCount, metadataMap.size());
  Map<Partition, List<String>> updatedDescriptorMap = partitioner.getPartitionDescriptor("hdfs");
  verifyPartitionDescriptor(originalFiles, expectedPartitioning, expectedPartitionCount, updatedDescriptorMap);
}
Example usage of org.apache.samza.Partition from the Apache Samza project.
Class TestDirectoryPartitioner, method testInvalidDirectoryUpdating:
@Test
public void testInvalidDirectoryUpdating() {
  // A directory update is invalid as soon as any previously observed file is removed.
  final String[] originalFiles =
      {"part-001.avro", "part-002.avro", "part-003.avro", "part-005.avro", "part-004.avro", "part-006.avro"};
  final long[] originalLengths = {150582, 138132, 214005, 205738, 158273, 982345};
  List<FileMetadata> fileList = new ArrayList<>();
  for (int i = 0; i < originalFiles.length; i++) {
    fileList.add(new FileMetadata(originalFiles[i], originalLengths[i]));
  }
  String whiteList = ".*";
  String blackList = "";
  String groupPattern = "";
  int expectedPartitionCount = 6;
  int[][] expectedPartitioning = {{0}, {1}, {2}, {3}, {4}, {5}};
  DirectoryPartitioner partitioner =
      new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(fileList));
  Map<Partition, SystemStreamPartitionMetadata> metadataMap = partitioner.getPartitionMetadataMap("hdfs", null);
  Assert.assertEquals(expectedPartitionCount, metadataMap.size());
  Map<Partition, List<String>> descriptorMap = partitioner.getPartitionDescriptor("hdfs");
  verifyPartitionDescriptor(originalFiles, expectedPartitioning, expectedPartitionCount, descriptorMap);

  // Re-list the directory with part-004 removed and part-007 in its place.
  final String[] updatedFiles =
      {"part-001.avro", "part-002.avro", "part-003.avro", "part-005.avro",
          "part-007.avro", "part-006.avro"};
  final long[] updatedLengths = {150582, 138132, 214005, 205738, 158273, 982345};
  fileList.clear();
  for (int i = 0; i < updatedFiles.length; i++) {
    fileList.add(new FileMetadata(updatedFiles[i], updatedLengths[i]));
  }
  partitioner = new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(fileList));
  try {
    partitioner.getPartitionMetadataMap("hdfs", descriptorMap);
    Assert.fail("Expect exception thrown from getting metadata. Should not reach this point.");
  } catch (SamzaException ignored) {
    // Expected: the missing part-004 invalidates the existing partition descriptors.
  }
}
Example usage of org.apache.samza.Partition from the Apache Samza project.
Class TestMultiFileHdfsReader, method testSequentialRead:
@Test
public void testSequentialRead() throws Exception {
  // Read straight through all three descriptor files and verify every event,
  // starting from offset "0:0" (first file, first record).
  SystemStreamPartition ssp = new SystemStreamPartition("hdfs", "testStream", new Partition(0));
  MultiFileHdfsReader reader =
      new MultiFileHdfsReader(HdfsReaderFactory.ReaderType.AVRO, ssp, Arrays.asList(descriptors), "0:0");
  int count = 0;
  for (; reader.hasNext(); count++) {
    GenericRecord record = (GenericRecord) reader.readNext().getMessage();
    // Each file repeats the same NUM_EVENTS records, so assertions cycle modulo NUM_EVENTS.
    Assert.assertEquals(count % NUM_EVENTS, record.get(FIELD_1));
    Assert.assertEquals("string_" + (count % NUM_EVENTS), record.get(FIELD_2).toString());
  }
  Assert.assertEquals(3 * NUM_EVENTS, count);
  reader.close();
}
Example usage of org.apache.samza.Partition from the Apache Samza project.
Class TestMultiFileHdfsReader, method testOutOfRangeFileIndex:
@Test(expected = SamzaException.class)
public void testOutOfRangeFileIndex() {
  // Offset "3:0" names file index 3, which is past the last descriptor,
  // so construction must throw a SamzaException.
  new MultiFileHdfsReader(HdfsReaderFactory.ReaderType.AVRO,
      new SystemStreamPartition("hdfs", "testStream", new Partition(0)),
      Arrays.asList(descriptors), "3:0");
  Assert.fail();
}
Example usage of org.apache.samza.Partition from the Apache Samza project.
Class TestMultiFileHdfsReader, method testReconnect:
@Test
public void testReconnect() {
  SystemStreamPartition ssp = new SystemStreamPartition("hdfs", "testStream", new Partition(0));
  MultiFileHdfsReader reader =
      new MultiFileHdfsReader(HdfsReaderFactory.ReaderType.AVRO, ssp, Arrays.asList(descriptors), "0:0");
  // Consume part of the first file before reconnecting.
  int toSkip = NUM_EVENTS / 2;
  while (toSkip-- > 0) {
    reader.readNext();
  }
  IncomingMessageEnvelope beforeReconnect = reader.readNext();
  reader.reconnect();
  // The asserted equality shows the reader re-delivers the last-read envelope
  // after a reconnect (it resumes from the last recorded offset).
  IncomingMessageEnvelope afterReconnect = reader.readNext();
  Assert.assertEquals(beforeReconnect, afterReconnect);
  reader.close();
}
End of aggregated org.apache.samza.Partition usage examples.