Search in sources :

Example 16 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class TestDirectoryPartitioner method testValidDirectoryUpdating.

/**
 * A directory update is valid when files are only added and none of the previously
 * seen files change. The test builds a partitioning from six files, then re-runs the
 * partitioner against a listing that contains one extra file while passing in the
 * previously obtained descriptor map, and checks that the partition count and the
 * descriptors are still the ones computed for the original six files.
 */
@Test
public void testValidDirectoryUpdating() {
    final String whiteList = ".*";
    final String blackList = "";
    final String groupPattern = "";
    final int expectedPartitionCount = 6;
    final int[][] expectedPartitioning = { { 0 }, { 1 }, { 2 }, { 3 }, { 4 }, { 5 } };

    // Initial directory content.
    final String[] inputFiles = { "part-001.avro", "part-002.avro", "part-003.avro", "part-005.avro", "part-004.avro", "part-006.avro" };
    final long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345 };
    List<FileMetadata> testList = new ArrayList<>();
    for (int idx = 0; idx < inputFiles.length; idx++) {
        testList.add(new FileMetadata(inputFiles[idx], fileLength[idx]));
    }

    DirectoryPartitioner directoryPartitioner =
        new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(testList));
    // No previous descriptor map exists on the first run, hence null.
    Map<Partition, SystemStreamPartitionMetadata> metadataMap =
        directoryPartitioner.getPartitionMetadataMap("hdfs", null);
    Assert.assertEquals(expectedPartitionCount, metadataMap.size());
    Map<Partition, List<String>> descriptorMap = directoryPartitioner.getPartitionDescriptor("hdfs");
    verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedPartitionCount, descriptorMap);

    // Updated directory content: same files as before plus the new part-007.
    final String[] updatedInputFiles = { "part-001.avro", "part-002.avro", "part-003.avro", "part-005.avro", "part-004.avro",
        "part-007.avro", "part-006.avro" };
    final long[] updatedFileLength = { 150582, 138132, 214005, 205738, 158273, 2513454, 982345 };
    // Reuse the same list instance, replacing its content with the updated listing.
    testList.clear();
    for (int idx = 0; idx < updatedInputFiles.length; idx++) {
        testList.add(new FileMetadata(updatedInputFiles[idx], updatedFileLength[idx]));
    }

    directoryPartitioner =
        new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(testList));
    metadataMap = directoryPartitioner.getPartitionMetadataMap("hdfs", descriptorMap);
    // Seven files are listed now, but the partition count must stay at six.
    Assert.assertEquals(expectedPartitionCount, metadataMap.size());
    Map<Partition, List<String>> updatedDescriptorMap = directoryPartitioner.getPartitionDescriptor("hdfs");
    // Verified against the ORIGINAL file list on purpose: the descriptors must not change.
    verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedPartitionCount, updatedDescriptorMap);
}
Also used : Partition(org.apache.samza.Partition) ArrayList(java.util.ArrayList) FileMetadata(org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 17 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class TestDirectoryPartitioner method testInvalidDirectoryUpdating.

/**
 * A directory update is invalid when at least one previously seen file is removed.
 * The test builds a partitioning from six files, then swaps part-004 for part-007 in
 * the listing and expects {@code getPartitionMetadataMap} to throw a
 * {@link SamzaException} when given the previously obtained descriptor map.
 */
@Test
public void testInvalidDirectoryUpdating() {
    final String whiteList = ".*";
    final String blackList = "";
    final String groupPattern = "";
    final int expectedPartitionCount = 6;
    final int[][] expectedPartitioning = { { 0 }, { 1 }, { 2 }, { 3 }, { 4 }, { 5 } };

    // Initial directory content.
    final String[] inputFiles = { "part-001.avro", "part-002.avro", "part-003.avro", "part-005.avro", "part-004.avro", "part-006.avro" };
    final long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345 };
    List<FileMetadata> testList = new ArrayList<>();
    for (int idx = 0; idx < inputFiles.length; idx++) {
        testList.add(new FileMetadata(inputFiles[idx], fileLength[idx]));
    }

    DirectoryPartitioner directoryPartitioner =
        new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(testList));
    // No previous descriptor map exists on the first run, hence null.
    Map<Partition, SystemStreamPartitionMetadata> metadataMap =
        directoryPartitioner.getPartitionMetadataMap("hdfs", null);
    Assert.assertEquals(expectedPartitionCount, metadataMap.size());
    Map<Partition, List<String>> descriptorMap = directoryPartitioner.getPartitionDescriptor("hdfs");
    verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedPartitionCount, descriptorMap);

    // Updated directory content: part-004 has been removed and part-007 takes its slot.
    final String[] updatedInputFiles = { "part-001.avro", "part-002.avro", "part-003.avro", "part-005.avro",
        "part-007.avro", "part-006.avro" };
    final long[] updatedFileLength = { 150582, 138132, 214005, 205738, 158273, 982345 };
    // Reuse the same list instance, replacing its content with the updated listing.
    testList.clear();
    for (int idx = 0; idx < updatedInputFiles.length; idx++) {
        testList.add(new FileMetadata(updatedInputFiles[idx], updatedFileLength[idx]));
    }

    directoryPartitioner =
        new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(testList));
    try {
        directoryPartitioner.getPartitionMetadataMap("hdfs", descriptorMap);
        Assert.fail("Expect exception thrown from getting metadata. Should not reach this point.");
    } catch (SamzaException expected) {
        // Intentionally ignored: the exception is the outcome this test is looking for.
    }
}
Also used : Partition(org.apache.samza.Partition) ArrayList(java.util.ArrayList) FileMetadata(org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) SamzaException(org.apache.samza.SamzaException) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 18 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class TestMultiFileHdfsReader method testSequentialRead.

/**
 * Reads the multi-file reader from start to end and checks every record in order.
 *
 * <p>The expected field values are derived from {@code index % NUM_EVENTS}, and the
 * total number of records read must be {@code 3 * NUM_EVENTS}.
 * NOTE(review): this presumably means {@code descriptors} holds three files with
 * {@code NUM_EVENTS} records each; confirm against the fixture setup.
 *
 * @throws Exception if reading or closing the reader fails
 */
@Test
public void testSequentialRead() throws Exception {
    SystemStreamPartition ssp = new SystemStreamPartition("hdfs", "testStream", new Partition(0));
    // "0:0" is the starting offset (presumably fileIndex:offsetInFile — confirm).
    MultiFileHdfsReader multiReader = new MultiFileHdfsReader(HdfsReaderFactory.ReaderType.AVRO, ssp, Arrays.asList(descriptors), "0:0");
    try {
        int index = 0;
        while (multiReader.hasNext()) {
            GenericRecord record = (GenericRecord) multiReader.readNext().getMessage();
            Assert.assertEquals(index % NUM_EVENTS, record.get(FIELD_1));
            Assert.assertEquals("string_" + (index % NUM_EVENTS), record.get(FIELD_2).toString());
            index++;
        }
        Assert.assertEquals(3 * NUM_EVENTS, index);
    } finally {
        // Close in finally so a failed assertion or read does not leak the reader.
        multiReader.close();
    }
}
Also used : Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) GenericRecord(org.apache.avro.generic.GenericRecord) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 19 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class TestMultiFileHdfsReader method testOutOfRangeFileIndex.

/**
 * Constructing a reader with the starting offset {@code "3:0"} must throw a
 * {@link SamzaException}. If the constructor returns normally, {@code Assert.fail()}
 * raises an {@code AssertionError}, which does not match the expected exception type
 * and therefore fails the test.
 */
@Test(expected = SamzaException.class)
public void testOutOfRangeFileIndex() {
    Partition firstPartition = new Partition(0);
    SystemStreamPartition streamPartition = new SystemStreamPartition("hdfs", "testStream", firstPartition);
    new MultiFileHdfsReader(
        HdfsReaderFactory.ReaderType.AVRO,
        streamPartition,
        Arrays.asList(descriptors),
        "3:0");
    // Only reached when no exception was thrown by the constructor.
    Assert.fail();
}
Also used : Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Example 20 with Partition

use of org.apache.samza.Partition in project samza by apache.

the class TestMultiFileHdfsReader method testReconnect.

/**
 * Reads part of the stream, reconnects the reader, and checks that the first envelope
 * read after {@code reconnect()} equals the last envelope read before it.
 */
@Test
public void testReconnect() {
    SystemStreamPartition ssp = new SystemStreamPartition("hdfs", "testStream", new Partition(0));
    MultiFileHdfsReader multiReader = new MultiFileHdfsReader(HdfsReaderFactory.ReaderType.AVRO, ssp, Arrays.asList(descriptors), "0:0");
    try {
        // first read a few events, and then reconnect
        for (int i = 0; i < NUM_EVENTS / 2; i++) {
            multiReader.readNext();
        }
        IncomingMessageEnvelope envelope = multiReader.readNext();
        multiReader.reconnect();
        IncomingMessageEnvelope envelopeAfterReconnect = multiReader.readNext();
        Assert.assertEquals(envelope, envelopeAfterReconnect);
    } finally {
        // Close in finally so a failed read, reconnect or assertion does not leak the reader.
        multiReader.close();
    }
}
Also used : Partition(org.apache.samza.Partition) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) IncomingMessageEnvelope(org.apache.samza.system.IncomingMessageEnvelope) SystemStreamPartition(org.apache.samza.system.SystemStreamPartition) Test(org.junit.Test)

Aggregations

Partition (org.apache.samza.Partition)42 Test (org.junit.Test)31 SystemStreamPartition (org.apache.samza.system.SystemStreamPartition)30 List (java.util.List)15 HashMap (java.util.HashMap)13 IncomingMessageEnvelope (org.apache.samza.system.IncomingMessageEnvelope)11 ArrayList (java.util.ArrayList)10 SystemStreamPartitionMetadata (org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata)8 HashSet (java.util.HashSet)7 FileMetadata (org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata)7 GenericRecord (org.apache.avro.generic.GenericRecord)6 TaskName (org.apache.samza.container.TaskName)6 SamzaException (org.apache.samza.SamzaException)5 Config (org.apache.samza.config.Config)5 SystemStreamMetadata (org.apache.samza.system.SystemStreamMetadata)5 SystemStream (org.apache.samza.system.SystemStream)4 LinkedHashMap (java.util.LinkedHashMap)3 MapConfig (org.apache.samza.config.MapConfig)3 SinglePartitionWithoutOffsetsSystemAdmin (org.apache.samza.util.SinglePartitionWithoutOffsetsSystemAdmin)3 MetricsRegistryMap (org.apache.samza.metrics.MetricsRegistryMap)2