Search in sources :

Example 6 with FileMetadata

use of org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata in project samza by apache.

In class TestDirectoryPartitioner, method testBasicBlackListFiltering.

/**
 * Black-list filtering scenario.
 *
 * <p>Nine files are offered to the partitioner: six named {@code part-NNN.avro} and three
 * named {@code delta-NN.avro}. The white list accepts everything, the black list targets
 * the {@code delta-} files, and no group pattern is given. The test expects six partitions,
 * each holding exactly one of the {@code part-} files (indices 0, 1, 2, 4, 6 and 8 of
 * {@code inputFiles}).
 */
@Test
public void testBasicBlackListFiltering() {
    String[] inputFiles = { "part-001.avro", "part-002.avro", "part-003.avro", "delta-01.avro", "part-005.avro", "delta-03.avro", "part-004.avro", "delta-02.avro", "part-006.avro" };
    long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };
    // Take the count from the fixture itself so it cannot drift from the array contents.
    List<FileMetadata> testList = new ArrayList<>(inputFiles.length);
    for (int i = 0; i < inputFiles.length; i++) {
        testList.add(new FileMetadata(inputFiles[i], fileLength[i]));
    }
    String whiteList = ".*";
    String blackList = "delta-.*\\.avro";
    String groupPattern = "";
    int expectedNumPartition = 6;
    // Each inner array lists the inputFiles indices expected in one partition.
    int[][] expectedPartitioning = { { 0 }, { 1 }, { 2 }, { 4 }, { 6 }, { 8 } };
    DirectoryPartitioner directoryPartitioner = new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(testList));
    Map<Partition, SystemStreamPartitionMetadata> metadataMap = directoryPartitioner.getPartitionMetadataMap("hdfs", null);
    Assert.assertEquals(expectedNumPartition, metadataMap.size());
    Map<Partition, List<String>> descriptorMap = directoryPartitioner.getPartitionDescriptor("hdfs");
    verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedNumPartition, descriptorMap);
}
Also used : Partition(org.apache.samza.Partition) FileMetadata(org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata) ArrayList(java.util.ArrayList) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 7 with FileMetadata

use of org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata in project samza by apache.

In class TestDirectoryPartitioner, method testBasicGrouping.

/**
 * Grouping scenario.
 *
 * <p>Nine {@code .avro} files from three runs (timestamps ending 13-04, 13-05 and 13-06)
 * are offered, each run contributing {@code part.0}, {@code part.1} and {@code part.2}.
 * With a group pattern containing the {@code [id]} token after {@code part.}, the test
 * expects three partitions, each collecting the files that share the same part number
 * across the three runs.
 */
@Test
public void testBasicGrouping() {
    String[] inputFiles = { "00_10-run_2016-08-15-13-04-part.0.150582.avro", "00_10-run_2016-08-15-13-04-part.1.138132.avro", "00_10-run_2016-08-15-13-04-part.2.214005.avro", "00_10-run_2016-08-15-13-05-part.0.205738.avro", "00_10-run_2016-08-15-13-05-part.1.158273.avro", "00_10-run_2016-08-15-13-05-part.2.982345.avro", "00_10-run_2016-08-15-13-06-part.0.313245.avro", "00_10-run_2016-08-15-13-06-part.1.234212.avro", "00_10-run_2016-08-15-13-06-part.2.413232.avro" };
    long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };
    // Take the count from the fixture itself so it cannot drift from the array contents.
    List<FileMetadata> testList = new ArrayList<>(inputFiles.length);
    for (int i = 0; i < inputFiles.length; i++) {
        testList.add(new FileMetadata(inputFiles[i], fileLength[i]));
    }
    String whiteList = ".*\\.avro";
    String blackList = "";
    // Sample file name with the grouped portion marked: 00_10-run_2016-08-15-13-04-part.[id].138132.avro
    String groupPattern = ".*part\\.[id]\\..*\\.avro";
    int expectedNumPartition = 3;
    int[][] expectedPartitioning = {
        // files at index 0, 3, 6 (part.0 of each run) should be grouped into one partition
        { 0, 3, 6 },
        // likewise for part.1 of each run
        { 1, 4, 7 },
        // and for part.2 of each run
        { 2, 5, 8 }
    };
    DirectoryPartitioner directoryPartitioner = new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(testList));
    Map<Partition, SystemStreamPartitionMetadata> metadataMap = directoryPartitioner.getPartitionMetadataMap("hdfs", null);
    Assert.assertEquals(expectedNumPartition, metadataMap.size());
    Map<Partition, List<String>> descriptorMap = directoryPartitioner.getPartitionDescriptor("hdfs");
    verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedNumPartition, descriptorMap);
}
Also used : Partition(org.apache.samza.Partition) FileMetadata(org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata) ArrayList(java.util.ArrayList) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 8 with FileMetadata

use of org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata in project samza by apache.

In class TestDirectoryPartitioner, method testBasicWhiteListFiltering.

/**
 * White-list filtering scenario.
 *
 * <p>The same nine-file fixture ({@code part-NNN.avro} mixed with {@code delta-NN.avro}) is
 * offered with a white list that targets only the {@code part-} files, an empty black list
 * and an empty group pattern. Six partitions are expected, one per {@code part-} file
 * (indices 0, 1, 2, 4, 6 and 8 of the file-name array).
 */
@Test
public void testBasicWhiteListFiltering() {
    final String[] fileNames = { "part-001.avro", "part-002.avro", "part-003.avro", "delta-01.avro", "part-005.avro", "delta-03.avro", "part-004.avro", "delta-02.avro", "part-006.avro" };
    final long[] fileSizes = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };

    // Pair every file name with the size stored at the same position.
    final List<FileMetadata> files = new ArrayList<>();
    int position = 0;
    for (String fileName : fileNames) {
        files.add(new FileMetadata(fileName, fileSizes[position]));
        position++;
    }

    final String acceptPattern = "part-.*\\.avro";
    final String rejectPattern = "";
    final String grouping = "";
    final int partitionCount = 6;
    // One single-element row per file that should survive the white list.
    final int[][] expectedLayout = { { 0 }, { 1 }, { 2 }, { 4 }, { 6 }, { 8 } };

    final DirectoryPartitioner partitioner = new DirectoryPartitioner(acceptPattern, rejectPattern, grouping, new TestFileSystemAdapter(files));

    final Map<Partition, SystemStreamPartitionMetadata> partitionMetadata = partitioner.getPartitionMetadataMap("hdfs", null);
    Assert.assertEquals(partitionCount, partitionMetadata.size());

    final Map<Partition, List<String>> partitionDescriptors = partitioner.getPartitionDescriptor("hdfs");
    verifyPartitionDescriptor(fileNames, expectedLayout, partitionCount, partitionDescriptors);
}
Also used : Partition(org.apache.samza.Partition) FileMetadata(org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata) ArrayList(java.util.ArrayList) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Example 9 with FileMetadata

use of org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata in project samza by apache.

In class TestDirectoryPartitioner, method testWhiteListBlackListFiltering.

/**
 * Combined white-list and black-list filtering scenario.
 *
 * <p>The nine-file fixture is offered with a white list targeting the {@code part-} files
 * and a black list naming {@code part-002.avro}; no group pattern is given. The test
 * expects five partitions, one for each {@code part-} file other than
 * {@code part-002.avro} (indices 0, 2, 4, 6 and 8 of {@code inputFiles}).
 */
@Test
public void testWhiteListBlackListFiltering() {
    String[] inputFiles = { "part-001.avro", "part-002.avro", "part-003.avro", "delta-01.avro", "part-005.avro", "delta-03.avro", "part-004.avro", "delta-02.avro", "part-006.avro" };
    long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };
    // Take the count from the fixture itself so it cannot drift from the array contents.
    List<FileMetadata> testList = new ArrayList<>(inputFiles.length);
    for (int i = 0; i < inputFiles.length; i++) {
        testList.add(new FileMetadata(inputFiles[i], fileLength[i]));
    }
    String whiteList = "part-.*\\.avro";
    // Escape the dot, as the other patterns in these tests do, so only the literal
    // name "part-002.avro" is targeted rather than any character in that position.
    String blackList = "part-002\\.avro";
    String groupPattern = "";
    int expectedNumPartition = 5;
    // Each inner array lists the inputFiles indices expected in one partition.
    int[][] expectedPartitioning = { { 0 }, { 2 }, { 4 }, { 6 }, { 8 } };
    DirectoryPartitioner directoryPartitioner = new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(testList));
    Map<Partition, SystemStreamPartitionMetadata> metadataMap = directoryPartitioner.getPartitionMetadataMap("hdfs", null);
    Assert.assertEquals(expectedNumPartition, metadataMap.size());
    Map<Partition, List<String>> descriptorMap = directoryPartitioner.getPartitionDescriptor("hdfs");
    verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedNumPartition, descriptorMap);
}
Also used : Partition(org.apache.samza.Partition) FileMetadata(org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata) ArrayList(java.util.ArrayList) SystemStreamPartitionMetadata(org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata) List(java.util.List) ArrayList(java.util.ArrayList) Test(org.junit.Test)

Aggregations

FileMetadata (org.apache.samza.system.hdfs.partitioner.FileSystemAdapter.FileMetadata): 9, ArrayList (java.util.ArrayList): 8, List (java.util.List): 8, Partition (org.apache.samza.Partition): 7, SystemStreamPartitionMetadata (org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata): 7, Test (org.junit.Test): 6, HashMap (java.util.HashMap): 2, SamzaException (org.apache.samza.SamzaException): 2, HashSet (java.util.HashSet): 1