use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class TestDirectoryPartitioner method testBasicWhiteListFiltering.
/**
 * White-list-only filtering: with the white list {@code part-.*\.avro}, an empty black list
 * and an empty group pattern, the six {@code part-*} inputs are expected to survive, each in
 * a partition of its own, while the three {@code delta-*} inputs are expected to be dropped.
 */
@Test
public void testBasicWhiteListFiltering() {
  String[] inputFiles = {
    "part-001.avro", "part-002.avro", "part-003.avro",
    "delta-01.avro", "part-005.avro", "delta-03.avro",
    "part-004.avro", "delta-02.avro", "part-006.avro"
  };
  long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };

  // Pair every file name with its length to form the listing served by the fake file system.
  List<FileMetadata> testList = new ArrayList<>();
  int idx = 0;
  while (idx < inputFiles.length) {
    testList.add(new FileMetadata(inputFiles[idx], fileLength[idx]));
    idx++;
  }

  // Each inner array lists the inputFiles positions expected in one partition
  // (interpretation is up to verifyPartitionDescriptor, which is defined elsewhere).
  int[][] expectedPartitioning = { { 0 }, { 1 }, { 2 }, { 4 }, { 6 }, { 8 } };
  int expectedNumPartition = 6;

  DirectoryPartitioner partitioner =
      new DirectoryPartitioner("part-.*\\.avro", "", "", new TestFileSystemAdapter(testList));

  Map<Partition, SystemStreamPartitionMetadata> partitionMetadata =
      partitioner.getPartitionMetadataMap("hdfs", null);
  Assert.assertEquals(expectedNumPartition, partitionMetadata.size());

  Map<Partition, List<String>> partitionDescriptors = partitioner.getPartitionDescriptor("hdfs");
  verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedNumPartition, partitionDescriptors);
}
use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class TestDirectoryPartitioner method testBasicBlackListFiltering.
/**
 * Black-list filtering with a match-everything white list: the white list {@code .*} admits
 * all nine inputs and the black list {@code delta-.*\.avro} is expected to remove the three
 * {@code delta-*} files, leaving six single-file partitions.
 */
@Test
public void testBasicBlackListFiltering() {
  final String acceptAll = ".*";
  final String rejectDeltas = "delta-.*\\.avro";
  final String noGrouping = "";

  String[] inputFiles = {
    "part-001.avro", "part-002.avro", "part-003.avro",
    "delta-01.avro", "part-005.avro", "delta-03.avro",
    "part-004.avro", "delta-02.avro", "part-006.avro"
  };
  long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };

  // Build the file listing that the fake file system adapter will return.
  List<FileMetadata> testList = new ArrayList<>();
  for (int pos = 0; pos != inputFiles.length; ++pos) {
    FileMetadata entry = new FileMetadata(inputFiles[pos], fileLength[pos]);
    testList.add(entry);
  }

  int expectedNumPartition = 6;
  // Positions (within inputFiles) of the files expected to remain, one per partition.
  int[][] expectedPartitioning = { { 0 }, { 1 }, { 2 }, { 4 }, { 6 }, { 8 } };

  TestFileSystemAdapter fakeFileSystem = new TestFileSystemAdapter(testList);
  DirectoryPartitioner partitioner =
      new DirectoryPartitioner(acceptAll, rejectDeltas, noGrouping, fakeFileSystem);

  Map<Partition, SystemStreamPartitionMetadata> partitionMetadata =
      partitioner.getPartitionMetadataMap("hdfs", null);
  Assert.assertEquals(expectedNumPartition, partitionMetadata.size());

  Map<Partition, List<String>> partitionDescriptors = partitioner.getPartitionDescriptor("hdfs");
  verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedNumPartition, partitionDescriptors);
}
use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class TestDirectoryPartitioner method testBasicGrouping.
/**
 * Grouping of several files into one partition: nine avro files from three runs
 * ({@code 13-04}, {@code 13-05}, {@code 13-06}), each run holding {@code part.0},
 * {@code part.1} and {@code part.2}, are expected to collapse into three partitions,
 * one per part number.
 */
@Test
public void testBasicGrouping() {
  String[] inputFiles = {
    "00_10-run_2016-08-15-13-04-part.0.150582.avro",
    "00_10-run_2016-08-15-13-04-part.1.138132.avro",
    "00_10-run_2016-08-15-13-04-part.2.214005.avro",
    "00_10-run_2016-08-15-13-05-part.0.205738.avro",
    "00_10-run_2016-08-15-13-05-part.1.158273.avro",
    "00_10-run_2016-08-15-13-05-part.2.982345.avro",
    "00_10-run_2016-08-15-13-06-part.0.313245.avro",
    "00_10-run_2016-08-15-13-06-part.1.234212.avro",
    "00_10-run_2016-08-15-13-06-part.2.413232.avro"
  };
  long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };

  List<FileMetadata> testList = new ArrayList<>();
  int cursor = 0;
  while (cursor < inputFiles.length) {
    testList.add(new FileMetadata(inputFiles[cursor], fileLength[cursor]));
    cursor += 1;
  }

  // The "[id]" token sits where the part number appears in a name such as
  // 00_10-run_2016-08-15-13-04-part.[id].138132.avro
  // (presumably DirectoryPartitioner uses that position as the grouping key - confirm there).
  String groupPattern = ".*part\\.[id]\\..*\\.avro";

  int expectedNumPartition = 3;
  int[][] expectedPartitioning = {
    { 0, 3, 6 }, // the three "part.0" files share a partition
    { 1, 4, 7 }, // likewise for "part.1"
    { 2, 5, 8 }  // likewise for "part.2"
  };

  DirectoryPartitioner partitioner =
      new DirectoryPartitioner(".*\\.avro", "", groupPattern, new TestFileSystemAdapter(testList));

  Map<Partition, SystemStreamPartitionMetadata> partitionMetadata =
      partitioner.getPartitionMetadataMap("hdfs", null);
  Assert.assertEquals(expectedNumPartition, partitionMetadata.size());

  Map<Partition, List<String>> partitionDescriptors = partitioner.getPartitionDescriptor("hdfs");
  verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedNumPartition, partitionDescriptors);
}
use of org.apache.samza.system.SystemStreamMetadata.SystemStreamPartitionMetadata in project samza by apache.
the class TestDirectoryPartitioner method testWhiteListBlackListFiltering.
/**
 * Combined white-list and black-list filtering: the white list {@code part-.*\.avro} admits
 * the six {@code part-*} inputs and the black list then removes {@code part-002.avro},
 * so five single-file partitions are expected.
 */
@Test
public void testWhiteListBlackListFiltering() {
  String[] inputFiles = {
    "part-001.avro", "part-002.avro", "part-003.avro",
    "delta-01.avro", "part-005.avro", "delta-03.avro",
    "part-004.avro", "delta-02.avro", "part-006.avro"
  };
  long[] fileLength = { 150582, 138132, 214005, 205738, 158273, 982345, 313245, 234212, 413232 };

  // Iterate over the array itself rather than a separately maintained count, so adding or
  // removing a fixture entry cannot silently desynchronise the loop bound.
  List<FileMetadata> testList = new ArrayList<>();
  for (int i = 0; i < inputFiles.length; i++) {
    testList.add(new FileMetadata(inputFiles[i], fileLength[i]));
  }

  String whiteList = "part-.*\\.avro";
  // The dot is escaped so the pattern names exactly "part-002.avro"; unescaped, '.' would
  // match any character. This also matches how the other patterns in this class are written.
  String blackList = "part-002\\.avro";
  String groupPattern = "";

  int expectedNumPartition = 5;
  // Positions (within inputFiles) of the files expected to remain, one per partition;
  // index 1 (part-002.avro) is the black-listed one.
  int[][] expectedPartitioning = { { 0 }, { 2 }, { 4 }, { 6 }, { 8 } };

  DirectoryPartitioner directoryPartitioner =
      new DirectoryPartitioner(whiteList, blackList, groupPattern, new TestFileSystemAdapter(testList));

  Map<Partition, SystemStreamPartitionMetadata> metadataMap =
      directoryPartitioner.getPartitionMetadataMap("hdfs", null);
  Assert.assertEquals(expectedNumPartition, metadataMap.size());

  Map<Partition, List<String>> descriptorMap = directoryPartitioner.getPartitionDescriptor("hdfs");
  verifyPartitionDescriptor(inputFiles, expectedPartitioning, expectedNumPartition, descriptorMap);
}
Aggregations