Use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by Apache.
From the class TestParquetFileWriter, method testMergeMetadata.
/**
 * Merges the metadata of two files whose schemas have different root names and
 * disjoint fields, and checks that the merged schema equals a "root1" message
 * holding the fields of both inputs ("a", "b" and "c").
 */
@Test
public void testMergeMetadata() {
  FileMetaData md1 = new FileMetaData(new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b")), new HashMap<String, String>(), "test");
  FileMetaData md2 = new FileMetaData(new MessageType("root2", new PrimitiveType(REQUIRED, BINARY, "c")), new HashMap<String, String>(), "test2");
  // md1 is merged first (into a null accumulator), then md2 is merged into that result.
  GlobalMetaData merged = ParquetFileWriter.mergeInto(md2, ParquetFileWriter.mergeInto(md1, null));
  MessageType expectedSchema = new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b"), new PrimitiveType(REQUIRED, BINARY, "c"));
  // assertEquals takes (expected, actual); keeping that order makes failure messages read correctly.
  assertEquals(expectedSchema, merged.getSchema());
}
Use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by Apache.
From the class TestParquetFileWriter, method testMergeFooters.
/**
 * Builds two footers with different schemas and block lists, merges them under
 * the "/tmp" root, and checks both the merged schema and that every block from
 * both footers is present in order.
 */
@Test
public void testMergeFooters() {
  // Block lists for each input file: two blocks for the first, one for the second.
  List<BlockMetaData> firstBlocks = new ArrayList<BlockMetaData>();
  firstBlocks.add(new BlockMetaData());
  firstBlocks.add(new BlockMetaData());
  List<BlockMetaData> secondBlocks = new ArrayList<BlockMetaData>();
  secondBlocks.add(new BlockMetaData());

  // The merged result is expected to hold the first file's blocks followed by the second's.
  List<BlockMetaData> allBlocks = new ArrayList<BlockMetaData>(firstBlocks);
  allBlocks.addAll(secondBlocks);

  MessageType firstSchema = new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b"));
  FileMetaData firstFileMeta = new FileMetaData(firstSchema, new HashMap<String, String>(), "test");
  Footer firstFooter = new Footer(new Path("file:/tmp/output/one.parquet"), new ParquetMetadata(firstFileMeta, firstBlocks));

  MessageType secondSchema = new MessageType("root2", new PrimitiveType(REQUIRED, BINARY, "c"));
  FileMetaData secondFileMeta = new FileMetaData(secondSchema, new HashMap<String, String>(), "test2");
  Footer secondFooter = new Footer(new Path("/tmp/output/two.parquet"), new ParquetMetadata(secondFileMeta, secondBlocks));

  List<Footer> inputFooters = new ArrayList<Footer>();
  inputFooters.add(firstFooter);
  inputFooters.add(secondFooter);

  ParquetMetadata result = ParquetFileWriter.mergeFooters(new Path("/tmp"), inputFooters);

  MessageType expectedSchema = new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b"), new PrimitiveType(REQUIRED, BINARY, "c"));
  assertEquals(expectedSchema, result.getFileMetaData().getSchema());
  assertEquals("Should have all blocks", allBlocks, result.getBlocks());
}
Use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by Apache.
From the class TestParquetFileWriter, method testMergeMetadataWithNoConflictingKeyValues.
/**
 * Merges two file metadata objects that share a schema but carry key/value
 * metadata under different keys, and checks that the strict merge strategy
 * keeps both entries.
 */
@Test
public void testMergeMetadataWithNoConflictingKeyValues() {
  // Plain maps instead of double-brace initialization: no anonymous HashMap
  // subclasses and no hidden reference to the enclosing test instance.
  Map<String, String> keyValues1 = new HashMap<String, String>();
  keyValues1.put("a", "b");
  Map<String, String> keyValues2 = new HashMap<String, String>();
  keyValues2.put("c", "d");
  FileMetaData md1 = new FileMetaData(new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b")), keyValues1, "test");
  FileMetaData md2 = new FileMetaData(new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b")), keyValues2, "test");
  GlobalMetaData merged = ParquetFileWriter.mergeInto(md2, ParquetFileWriter.mergeInto(md1, null));
  Map<String, String> mergedValues = merged.merge(new StrictKeyValueMetadataMergeStrategy()).getKeyValueMetaData();
  assertEquals("b", mergedValues.get("a"));
  assertEquals("d", mergedValues.get("c"));
}
Use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by Apache.
From the class TestParquetFileWriter, method testWriteMetadataFileWithRelativeOutputPath.
/**
 * {@link ParquetFileWriter#mergeFooters(Path, List)} requires a fully-qualified
 * root path and fails when handed a relative one. This test passes a relative
 * output path to {@code writeMetadataFile}, with a footer located under the
 * qualified form of that path, and expects the call to complete without throwing.
 */
@Test
public void testWriteMetadataFileWithRelativeOutputPath() throws IOException {
  Configuration configuration = new Configuration();
  FileSystem fileSystem = FileSystem.get(configuration);

  // The same directory, once as given (relative) and once qualified by the file system.
  Path relativeOutput = new Path("target/_test_relative");
  Path qualifiedOutput = fileSystem.makeQualified(relativeOutput);

  // Only getFileMetaData() is stubbed on the mocked footer metadata.
  MessageType schema = new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"));
  FileMetaData stubbedFileMetaData = new FileMetaData(schema, new HashMap<String, String>(), "test");
  ParquetMetadata parquetMetadata = Mockito.mock(ParquetMetadata.class);
  Mockito.when(parquetMetadata.getFileMetaData()).thenReturn(stubbedFileMetaData);

  List<Footer> footerList = new ArrayList<Footer>();
  footerList.add(new Footer(new Path(qualifiedOutput, "one"), parquetMetadata));

  // This should not throw an exception
  ParquetFileWriter.writeMetadataFile(configuration, relativeOutput, footerList, JobSummaryLevel.ALL);
}
Use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by Apache.
From the class TestParquetFileWriter, method testMergeMetadataWithConflictingKeyValues.
/**
 * Merges two file metadata objects that share a schema but map the same key
 * ("a") to different values. The strict strategy is expected to reject the
 * merge with a RuntimeException, while the concatenating strategy is expected
 * to produce a single entry joining both values with a comma, in either order.
 */
@Test
public void testMergeMetadataWithConflictingKeyValues() {
  // Plain maps instead of double-brace initialization: no anonymous HashMap
  // subclasses and no hidden reference to the enclosing test instance.
  Map<String, String> keyValues1 = new HashMap<String, String>();
  keyValues1.put("a", "b");
  Map<String, String> keyValues2 = new HashMap<String, String>();
  keyValues2.put("a", "c");
  FileMetaData md1 = new FileMetaData(new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b")), keyValues1, "test");
  FileMetaData md2 = new FileMetaData(new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b")), keyValues2, "test");
  GlobalMetaData merged = ParquetFileWriter.mergeInto(md2, ParquetFileWriter.mergeInto(md1, null));
  try {
    merged.merge(new StrictKeyValueMetadataMergeStrategy());
    // fail() raises an AssertionError, which the RuntimeException catch below does not intercept.
    fail("Merge metadata is expected to fail because of conflicting key values");
  } catch (RuntimeException e) {
    // expected because of conflicting values
    assertTrue(e.getMessage().contains("could not merge metadata"));
  }
  Map<String, String> mergedKeyValues = merged.merge(new ConcatenatingKeyValueMetadataMergeStrategy()).getKeyValueMetaData();
  assertEquals(1, mergedKeyValues.size());
  String mergedValue = mergedKeyValues.get("a");
  // The test accepts either concatenation order; report the actual value on failure.
  assertTrue("Unexpected merged value: " + mergedValue, "b,c".equals(mergedValue) || "c,b".equals(mergedValue));
}
Aggregations