Search in sources:

Example 51 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestParquetFileWriter method testMergeMetadata.

/**
 * Merges the metadata of two files whose schemas have different root names and
 * disjoint columns, and checks that the merged schema is named after the first
 * schema merged ("root1") and contains the columns of both ("a", "b" and "c").
 */
@Test
public void testMergeMetadata() {
    FileMetaData md1 = new FileMetaData(new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b")), new HashMap<String, String>(), "test");
    FileMetaData md2 = new FileMetaData(new MessageType("root2", new PrimitiveType(REQUIRED, BINARY, "c")), new HashMap<String, String>(), "test2");
    // md1 is merged first (into a null accumulator), then md2 is merged into that result.
    GlobalMetaData merged = ParquetFileWriter.mergeInto(md2, ParquetFileWriter.mergeInto(md1, null));
    MessageType expectedSchema = new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b"), new PrimitiveType(REQUIRED, BINARY, "c"));
    // JUnit's contract is assertEquals(expected, actual); passing them in this order
    // keeps the "expected ... but was ..." failure message truthful.
    assertEquals(expectedSchema, merged.getSchema());
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 52 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestParquetFileWriter method testMergeFooters.

/**
 * Builds two footers (one with two blocks, one with a single block) whose schemas
 * differ, merges them under the "/tmp" root, and checks both the merged schema and
 * that the merged metadata lists the blocks of the first footer followed by the
 * blocks of the second.
 */
@Test
public void testMergeFooters() {
    // Blocks referenced by the first footer.
    List<BlockMetaData> firstFileBlocks = new ArrayList<BlockMetaData>();
    firstFileBlocks.add(new BlockMetaData());
    firstFileBlocks.add(new BlockMetaData());

    // Blocks referenced by the second footer.
    List<BlockMetaData> secondFileBlocks = new ArrayList<BlockMetaData>();
    secondFileBlocks.add(new BlockMetaData());

    MessageType firstSchema = new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b"));
    FileMetaData firstFileMetaData = new FileMetaData(firstSchema, new HashMap<String, String>(), "test");
    Footer firstFooter = new Footer(new Path("file:/tmp/output/one.parquet"), new ParquetMetadata(firstFileMetaData, firstFileBlocks));

    MessageType secondSchema = new MessageType("root2", new PrimitiveType(REQUIRED, BINARY, "c"));
    FileMetaData secondFileMetaData = new FileMetaData(secondSchema, new HashMap<String, String>(), "test2");
    Footer secondFooter = new Footer(new Path("/tmp/output/two.parquet"), new ParquetMetadata(secondFileMetaData, secondFileBlocks));

    List<Footer> allFooters = new ArrayList<Footer>();
    allFooters.add(firstFooter);
    allFooters.add(secondFooter);

    // The expected block list holds the very same BlockMetaData instances, in footer order.
    List<BlockMetaData> expectedBlocks = new ArrayList<BlockMetaData>(firstFileBlocks);
    expectedBlocks.addAll(secondFileBlocks);

    ParquetMetadata mergedMetadata = ParquetFileWriter.mergeFooters(new Path("/tmp"), allFooters);

    MessageType expectedSchema = new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b"), new PrimitiveType(REQUIRED, BINARY, "c"));
    assertEquals(expectedSchema, mergedMetadata.getFileMetaData().getSchema());
    assertEquals("Should have all blocks", expectedBlocks, mergedMetadata.getBlocks());
}
Also used : Path(org.apache.hadoop.fs.Path) PrimitiveType(org.apache.parquet.schema.PrimitiveType) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 53 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestParquetFileWriter method testMergeMetadataWithNoConflictingKeyValues.

/**
 * Merges the metadata of two files that share a schema but carry key/value
 * metadata under different keys, and checks that a strict merge keeps both
 * entries unchanged.
 */
@Test
public void testMergeMetadataWithNoConflictingKeyValues() {
    // Plain HashMaps instead of double-brace initialization, which would create an
    // anonymous HashMap subclass for every map.
    Map<String, String> keyValues1 = new HashMap<String, String>();
    keyValues1.put("a", "b");
    Map<String, String> keyValues2 = new HashMap<String, String>();
    keyValues2.put("c", "d");
    FileMetaData md1 = new FileMetaData(new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b")), keyValues1, "test");
    FileMetaData md2 = new FileMetaData(new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b")), keyValues2, "test");
    GlobalMetaData merged = ParquetFileWriter.mergeInto(md2, ParquetFileWriter.mergeInto(md1, null));
    // The keys do not overlap, so the strict strategy is expected to succeed here.
    Map<String, String> mergedValues = merged.merge(new StrictKeyValueMetadataMergeStrategy()).getKeyValueMetaData();
    assertEquals("b", mergedValues.get("a"));
    assertEquals("d", mergedValues.get("c"));
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 54 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestParquetFileWriter method testWriteMetadataFileWithRelativeOutputPath.

/**
 * Writes a metadata file using a relative output root while the footer itself
 * lives under the fully-qualified form of that root.
 * <p>
 * {@link ParquetFileWriter#mergeFooters(Path, List)} expects a fully-qualified
 * root path and crashes when handed a relative one, so this call must complete
 * without throwing.
 */
@Test
public void testWriteMetadataFileWithRelativeOutputPath() throws IOException {
    Configuration conf = new Configuration();
    Path relativeRoot = new Path("target/_test_relative");
    Path qualifiedRoot = FileSystem.get(conf).makeQualified(relativeRoot);

    // Only getFileMetaData() is stubbed on the mocked ParquetMetadata.
    ParquetMetadata parquetMetadata = Mockito.mock(ParquetMetadata.class);
    MessageType schema = new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"));
    FileMetaData fileMetaData = new FileMetaData(schema, new HashMap<String, String>(), "test");
    Mockito.when(parquetMetadata.getFileMetaData()).thenReturn(fileMetaData);

    // The single footer is located under the qualified root.
    List<Footer> footers = new ArrayList<Footer>();
    footers.add(new Footer(new Path(qualifiedRoot, "one"), parquetMetadata));

    // Passing the relative root here should not throw an exception.
    ParquetFileWriter.writeMetadataFile(conf, relativeRoot, footers, JobSummaryLevel.ALL);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) FileSystem(org.apache.hadoop.fs.FileSystem) PrimitiveType(org.apache.parquet.schema.PrimitiveType) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 55 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestParquetFileWriter method testMergeMetadataWithConflictingKeyValues.

/**
 * Merges the metadata of two files that share a schema but store different values
 * under the same key ("a"), and checks that:
 * <ul>
 *   <li>the strict merge strategy rejects the conflict with a RuntimeException whose
 *       message contains "could not merge metadata";</li>
 *   <li>the concatenating merge strategy keeps a single entry whose value joins both
 *       values with a comma, in either order.</li>
 * </ul>
 */
@Test
public void testMergeMetadataWithConflictingKeyValues() {
    // Plain HashMaps instead of double-brace initialization, which would create an
    // anonymous HashMap subclass for every map.
    Map<String, String> keyValues1 = new HashMap<String, String>();
    keyValues1.put("a", "b");
    Map<String, String> keyValues2 = new HashMap<String, String>();
    keyValues2.put("a", "c");
    FileMetaData md1 = new FileMetaData(new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b")), keyValues1, "test");
    FileMetaData md2 = new FileMetaData(new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a"), new PrimitiveType(OPTIONAL, BINARY, "b")), keyValues2, "test");
    GlobalMetaData merged = ParquetFileWriter.mergeInto(md2, ParquetFileWriter.mergeInto(md1, null));
    try {
        merged.merge(new StrictKeyValueMetadataMergeStrategy());
        // fail() raises an AssertionError, which the RuntimeException catch below does not swallow.
        fail("Merge metadata is expected to fail because of conflicting key values");
    } catch (RuntimeException e) {
        // expected because of conflicting values
        assertTrue(e.getMessage().contains("could not merge metadata"));
    }
    Map<String, String> mergedKeyValues = merged.merge(new ConcatenatingKeyValueMetadataMergeStrategy()).getKeyValueMetaData();
    assertEquals(1, mergedKeyValues.size());
    String mergedValue = mergedKeyValues.get("a");
    // The order in which the two values are concatenated is not pinned by this test.
    assertTrue(mergedValue.equals("b,c") || mergedValue.equals("c,b"));
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Aggregations

PrimitiveType (org.apache.parquet.schema.PrimitiveType): 123; Test (org.junit.Test): 66; MessageType (org.apache.parquet.schema.MessageType): 41; HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 28; BooleanWritable (org.apache.hadoop.io.BooleanWritable): 28; BytesWritable (org.apache.hadoop.io.BytesWritable): 28; DoubleWritable (org.apache.hadoop.io.DoubleWritable): 28; FloatWritable (org.apache.hadoop.io.FloatWritable): 28; IntWritable (org.apache.hadoop.io.IntWritable): 28; LongWritable (org.apache.hadoop.io.LongWritable): 28; Writable (org.apache.hadoop.io.Writable): 28; GroupType (org.apache.parquet.schema.GroupType): 25; Test (org.testng.annotations.Test): 20; ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor): 14; OriginalType (org.apache.parquet.schema.OriginalType): 14; Type (org.apache.parquet.schema.Type): 12; List (java.util.List): 11; ColumnIndex (org.apache.parquet.internal.column.columnindex.ColumnIndex): 11; ColumnIndexBuilder (org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder): 11; ArrayList (java.util.ArrayList): 10