Search in sources :

Example 16 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class ValidatingColumnWriteStore method testReadUsingRequestedSchemaWithExtraFields.

/**
 * Reads records through a requested schema that lists the fields in the order
 * {@code b, a, c}, where {@code c} does not exist in the first written schema.
 *
 * <p>Two page stores are populated: one with the two-field schema
 * ({@code a}, {@code b}) and one with the three-field schema. Both are then read
 * back using the three-field schema as the requested schema, and the resulting
 * groups are checked against rows laid out in requested-schema order.
 */
@Test
public void testReadUsingRequestedSchemaWithExtraFields() {
    // File written with only "a" and "b".
    MessageType originalSchema = new MessageType("schema",
        new PrimitiveType(REQUIRED, INT32, "a"),
        new PrimitiveType(OPTIONAL, INT32, "b"));
    MemPageStore originalStore = new MemPageStore(1);
    SimpleGroupFactory originalFactory = new SimpleGroupFactory(originalSchema);
    writeGroups(originalSchema, originalStore,
        originalFactory.newGroup().append("a", 1).append("b", 2));

    // File written with the reordered fields plus the additional "c".
    MessageType extendedSchema = new MessageType("schema",
        new PrimitiveType(OPTIONAL, INT32, "b"),
        new PrimitiveType(OPTIONAL, INT32, "a"),
        new PrimitiveType(OPTIONAL, INT32, "c"));
    MemPageStore extendedStore = new MemPageStore(1);
    SimpleGroupFactory extendedFactory = new SimpleGroupFactory(extendedSchema);
    writeGroups(extendedSchema, extendedStore,
        extendedFactory.newGroup().append("a", 1).append("b", 2).append("c", 3));

    // Both stores are read with the extended schema as the requested schema.
    List<Group> actualGroups = new ArrayList<>();
    actualGroups.addAll(readGroups(originalStore, originalSchema, extendedSchema, 1));
    actualGroups.addAll(readGroups(extendedStore, extendedSchema, extendedSchema, 1));

    // TODO: add a third case once empty projection is supported. The original
    // note was: readGroups(memPageStore3, schema3, schema2, 1) -> { null, null }
    // NOTE(review): those names do not exist in this method; confirm intent.
    Object[][] expectedRows = {
        { 2, 1, null },
        { 2, 1, 3 }
    };
    validateGroups(actualGroups, expectedRows);
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) SimpleGroupFactory(org.apache.parquet.example.data.simple.SimpleGroupFactory) ArrayList(java.util.ArrayList) List(java.util.List) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 17 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestParquetParser method testPaperExample.

/**
 * Parses two textual message types and checks that each equals the same schema
 * built by hand, both directly and after a {@code toString()} / re-parse round trip.
 */
@Test
public void testPaperExample() {
    // Case 1: the nested "Document" schema.
    String documentText = "message Document {\n"
        + "  required int64 DocId;\n"
        + "  optional group Links {\n"
        + "    repeated int64 Backward;\n"
        + "    repeated int64 Forward; }\n"
        + "  repeated group Name {\n"
        + "    repeated group Language {\n"
        + "      required binary Code;\n"
        + "      required binary Country; }\n"
        + "    optional binary Url; }}";
    MessageType documentParsed = parseMessageType(documentText);
    MessageType documentExpected = new MessageType("Document",
        new PrimitiveType(REQUIRED, INT64, "DocId"),
        new GroupType(OPTIONAL, "Links",
            new PrimitiveType(REPEATED, INT64, "Backward"),
            new PrimitiveType(REPEATED, INT64, "Forward")),
        new GroupType(REPEATED, "Name",
            new GroupType(REPEATED, "Language",
                new PrimitiveType(REQUIRED, BINARY, "Code"),
                new PrimitiveType(REQUIRED, BINARY, "Country")),
            new PrimitiveType(OPTIONAL, BINARY, "Url")));
    assertEquals(documentExpected, documentParsed);
    // The printed form must parse back to the same schema.
    MessageType documentReparsed = parseMessageType(documentParsed.toString());
    assertEquals(documentExpected, documentReparsed);

    // Case 2: two sibling required groups written on a single line.
    MessageType siblingsParsed = parseMessageType("message m { required group a {required binary b;} required group c { required int64 d; }}");
    MessageType siblingsExpected = new MessageType("m",
        new GroupType(REQUIRED, "a", new PrimitiveType(REQUIRED, BINARY, "b")),
        new GroupType(REQUIRED, "c", new PrimitiveType(REQUIRED, INT64, "d")));
    assertEquals(siblingsExpected, siblingsParsed);
    MessageType siblingsReparsed = parseMessageType(siblingsParsed.toString());
    assertEquals(siblingsExpected, siblingsReparsed);
}
Also used : GroupType(org.apache.parquet.schema.GroupType) PrimitiveType(org.apache.parquet.schema.PrimitiveType) MessageTypeParser.parseMessageType(org.apache.parquet.schema.MessageTypeParser.parseMessageType) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 18 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestParquetFileWriter method testMergeMetadata.

/**
 * Merges two file metadata objects with different schemas ({@code root1} with
 * fields {@code a}, {@code b}; {@code root2} with field {@code c}) and checks
 * that the merged schema is named {@code root1} and contains {@code a},
 * {@code b} and {@code c}.
 */
@Test
public void testMergeMetadata() {
    FileMetaData md1 = new FileMetaData(
        new MessageType("root1",
            new PrimitiveType(REPEATED, BINARY, "a"),
            new PrimitiveType(OPTIONAL, BINARY, "b")),
        new HashMap<String, String>(), "test");
    FileMetaData md2 = new FileMetaData(
        new MessageType("root2",
            new PrimitiveType(REQUIRED, BINARY, "c")),
        new HashMap<String, String>(), "test2");
    // md1 is merged first (with null as the previous result), then md2 on top of it.
    GlobalMetaData merged = ParquetFileWriter.mergeInto(md2, ParquetFileWriter.mergeInto(md1, null));
    MessageType expectedSchema = new MessageType("root1",
        new PrimitiveType(REPEATED, BINARY, "a"),
        new PrimitiveType(OPTIONAL, BINARY, "b"),
        new PrimitiveType(REQUIRED, BINARY, "c"));
    // Expected value goes first so a failure reports expected/actual correctly.
    assertEquals(expectedSchema, merged.getSchema());
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 19 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestParquetFileWriter method testMergeFooters.

/**
 * Merges two footers (one carrying two blocks, one carrying a single block) and
 * checks both the merged schema and that the merged metadata lists the first
 * footer's blocks followed by the second's.
 */
@Test
public void testMergeFooters() {
    // Block lists for each footer.
    List<BlockMetaData> blocksOfFirst = new ArrayList<>();
    blocksOfFirst.add(new BlockMetaData());
    blocksOfFirst.add(new BlockMetaData());
    List<BlockMetaData> blocksOfSecond = new ArrayList<>();
    blocksOfSecond.add(new BlockMetaData());

    // Expected order: first footer's blocks, then the second's.
    List<BlockMetaData> allBlocks = new ArrayList<>(blocksOfFirst);
    allBlocks.addAll(blocksOfSecond);

    MessageType firstSchema = new MessageType("root1",
        new PrimitiveType(REPEATED, BINARY, "a"),
        new PrimitiveType(OPTIONAL, BINARY, "b"));
    FileMetaData firstFileMeta = new FileMetaData(firstSchema, new HashMap<String, String>(), "test");
    Footer firstFooter = new Footer(
        new Path("file:/tmp/output/one.parquet"),
        new ParquetMetadata(firstFileMeta, blocksOfFirst));

    MessageType secondSchema = new MessageType("root2",
        new PrimitiveType(REQUIRED, BINARY, "c"));
    FileMetaData secondFileMeta = new FileMetaData(secondSchema, new HashMap<String, String>(), "test2");
    Footer secondFooter = new Footer(
        new Path("/tmp/output/two.parquet"),
        new ParquetMetadata(secondFileMeta, blocksOfSecond));

    List<Footer> inputFooters = new ArrayList<>();
    inputFooters.add(firstFooter);
    inputFooters.add(secondFooter);

    ParquetMetadata mergedMetadata = ParquetFileWriter.mergeFooters(new Path("/tmp"), inputFooters);

    MessageType expectedSchema = new MessageType("root1",
        new PrimitiveType(REPEATED, BINARY, "a"),
        new PrimitiveType(OPTIONAL, BINARY, "b"),
        new PrimitiveType(REQUIRED, BINARY, "c"));
    assertEquals(expectedSchema, mergedMetadata.getFileMetaData().getSchema());
    assertEquals("Should have all blocks", allBlocks, mergedMetadata.getBlocks());
}
Also used : Path(org.apache.hadoop.fs.Path) PrimitiveType(org.apache.parquet.schema.PrimitiveType) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Example 20 with PrimitiveType

use of org.apache.parquet.schema.PrimitiveType in project parquet-mr by apache.

the class TestParquetFileWriter method testMergeMetadataWithNoConflictingKeyValues.

/**
 * Merges two file metadata objects that share the same schema but carry
 * disjoint key/value metadata ({@code a=b} and {@code c=d}), then checks that
 * merging with {@link StrictKeyValueMetadataMergeStrategy} exposes both entries.
 */
@Test
public void testMergeMetadataWithNoConflictingKeyValues() {
    // Plain HashMaps instead of double-brace initialization: no anonymous
    // subclasses and no hidden reference to the enclosing test instance.
    Map<String, String> keyValues1 = new HashMap<String, String>();
    keyValues1.put("a", "b");
    Map<String, String> keyValues2 = new HashMap<String, String>();
    keyValues2.put("c", "d");

    FileMetaData md1 = new FileMetaData(
        new MessageType("root1",
            new PrimitiveType(REPEATED, BINARY, "a"),
            new PrimitiveType(OPTIONAL, BINARY, "b")),
        keyValues1, "test");
    FileMetaData md2 = new FileMetaData(
        new MessageType("root1",
            new PrimitiveType(REPEATED, BINARY, "a"),
            new PrimitiveType(OPTIONAL, BINARY, "b")),
        keyValues2, "test");

    // md1 is merged first (with null as the previous result), then md2 on top of it.
    GlobalMetaData merged = ParquetFileWriter.mergeInto(md2, ParquetFileWriter.mergeInto(md1, null));
    Map<String, String> mergedValues = merged.merge(new StrictKeyValueMetadataMergeStrategy()).getKeyValueMetaData();
    assertEquals("b", mergedValues.get("a"));
    assertEquals("d", mergedValues.get("c"));
}
Also used : PrimitiveType(org.apache.parquet.schema.PrimitiveType) MessageType(org.apache.parquet.schema.MessageType) Test(org.junit.Test)

Aggregations

PrimitiveType (org.apache.parquet.schema.PrimitiveType)123 Test (org.junit.Test)66 MessageType (org.apache.parquet.schema.MessageType)41 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)28 BooleanWritable (org.apache.hadoop.io.BooleanWritable)28 BytesWritable (org.apache.hadoop.io.BytesWritable)28 DoubleWritable (org.apache.hadoop.io.DoubleWritable)28 FloatWritable (org.apache.hadoop.io.FloatWritable)28 IntWritable (org.apache.hadoop.io.IntWritable)28 LongWritable (org.apache.hadoop.io.LongWritable)28 Writable (org.apache.hadoop.io.Writable)28 GroupType (org.apache.parquet.schema.GroupType)25 Test (org.testng.annotations.Test)20 ColumnDescriptor (org.apache.parquet.column.ColumnDescriptor)14 OriginalType (org.apache.parquet.schema.OriginalType)14 Type (org.apache.parquet.schema.Type)12 List (java.util.List)11 ColumnIndex (org.apache.parquet.internal.column.columnindex.ColumnIndex)11 ColumnIndexBuilder (org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder)11 ArrayList (java.util.ArrayList)10