Search in sources :

Example 1 with SimpleGroup

use of org.apache.parquet.example.data.simple.SimpleGroup in project parquet-mr by apache.

the class TestTupleRecordConsumer method testArtSchema.

/**
 * Builds a single group for a nested Pig schema string and hands it to
 * {@code testFromGroups} together with that schema string.
 */
@Test
public void testArtSchema() throws ExecException, ParserException {
    final String pigSchemaString =
        "DocId:long, "
            + "Links:(Backward:{(long)}, Forward:{(long)}), "
            + "Name:{(Language:{(Code:chararray,Country:chararray)}, Url:chararray)}";
    final SimpleGroup record = new SimpleGroup(getMessageType(pigSchemaString));
    record.add("DocId", 1L);

    // Links: one "bag" element under Backward and one under Forward, each with the value 1 at field index 0.
    final Group links = record.addGroup("Links");
    final Group backwardEntry = links.addGroup("Backward").addGroup("bag");
    backwardEntry.add(0, 1L);
    final Group forwardEntry = links.addGroup("Forward").addGroup("bag");
    forwardEntry.add(0, 1L);

    // Name: one "bag" element that carries a nested Language bag element and a Url.
    final Group nameEntry = record.addGroup("Name").addGroup("bag");
    final Group languageEntry = nameEntry.addGroup("Language").addGroup("bag");
    languageEntry.append("Code", "en").append("Country", "US");
    nameEntry.add("Url", "http://foo/bar");

    testFromGroups(pigSchemaString, Arrays.<Group>asList(record));
}
Also used: Group (org.apache.parquet.example.data.Group), SimpleGroup (org.apache.parquet.example.data.simple.SimpleGroup), Test (org.junit.Test)

Example 2 with SimpleGroup

use of org.apache.parquet.example.data.simple.SimpleGroup in project parquet-mr by apache.

the class TestTupleRecordConsumer method testBags.

/**
 * Builds a group whose field "a" holds two "bag" elements ("foo" then "bar" in field "b")
 * and hands it to {@code testFromGroups} together with the Pig schema string.
 */
@Test
public void testBags() throws ExecException, ParserException {
    final String pigSchemaString = "a: {(b: chararray)}";
    final SimpleGroup record = new SimpleGroup(getMessageType(pigSchemaString));
    final Group bag = record.addGroup("a");
    // One "bag" element per value, added in this order.
    for (String value : new String[] {"foo", "bar"}) {
        bag.addGroup("bag").append("b", value);
    }
    testFromGroups(pigSchemaString, Arrays.<Group>asList(record));
}
Also used: Group (org.apache.parquet.example.data.Group), SimpleGroup (org.apache.parquet.example.data.simple.SimpleGroup), Test (org.junit.Test)

Example 3 with SimpleGroup

use of org.apache.parquet.example.data.simple.SimpleGroup in project parquet-mr by apache.

the class TestTupleRecordConsumer method testMaps.

/**
 * Builds a group whose field "a" holds two "map" entries (keys "foo" and "bar", each with a
 * "value" group carrying the same string in field "b") and hands it to
 * {@code testFromGroups} together with the Pig schema string.
 */
@Test
public void testMaps() throws ExecException, ParserException {
    final String pigSchemaString = "a: [(b: chararray)]";
    final SimpleGroup record = new SimpleGroup(getMessageType(pigSchemaString));
    final Group mapField = record.addGroup("a");

    // First entry: key "foo" with a value group whose "b" is "foo".
    final Group fooEntry = mapField.addGroup("map").append("key", "foo");
    fooEntry.addGroup("value").append("b", "foo");

    // Second entry: key "bar" with a value group whose "b" is "bar".
    final Group barEntry = mapField.addGroup("map").append("key", "bar");
    barEntry.addGroup("value").append("b", "bar");

    testFromGroups(pigSchemaString, Arrays.<Group>asList(record));
}
Also used: Group (org.apache.parquet.example.data.Group), SimpleGroup (org.apache.parquet.example.data.simple.SimpleGroup), Test (org.junit.Test)

Example 4 with SimpleGroup

use of org.apache.parquet.example.data.simple.SimpleGroup in project parquet-mr by apache.

the class TestParquetFileWriter method testWriteReadStatisticsAllNulls.

/**
 * Writes one group with no value set for the {@code content} column, reads the file footer
 * back, and checks that the first column of the first block has non-empty statistics
 * reporting exactly one null.
 *
 * @throws Exception if creating the temp file, writing, or reading the footer fails
 */
@Test
public void testWriteReadStatisticsAllNulls() throws Exception {
    // this test assumes statistics will be read
    Assume.assumeTrue(!shouldIgnoreStatistics(Version.FULL_VERSION, BINARY));
    File testFile = temp.newFile();
    // newFile() creates the file on disk; remove it and keep only its location for the writer.
    testFile.delete();
    writeSchema = "message example {\n" + "required binary content (UTF8);\n" + "}";
    Path path = new Path(testFile.toURI());
    MessageType schema = MessageTypeParser.parseMessageType(writeSchema);
    Configuration configuration = new Configuration();
    configuration.setBoolean("parquet.strings.signed-min-max.enabled", true);
    GroupWriteSupport.setSchema(schema, configuration);
    ParquetWriter<Group> writer = new ParquetWriter<Group>(path, configuration, new GroupWriteSupport());
    try {
        // No value is added to r1, so the single record written has nothing for "content".
        Group r1 = new SimpleGroup(schema);
        writer.write(r1);
    } finally {
        // Close the writer even when write() throws, so it is not leaked on failure.
        writer.close();
    }
    ParquetMetadata readFooter = ParquetFileReader.readFooter(configuration, path);
    // assert the statistics object is not empty
    org.apache.parquet.column.statistics.Statistics stats = readFooter.getBlocks().get(0).getColumns().get(0).getStatistics();
    assertFalse("is empty: " + stats, stats.isEmpty());
    // assert the number of nulls are correct for the first block
    assertEquals("nulls: " + stats, 1, stats.getNumNulls());
}
Also used: Path (org.apache.hadoop.fs.Path), Group (org.apache.parquet.example.data.Group), SimpleGroup (org.apache.parquet.example.data.simple.SimpleGroup), Configuration (org.apache.hadoop.conf.Configuration), GroupWriteSupport (org.apache.parquet.hadoop.example.GroupWriteSupport), File (java.io.File), MessageType (org.apache.parquet.schema.MessageType), Test (org.junit.Test)

Example 5 with SimpleGroup

use of org.apache.parquet.example.data.simple.SimpleGroup in project parquet-mr by apache.

the class PhoneBookWriter method groupFromUser.

/**
 * Converts a user into a {@link SimpleGroup} created from {@code schema}.
 *
 * <p>"id" is always appended. "name", "phoneNumbers" and "location" are added only when
 * the corresponding getter on the user returns a non-null value.
 *
 * @param user the user to convert
 * @return the populated group
 */
public static SimpleGroup groupFromUser(User user) {
    SimpleGroup record = new SimpleGroup(schema);
    record.append("id", user.getId());
    if (user.getName() != null) {
        record.append("name", user.getName());
    }
    appendPhoneNumbersOf(user, record);
    appendLocationOf(user, record);
    return record;
}

/** Adds a "phoneNumbers" group with one "phone" child per phone number, if the user has a phone-number list. */
private static void appendPhoneNumbersOf(User user, SimpleGroup record) {
    if (user.getPhoneNumbers() == null) {
        return;
    }
    Group phoneList = record.addGroup("phoneNumbers");
    for (PhoneNumber entry : user.getPhoneNumbers()) {
        Group phoneGroup = phoneList.addGroup("phone");
        phoneGroup.append("number", entry.getNumber());
        // "kind" is only written when present.
        if (entry.getKind() != null) {
            phoneGroup.append("kind", entry.getKind());
        }
    }
}

/** Adds a "location" group carrying whichever of "lon" / "lat" are non-null, if the user has a location. */
private static void appendLocationOf(User user, SimpleGroup record) {
    if (user.getLocation() == null) {
        return;
    }
    // The group is added even when both coordinates turn out to be null.
    Group locationGroup = record.addGroup("location");
    if (user.getLocation().getLon() != null) {
        locationGroup.append("lon", user.getLocation().getLon());
    }
    if (user.getLocation().getLat() != null) {
        locationGroup.append("lat", user.getLocation().getLat());
    }
}
Also used: Group (org.apache.parquet.example.data.Group), SimpleGroup (org.apache.parquet.example.data.simple.SimpleGroup)

Aggregations

Group (org.apache.parquet.example.data.Group): 5, SimpleGroup (org.apache.parquet.example.data.simple.SimpleGroup): 5, Test (org.junit.Test): 4, File (java.io.File): 1, Configuration (org.apache.hadoop.conf.Configuration): 1, Path (org.apache.hadoop.fs.Path): 1, GroupWriteSupport (org.apache.parquet.hadoop.example.GroupWriteSupport): 1, MessageType (org.apache.parquet.schema.MessageType): 1