Search in sources :

Example 1 with GroupWriter

use of org.apache.parquet.example.data.GroupWriter in project parquet-mr by apache.

the class ValidatingColumnWriteStore method testWriteWithGroupWriter.

/**
 * Writes the records {@code r1} and {@code r2} through a {@link GroupWriter} into a
 * {@code ValidatingColumnWriteStore} built from the expected strings below, then asks
 * the store to validate.
 *
 * NOTE(review): each expected entry looks like "[column path]: value, r:repetition level,
 * d:definition level" -- confirm against ValidatingColumnWriteStore.
 */
@Test
public void testWriteWithGroupWriter() {
    final String[] expected = {
        // entries that appear to belong to the first record written (r1)
        "[DocId]: 10, r:0, d:0",
        "[Links, Forward]: 20, r:0, d:2",
        "[Links, Forward]: 40, r:1, d:2",
        "[Links, Forward]: 60, r:1, d:2",
        "[Links, Backward]: null, r:0, d:1",
        "[Name, Language, Code]: en-us, r:0, d:2",
        "[Name, Language, Country]: us, r:0, d:3",
        "[Name, Language, Code]: en, r:2, d:2",
        "[Name, Language, Country]: null, r:2, d:2",
        "[Name, Url]: http://A, r:0, d:2",
        "[Name, Url]: http://B, r:1, d:2",
        "[Name, Language, Code]: null, r:1, d:1",
        "[Name, Language, Country]: null, r:1, d:1",
        "[Name, Language, Code]: en-gb, r:1, d:2",
        "[Name, Language, Country]: gb, r:1, d:3",
        "[Name, Url]: null, r:1, d:1",
        // entries that appear to belong to the second record written (r2)
        "[DocId]: 20, r:0, d:0",
        "[Links, Backward]: 10, r:0, d:2",
        "[Links, Backward]: 30, r:1, d:2",
        "[Links, Forward]: 80, r:0, d:2",
        "[Name, Url]: http://C, r:0, d:2",
        "[Name, Language, Code]: null, r:0, d:1",
        "[Name, Language, Country]: null, r:0, d:1"
    };
    ValidatingColumnWriteStore validatingStore = new ValidatingColumnWriteStore(expected);
    RecordConsumer consumer = new ColumnIOFactory().getColumnIO(schema).getRecordWriter(validatingStore);
    GroupWriter writer = new GroupWriter(consumer, schema);

    writer.write(r1);
    writer.write(r2);

    // Flush the record consumer before validating, then flush and close the store.
    consumer.flush();
    validatingStore.validate();
    validatingStore.flush();
    validatingStore.close();
}
Also used : RecordConsumer(org.apache.parquet.io.api.RecordConsumer) GroupWriter(org.apache.parquet.example.data.GroupWriter) Test(org.junit.Test)

Example 2 with GroupWriter

use of org.apache.parquet.example.data.GroupWriter in project parquet-mr by apache.

the class ValidatingColumnWriteStore method writeGroups.

/**
 * Writes every given group, in argument order, into a column write store backed by
 * {@code memPageStore}, then flushes the record consumer and the store.
 *
 * @param writtenSchema schema used both to build the column IO and to drive the GroupWriter
 * @param memPageStore  page store handed to {@code newColumnWriteStore}
 * @param groups        records to write
 */
private void writeGroups(MessageType writtenSchema, MemPageStore memPageStore, Group... groups) {
    ColumnIOFactory factory = new ColumnIOFactory(true);
    ColumnWriteStoreV1 store = newColumnWriteStore(memPageStore);
    RecordConsumer consumer = factory.getColumnIO(writtenSchema).getRecordWriter(store);
    GroupWriter writer = new GroupWriter(consumer, writtenSchema);

    for (int i = 0; i < groups.length; i++) {
        writer.write(groups[i]);
    }

    // Flush the consumer first, then the column store.
    consumer.flush();
    store.flush();
}
Also used : Group(org.apache.parquet.example.data.Group) ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) GroupWriter(org.apache.parquet.example.data.GroupWriter)

Example 3 with GroupWriter

use of org.apache.parquet.example.data.GroupWriter in project parquet-mr by apache.

the class TestFiltered method writeTestRecords.

/**
 * Writes the pair {@code r1}, {@code r2} {@code number} times into a fresh
 * {@link MemPageStore} and returns that store.
 *
 * The column write store is configured with a page size of 800 and dictionary encoding
 * turned off.
 *
 * @param columnIO column IO that supplies the record writer
 * @param number   how many times the (r1, r2) pair is written; the page store is
 *                 constructed with {@code number * 2}
 * @return the page store the records were flushed into
 */
private MemPageStore writeTestRecords(MessageColumnIO columnIO, int number) {
    MemPageStore pageStore = new MemPageStore(number * 2);
    ParquetProperties properties = ParquetProperties.builder()
        .withPageSize(800)
        .withDictionaryEncoding(false)
        .build();
    ColumnWriteStoreV1 store = new ColumnWriteStoreV1(pageStore, properties);
    RecordConsumer consumer = columnIO.getRecordWriter(store);
    GroupWriter writer = new GroupWriter(consumer, schema);

    for (int remaining = number; remaining > 0; remaining--) {
        writer.write(r1);
        writer.write(r2);
    }

    // Flush the consumer first, then the column store.
    consumer.flush();
    store.flush();
    return pageStore;
}
Also used : ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) MemPageStore(org.apache.parquet.column.page.mem.MemPageStore) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) GroupWriter(org.apache.parquet.example.data.GroupWriter)

Example 4 with GroupWriter

use of org.apache.parquet.example.data.GroupWriter in project parquet-mr by apache.

the class PerfTest method write.

/**
 * Writes {@code r1} and {@code r2} once, then runs the counted
 * {@code write(memPageStore, groupWriter, count)} overload for a fixed series of batch
 * sizes, flushes the column store, and prints its buffered size and maximum column
 * memory size to standard output.
 *
 * @param memPageStore page store backing the column write store
 */
private static void write(MemPageStore memPageStore) {
    int pageSize = 50 * 1024 * 1024;
    ParquetProperties properties = ParquetProperties.builder()
        .withPageSize(pageSize)
        .withDictionaryEncoding(false)
        .build();
    ColumnWriteStoreV1 store = new ColumnWriteStoreV1(memPageStore, properties);
    MessageColumnIO io = newColumnFactory(schema);
    GroupWriter writer = new GroupWriter(io.getRecordWriter(store), schema);

    writer.write(r1);
    writer.write(r2);

    // Five batches of 10000, then one of 100000 and one of 1000000, in that order.
    int[] batchSizes = { 10000, 10000, 10000, 10000, 10000, 100000, 1000000 };
    for (int batchSize : batchSizes) {
        write(memPageStore, writer, batchSize);
    }

    store.flush();
    System.out.println();
    System.out.println(store.getBufferedSize() + " bytes used total");
    System.out.println("max col size: " + store.maxColMemSize() + " bytes");
}
Also used : ColumnWriteStoreV1(org.apache.parquet.column.impl.ColumnWriteStoreV1) GroupWriter(org.apache.parquet.example.data.GroupWriter)

Example 5 with GroupWriter

use of org.apache.parquet.example.data.GroupWriter in project parquet-mr by apache.

the class ValidatingColumnWriteStore method testGroupWriter.

/**
 * Pushes {@code r1} and {@code r2} through a {@link GroupWriter} whose record consumer
 * feeds a {@link GroupRecordConverter}, and checks that the string form of each
 * converted record equals the string form of the record that was written.
 */
@Test
public void testGroupWriter() {
    List<Group> result = new ArrayList<Group>();
    final GroupRecordConverter groupRecordConverter = new GroupRecordConverter(schema);
    RecordConsumer groupConsumer = new ConverterConsumer(groupRecordConverter.getRootConverter(), schema);
    GroupWriter groupWriter = new GroupWriter(new RecordConsumerLoggingWrapper(groupConsumer), schema);
    // Capture the converter's current record right after each write.
    groupWriter.write(r1);
    result.add(groupRecordConverter.getCurrentRecord());
    groupWriter.write(r2);
    result.add(groupRecordConverter.getCurrentRecord());
    // assertEquals takes (message, expected, actual): the written record is the expectation and
    // the converted record is the value under test, so a failure report labels them correctly.
    assertEquals("deserialization does not display the expected result", r1.toString(), result.get(0).toString());
    assertEquals("deserialization does not display the expected result", r2.toString(), result.get(1).toString());
}
Also used : Group(org.apache.parquet.example.data.Group) GroupRecordConverter(org.apache.parquet.example.data.simple.convert.GroupRecordConverter) ArrayList(java.util.ArrayList) RecordConsumer(org.apache.parquet.io.api.RecordConsumer) GroupWriter(org.apache.parquet.example.data.GroupWriter) Test(org.junit.Test)

Aggregations

GroupWriter (org.apache.parquet.example.data.GroupWriter) 9, RecordConsumer (org.apache.parquet.io.api.RecordConsumer) 7, ColumnWriteStoreV1 (org.apache.parquet.column.impl.ColumnWriteStoreV1) 6, Group (org.apache.parquet.example.data.Group) 5, MemPageStore (org.apache.parquet.column.page.mem.MemPageStore) 4, Test (org.junit.Test) 4, ArrayList (java.util.ArrayList) 3, GroupRecordConverter (org.apache.parquet.example.data.simple.convert.GroupRecordConverter) 2, List (java.util.List) 1, SimpleGroup (org.apache.parquet.example.data.simple.SimpleGroup) 1, ConverterConsumer (org.apache.parquet.io.ConverterConsumer) 1, RecordConsumerLoggingWrapper (org.apache.parquet.io.RecordConsumerLoggingWrapper) 1, MessageType (org.apache.parquet.schema.MessageType) 1, Tuple (org.apache.pig.data.Tuple) 1