Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by Apache.
From class TestFiltered, method testFilteredOrPaged.
/**
 * Reads records back through an OR of two {@code DocId} filters ({@code 10} or {@code 20}) and
 * checks the result against the expected records.
 *
 * <p>The store is populated with {@code writeTestRecords(columnIO, 8)}; the assertions expect 16
 * records that alternate between {@code r1} and {@code r2}.
 */
@Test
public void testFilteredOrPaged() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 8);
  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  // Upper-case 'L' suffix: a lower-case 'l' is easily misread as the digit one.
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(
      memPageStore,
      recordConverter,
      FilterCompat.get(or(column("DocId", equalTo(10L)), column("DocId", equalTo(20L)))));
  List<Group> all = readAll(recordReader);
  // The failure message states the same count that is actually asserted.
  assertEquals("expecting 16 records " + all, 16, all.size());
  // Records are expected in pairs: r1 at even indexes, r2 at odd indexes.
  for (int i = 0; i < all.size() / 2; i++) {
    assertEquals("expecting record1", r1.toString(), all.get(2 * i).toString());
    assertEquals("expecting record2", r2.toString(), all.get(2 * i + 1).toString());
  }
}
Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by Apache.
From class TestMemPageStore, method test.
/**
 * Writes four pages for one INT64 column into a {@link MemPageStore}, then reads pages back
 * until the number of values read reaches the reader's total value count, printing the total
 * and every page along the way.
 *
 * @throws IOException if writing or reading a page fails
 */
@Test
public void test() throws IOException {
  MemPageStore store = new MemPageStore(10);
  ColumnDescriptor column = new ColumnDescriptor(path, PrimitiveTypeName.INT64, 2, 2);
  LongStatistics statistics = new LongStatistics();
  PageWriter writer = store.getPageWriter(column);
  // Four pages of 209 values each; only the payload size differs between them.
  for (int payloadSize : new int[] {735, 743, 743, 735}) {
    writer.writePage(BytesInput.from(new byte[payloadSize]), 209, statistics, BIT_PACKED, BIT_PACKED, PLAIN);
  }
  PageReader reader = store.getPageReader(column);
  long expectedValues = reader.getTotalValueCount();
  System.out.println(expectedValues);
  int valuesRead = 0;
  // At least one page is always read, even when the reported total is zero.
  while (true) {
    DataPage page = reader.readPage();
    valuesRead += page.getValueCount();
    System.out.println(page);
    // TODO: assert
    if (valuesRead >= expectedValues) {
      break;
    }
  }
}
Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by Apache.
From class TestMemColumn, method testMemColumnBinary.
/**
 * Writes a single binary value ({@code "42"}) to the required column {@code foo.bar} and checks
 * that every value read back has repetition level 0, definition level 0 and the same content.
 *
 * @throws Exception if writing to or reading from the in-memory store fails
 */
@Test
public void testMemColumnBinary() throws Exception {
  MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required binary bar; } }");
  String[] col = new String[] { "foo", "bar" };
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnDescriptor path = mt.getColumnDescription(col);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  columnWriter.write(Binary.fromString("42"), 0, 0);
  // Flush so the written value becomes visible to readers of the page store.
  memColumnsStore.flush();
  ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    // JUnit's assertEquals takes (expected, actual); this order keeps failure messages truthful.
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals("42", columnReader.getBinary().toStringUsingUTF8());
    columnReader.consume();
  }
}
Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by Apache.
From class TupleConsumerPerfTest, method main.
/**
 * Entry point. Builds three Pig schema strings via {@code pigSchema(...)}, writes data for the
 * first one into an in-memory page store, reads it back with all three, and finally prints the
 * column store's buffered size and maximum column memory size.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if schema conversion, writing or reading fails
 */
public static void main(String[] args) throws Exception {
  String fullSchema = pigSchema(false, false);
  String projectedSchema = pigSchema(true, false);
  String noStringSchema = pigSchema(true, true);
  MessageType schema = new PigSchemaConverter().convert(Utils.getSchemaFromString(fullSchema));

  MemPageStore pageStore = new MemPageStore(0);
  final int pageSizeBytes = 50 * 1024 * 1024;
  ParquetProperties properties = ParquetProperties.builder()
      .withPageSize(pageSizeBytes)
      .withDictionaryEncoding(false)
      .build();
  ColumnWriteStoreV1 columns = new ColumnWriteStoreV1(pageStore, properties);

  write(pageStore, columns, schema, fullSchema);
  columns.flush();
  read(pageStore, fullSchema, projectedSchema, noStringSchema);

  System.out.println(columns.getBufferedSize() + " bytes used total");
  System.out.println("max col size: " + columns.maxColMemSize() + " bytes");
}
Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by Apache.
From class TestFiltered, method testApplyFunctionFilterOnString.
/**
 * Exercises three filters on the {@code Name.Url} column against one written batch of test
 * records: a string predicate that should select {@code r1}, an equality on
 * {@code "http://B"} that should select nothing, and an equality on {@code "http://C"} that
 * should select {@code r2}.
 */
@Test
public void testApplyFunctionFilterOnString() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore pageStore = writeTestRecords(columnIO, 1);
  RecordMaterializer<Group> converter = new GroupRecordConverter(schema);

  // Step 1: match the A url in record 1.
  FilterCompat.Filter endsWithA = FilterCompat.get(column("Name.Url", applyFunctionToString(new StringEndsWithAPredicate())));
  RecordReaderImplementation<Group> reader =
      (RecordReaderImplementation<Group>) columnIO.getRecordReader(pageStore, converter, endsWithA);
  readOne(reader, "r2 filtered out", r1);

  // Step 2: match the B url in record 1. Nothing should match, because only the first
  // instance of the column value is compared.
  FilterCompat.Filter equalsB = FilterCompat.get(column("Name.Url", equalTo("http://B")));
  reader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(pageStore, converter, equalsB);
  List<Group> matches = readAll(reader);
  assertEquals("There should be no matching records: " + matches, 0, matches.size());

  // Step 3: match the C url in record 2.
  FilterCompat.Filter equalsC = FilterCompat.get(column("Name.Url", equalTo("http://C")));
  reader = (RecordReaderImplementation<Group>) columnIO.getRecordReader(pageStore, converter, equalsC);
  readOne(reader, "r1 filtered out", r2);
}
Aggregations