Use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in the Apache parquet-mr project.
From the class ValidatingColumnWriteStore, method testGroupWriter:
@Test
public void testGroupWriter() {
  // Round-trip r1 and r2 through a GroupWriter whose consumer feeds a
  // GroupRecordConverter, then check the materialized Groups match the originals.
  final GroupRecordConverter converter = new GroupRecordConverter(schema);
  final RecordConsumer loggingConsumer = new RecordConsumerLoggingWrapper(
      new ConverterConsumer(converter.getRootConverter(), schema));
  final GroupWriter writer = new GroupWriter(loggingConsumer, schema);

  final List<Group> materialized = new ArrayList<Group>();
  for (Group record : new Group[] { r1, r2 }) {
    writer.write(record);
    materialized.add(converter.getCurrentRecord());
  }

  assertEquals("deserialization does not display the expected result",
      materialized.get(0).toString(), r1.toString());
  assertEquals("deserialization does not display the expected result",
      materialized.get(1).toString(), r2.toString());
}
Use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in the Apache parquet-mr project.
From the class TestFiltered, method testFilterOnInteger:
@Test
public void testFilterOnInteger() {
  // Writes one copy each of r1/r2 to an in-memory page store, then reads back
  // with an equality filter on the DocId column.
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 1);

  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);

  // DocId == 10 should match only r1.
  // Fixed: uppercase 'L' suffix for long literals — lowercase 'l' is easily misread as '1'.
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("DocId", equalTo(10L))));
  readOne(recordReader, "r2 filtered out", r1);

  // DocId == 20 should match only r2.
  recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("DocId", equalTo(20L))));
  readOne(recordReader, "r1 filtered out", r2);
}
Use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in the Apache parquet-mr project.
From the class TestFiltered, method testFilteredOrPaged:
@Test
public void testFilteredOrPaged() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  // 8 write iterations, each presumably writing one r1 and one r2 — 16 records total
  // (consistent with the size assertion below).
  MemPageStore memPageStore = writeTestRecords(columnIO, 8);

  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);
  // DocId == 10 OR DocId == 20 matches every stored record.
  // Fixed: uppercase 'L' suffix for long literals — lowercase 'l' is easily misread as '1'.
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(or(column("DocId", equalTo(10L)), column("DocId", equalTo(20L)))));

  List<Group> all = readAll(recordReader);
  // Fixed message: the assertion expects 16 records (8 pairs), not 8 — the old
  // text "expecting 8 records" contradicted the asserted value.
  assertEquals("expecting 16 records " + all, 16, all.size());
  // Records alternate r1, r2, r1, r2, ...
  for (int i = 0; i < all.size() / 2; i++) {
    assertEquals("expecting record1", r1.toString(), all.get(2 * i).toString());
    assertEquals("expecting record2", r2.toString(), all.get(2 * i + 1).toString());
  }
}
Use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in the Apache parquet-mr project.
From the class TestTupleRecordConsumer, method testFromGroups:
private void testFromGroups(String pigSchemaString, List<Group> input) throws ParserException {
  MessageType schema = getMessageType(pigSchemaString);

  // Phase 1: convert every input Group into a Pig Tuple.
  RecordMaterializer<Tuple> pigRecordConsumer = newPigRecordConsumer(pigSchemaString);
  GroupWriter toTupleWriter = new GroupWriter(
      new RecordConsumerLoggingWrapper(
          new ConverterConsumer(pigRecordConsumer.getRootConverter(), schema)),
      schema);
  List<Tuple> tuples = new ArrayList<Tuple>();
  for (Group group : input) {
    toTupleWriter.write(group);
    final Tuple tuple = pigRecordConsumer.getCurrentRecord();
    tuples.add(tuple);
    LOG.debug("in: {}\nout:{}", group, tuple);
  }

  // Phase 2: convert the Tuples back into Groups.
  GroupRecordConverter recordConsumer = new GroupRecordConverter(schema);
  TupleWriteSupport tupleWriter = newTupleWriter(pigSchemaString, recordConsumer);
  List<Group> groups = new ArrayList<Group>();
  for (Tuple tuple : tuples) {
    LOG.debug("{}", tuple);
    tupleWriter.write(tuple);
    groups.add(recordConsumer.getCurrentRecord());
  }

  // The round trip must preserve every record (compared via string rendering).
  assertEquals(input.size(), groups.size());
  for (int i = 0; i < input.size(); i++) {
    Group in = input.get(i);
    LOG.debug("{}", in);
    assertEquals(in.toString(), groups.get(i).toString());
  }
}
Use of org.apache.parquet.example.data.simple.convert.GroupRecordConverter in the Apache parquet-mr project.
From the class TestFiltered, method testApplyFunctionFilterOnString:
@Test
public void testApplyFunctionFilterOnString() {
  MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  MemPageStore memPageStore = writeTestRecords(columnIO, 1);
  RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);

  // Case 1: a predicate matching the A url in record 1 keeps only r1.
  RecordReaderImplementation<Group> recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("Name.Url",
              applyFunctionToString(new StringEndsWithAPredicate()))));
  readOne(recordReader, "r2 filtered out", r1);

  // Case 2: matching the B url of record 1 yields nothing — only the first
  // instance of the column is matched against.
  recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("Name.Url", equalTo("http://B"))));
  List<Group> matches = readAll(recordReader);
  assertEquals("There should be no matching records: " + matches, 0, matches.size());

  // Case 3: matching the C url in record 2 keeps only r2.
  recordReader = (RecordReaderImplementation<Group>)
      columnIO.getRecordReader(memPageStore, recordConverter,
          FilterCompat.get(column("Name.Url", equalTo("http://C"))));
  readOne(recordReader, "r1 filtered out", r2);
}
Aggregations