Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by Apache.
Class TestFiltered, method testFilterOnString.
/**
 * Reads the stored test records back three times, each time through an equality filter on the
 * {@code Name.Url} column, and checks which records come out.
 *
 * <p>The filters are, in order, {@code "http://A"} (expected to yield {@code r1}),
 * {@code "http://B"} (expected to yield nothing) and {@code "http://C"} (expected to yield
 * {@code r2}).
 */
@Test
public void testFilterOnString() {
  final MessageColumnIO columnIO = new ColumnIOFactory(true).getColumnIO(schema);
  final MemPageStore memPageStore = writeTestRecords(columnIO, 1);
  final RecordMaterializer<Group> recordConverter = new GroupRecordConverter(schema);

  // First, match against the A url in record 1.
  final RecordReaderImplementation<Group> readerForA =
      (RecordReaderImplementation<Group>) columnIO.getRecordReader(
          memPageStore,
          recordConverter,
          FilterCompat.get(column("Name.Url", equalTo("http://A"))));
  readOne(readerForA, "r2 filtered out", r1);

  // Second, match against the B url in record 1. Nothing should come back: per the original
  // author's note, the filter only matches against the first instance.
  final RecordReaderImplementation<Group> readerForB =
      (RecordReaderImplementation<Group>) columnIO.getRecordReader(
          memPageStore,
          recordConverter,
          FilterCompat.get(column("Name.Url", equalTo("http://B"))));
  final List<Group> unexpected = readAll(readerForB);
  assertEquals("There should be no matching records: " + unexpected, 0, unexpected.size());

  // Finally, match against the C url in record 2.
  final RecordReaderImplementation<Group> readerForC =
      (RecordReaderImplementation<Group>) columnIO.getRecordReader(
          memPageStore,
          recordConverter,
          FilterCompat.get(column("Name.Url", equalTo("http://C"))));
  readOne(readerForC, "r1 filtered out", r2);
}
Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by Apache.
Class TestMemColumn, method testMemColumn.
/**
 * Writes one {@code int64} value (42) to the required column {@code foo.bar} through a
 * {@link MemPageStore}-backed write store, flushes it, and reads it back.
 *
 * <p>Checks that exactly one value is reported and that it has repetition level 0,
 * definition level 0 and value 42.
 *
 * @throws Exception if writing to or reading from the in-memory store fails
 */
@Test
public void testMemColumn() throws Exception {
  MessageType schema = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
  ColumnDescriptor path = schema.getColumnDescription(new String[] { "foo", "bar" });
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  columnWriter.write(42L, 0, 0);
  memColumnsStore.flush();

  ColumnReader columnReader = getColumnReader(memPageStore, path, schema);
  // Pin the count first: the loop below is bounded by the reader's own count, so without this
  // check a reader reporting zero values would make the test pass without asserting anything.
  long valueCount = columnReader.getTotalValueCount();
  assertEquals("total value count", 1, valueCount);
  for (long i = 0; i < valueCount; i++) {
    // JUnit order is (expected, actual); keeping it makes failure messages read correctly.
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals(42L, columnReader.getLong());
    columnReader.consume();
  }
}
Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by Apache.
Class TestMemColumn, method testMemColumnSeveralPages.
/**
 * Writes 2000 copies of the {@code int64} value 42 to the required column {@code foo.bar}
 * through a {@link MemPageStore}-backed write store, flushes, and reads them all back.
 *
 * <p>Checks that 2000 values are reported and that each one has repetition level 0,
 * definition level 0 and value 42.
 *
 * @throws Exception if writing to or reading from the in-memory store fails
 */
@Test
public void testMemColumnSeveralPages() throws Exception {
  MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
  String[] col = new String[] { "foo", "bar" };
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnDescriptor path = mt.getColumnDescription(col);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  final int writtenCount = 2000;
  for (int i = 0; i < writtenCount; i++) {
    columnWriter.write(42L, 0, 0);
  }
  memColumnsStore.flush();

  ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
  // Pin the count first: the loop below is bounded by the reader's own count, so without this
  // check a reader reporting too few (or zero) values would still pass.
  long valueCount = columnReader.getTotalValueCount();
  assertEquals("total value count", writtenCount, valueCount);
  for (long i = 0; i < valueCount; i++) {
    // JUnit order is (expected, actual); keeping it makes failure messages read correctly.
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals(42L, columnReader.getLong());
    columnReader.consume();
  }
}
Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by Apache.
Class TestMemColumn, method testMemColumnSeveralPagesRepeated.
/**
 * Writes 837 entries to the repeated column {@code foo.bar}, cycling through a fixed
 * nine-step pattern of (repetition level, definition level) pairs, then reads them back.
 *
 * <p>An entry whose definition level is 2 is written as the {@code long} value of its index;
 * every other entry is written as a null. On read, the levels of each entry are checked
 * against the same pattern, and the value is checked wherever the definition level is 2.
 *
 * @throws Exception if writing to or reading from the in-memory store fails
 */
@Test
public void testMemColumnSeveralPagesRepeated() throws Exception {
  MessageType mt = MessageTypeParser.parseMessageType("message msg { repeated group foo { repeated int64 bar; } }");
  String[] col = new String[] { "foo", "bar" };
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnDescriptor path = mt.getColumnDescription(col);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  // Level pattern, indexed by (entry index % 9): every combination of r in 0..2 and d in 0..2.
  int[] rs = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
  int[] ds = { 0, 1, 2, 0, 1, 2, 0, 1, 2 };
  final int writtenCount = 837;
  for (int i = 0; i < writtenCount; i++) {
    int r = rs[i % rs.length];
    int d = ds[i % ds.length];
    LOG.debug("write i: {}", i);
    if (d == 2) {
      columnWriter.write((long) i, r, d);
    } else {
      columnWriter.writeNull(r, d);
    }
  }
  memColumnsStore.flush();

  ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
  // Pin the count first so that a reader reporting fewer entries than were written fails
  // here instead of silently shortening the verification loop.
  assertEquals("total value count", writtenCount, columnReader.getTotalValueCount());
  for (int i = 0; i < writtenCount; i++) {
    int r = rs[i % rs.length];
    int d = ds[i % ds.length];
    LOG.debug("read i: {}", i);
    assertEquals("r row " + i, r, columnReader.getCurrentRepetitionLevel());
    assertEquals("d row " + i, d, columnReader.getCurrentDefinitionLevel());
    if (d == 2) {
      assertEquals("data row " + i, (long) i, columnReader.getLong());
    }
    columnReader.consume();
  }
}
Use of org.apache.parquet.column.page.mem.MemPageStore in project parquet-mr by Apache.
Class ValidatingColumnWriteStore, method testReadUsingRequestedSchemaWithIncompatibleField.
/**
 * Writes one group with an {@code optional int32 e} field, then tries to read it back with a
 * requested schema that declares {@code e} as {@code optional binary}.
 *
 * <p>The read is expected to throw a {@link ParquetDecodingException} whose message names
 * the two incompatible types; the test fails if no exception is thrown.
 */
@Test
public void testReadUsingRequestedSchemaWithIncompatibleField() {
  final MessageType fileSchema = new MessageType("schema", new PrimitiveType(OPTIONAL, INT32, "e"));
  final MemPageStore pageStore = new MemPageStore(1);
  final SimpleGroupFactory factory = new SimpleGroupFactory(fileSchema);
  writeGroups(fileSchema, pageStore, factory.newGroup().append("e", 4));

  final String expectedMessage =
      "The requested schema is not compatible with the file schema. incompatible types: optional binary e != optional int32 e";
  try {
    // Incompatible schema: different type
    final MessageType requestedSchema =
        new MessageType("schema", new PrimitiveType(OPTIONAL, BINARY, "e"));
    readGroups(pageStore, fileSchema, requestedSchema, 1);
    fail("should have thrown an incompatible schema exception");
  } catch (ParquetDecodingException incompatible) {
    assertEquals(expectedMessage, incompatible.getMessage());
  }
}
Aggregations