Use of org.apache.parquet.column.ColumnReader in project parquet-mr by apache.
From the class TestMemColumn, method testMemColumn.
@Test
public void testMemColumn() throws Exception {
  MessageType schema = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
  ColumnDescriptor path = schema.getColumnDescription(new String[] { "foo", "bar" });
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  // required field: repetition and definition levels are both 0
  columnWriter.write(42L, 0, 0);
  memColumnsStore.flush();
  ColumnReader columnReader = getColumnReader(memPageStore, path, schema);
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals(42, columnReader.getLong());
    columnReader.consume();
  }
}
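These tests rely on two helpers that the snippets do not show: newColumnWriteStoreImpl and getColumnReader. A minimal sketch of how they can be written follows; the ParquetProperties settings and the DummyRecordConverter (a no-op converter from parquet-mr's test sources) are illustrative assumptions, not necessarily the project's exact helper bodies.

// Sketch of the unshown test helpers (assumed, not the exact parquet-mr bodies).
private ColumnWriteStoreV1 newColumnWriteStoreImpl(MemPageStore memPageStore) {
  // Small page size (assumed) so the multi-page tests below actually roll over pages.
  return new ColumnWriteStoreV1(memPageStore,
      ParquetProperties.builder()
          .withPageSize(2048)
          .withDictionaryEncoding(false)
          .build());
}

private ColumnReader getColumnReader(MemPageStore memPageStore, ColumnDescriptor path, MessageType schema) {
  // ColumnReadStoreImpl needs a root GroupConverter; a no-op converter suffices here.
  return new ColumnReadStoreImpl(memPageStore,
      new DummyRecordConverter(schema).getRootConverter(),
      schema,
      null)
      .getColumnReader(path);
}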
Use of org.apache.parquet.column.ColumnReader in project parquet-mr by apache.
From the class TestMemColumn, method testMemColumnSeveralPages.
@Test
public void testMemColumnSeveralPages() throws Exception {
  MessageType mt = MessageTypeParser.parseMessageType("message msg { required group foo { required int64 bar; } }");
  String[] col = new String[] { "foo", "bar" };
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnDescriptor path = mt.getColumnDescription(col);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  // write enough values to span several pages
  for (int i = 0; i < 2000; i++) {
    columnWriter.write(42L, 0, 0);
  }
  memColumnsStore.flush();
  ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
  for (int i = 0; i < columnReader.getTotalValueCount(); i++) {
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    assertEquals(42, columnReader.getLong());
    columnReader.consume();
  }
}
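With a small page size like the one assumed in the helper sketch above, the 2000 writes are expected to roll over into several pages. The page layout can be inspected directly from the MemPageStore; a sketch, assuming the standard PageReadStore/PageReader accessors:

// Sketch: iterate the pages written for the column (not part of the original test).
PageReader pageReader = memPageStore.getPageReader(path);
DataPage page;
while ((page = pageReader.readPage()) != null) {
  System.out.println("page with " + page.getValueCount() + " values");
}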
Use of org.apache.parquet.column.ColumnReader in project parquet-mr by apache.
From the class TestMemColumn, method testMemColumnSeveralPagesRepeated.
@Test
public void testMemColumnSeveralPagesRepeated() throws Exception {
  MessageType mt = MessageTypeParser.parseMessageType("message msg { repeated group foo { repeated int64 bar; } }");
  String[] col = new String[] { "foo", "bar" };
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnWriteStoreV1 memColumnsStore = newColumnWriteStoreImpl(memPageStore);
  ColumnDescriptor path = mt.getColumnDescription(col);
  ColumnWriter columnWriter = memColumnsStore.getColumnWriter(path);
  // cycle through repetition and definition levels; only d == 2 carries a value
  int[] rs = { 0, 0, 0, 1, 1, 1, 2, 2, 2 };
  int[] ds = { 0, 1, 2, 0, 1, 2, 0, 1, 2 };
  for (int i = 0; i < 837; i++) {
    int r = rs[i % rs.length];
    int d = ds[i % ds.length];
    LOG.debug("write i: {}", i);
    if (d == 2) {
      columnWriter.write((long) i, r, d);
    } else {
      // below the max definition level the value is null: only the levels are written
      columnWriter.writeNull(r, d);
    }
  }
  memColumnsStore.flush();
  ColumnReader columnReader = getColumnReader(memPageStore, path, mt);
  int i = 0;
  for (int j = 0; j < columnReader.getTotalValueCount(); j++) {
    int r = rs[i % rs.length];
    int d = ds[i % ds.length];
    LOG.debug("read i: {}", i);
    assertEquals("r row " + i, r, columnReader.getCurrentRepetitionLevel());
    assertEquals("d row " + i, d, columnReader.getCurrentDefinitionLevel());
    if (d == 2) {
      assertEquals("data row " + i, (long) i, columnReader.getLong());
    }
    columnReader.consume();
    ++i;
  }
}
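The rs/ds arrays are easier to follow against the column's maxima: with both foo and bar repeated, the maximum repetition and definition levels are both 2, so a value is only materialized when d == 2 and writeNull is used for every other level combination. A quick check:

// For "repeated group foo { repeated int64 bar; }" both maxima are 2.
ColumnDescriptor desc = mt.getColumnDescription(new String[] { "foo", "bar" });
assertEquals(2, desc.getMaxRepetitionLevel());  // foo and bar are both repeated
assertEquals(2, desc.getMaxDefinitionLevel());  // a value exists only when d == 2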
Use of org.apache.parquet.column.ColumnReader in project parquet-mr by apache.
From the class DumpCommand, method dump.
public static void dump(PrettyPrintWriter out, ColumnReadStoreImpl crstore, ColumnDescriptor column, long page, long total, long offset) throws IOException {
  int dmax = column.getMaxDefinitionLevel();
  ColumnReader creader = crstore.getColumnReader(column);
  out.format("*** row group %d of %d, values %d to %d ***%n", page, total, offset, offset + creader.getTotalValueCount() - 1);
  for (long i = 0, e = creader.getTotalValueCount(); i < e; ++i) {
    int rlvl = creader.getCurrentRepetitionLevel();
    int dlvl = creader.getCurrentDefinitionLevel();
    out.format("value %d: R:%d D:%d V:", offset + i, rlvl, dlvl);
    if (dlvl == dmax) {
      // value is defined: stringify it according to the column's primitive type
      PrimitiveStringifier stringifier = column.getPrimitiveType().stringifier();
      switch (column.getType()) {
        case FIXED_LEN_BYTE_ARRAY:
        case INT96:
        case BINARY:
          out.print(stringifier.stringify(creader.getBinary()));
          break;
        case BOOLEAN:
          out.print(stringifier.stringify(creader.getBoolean()));
          break;
        case DOUBLE:
          out.print(stringifier.stringify(creader.getDouble()));
          break;
        case FLOAT:
          out.print(stringifier.stringify(creader.getFloat()));
          break;
        case INT32:
          out.print(stringifier.stringify(creader.getInteger()));
          break;
        case INT64:
          out.print(stringifier.stringify(creader.getLong()));
          break;
      }
    } else {
      // definition level below the max: the value is null at some nesting level
      out.format("<null>");
    }
    out.println();
    creader.consume();
  }
}
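dump() expects a ColumnReadStoreImpl for a single row group. A sketch of how a caller can build one from a file with the parquet-hadoop reader API; the file name is a placeholder, and DumpGroupConverter stands in for DumpCommand's internal no-op converter (any no-op GroupConverter works).

// Sketch: wiring dump() to a file (file name and converter are assumptions).
ParquetFileReader reader = ParquetFileReader.open(
    HadoopInputFile.fromPath(new Path("data.parquet"), new Configuration()));
MessageType schema = reader.getFooter().getFileMetaData().getSchema();
String createdBy = reader.getFooter().getFileMetaData().getCreatedBy();
PageReadStore pages;
while ((pages = reader.readNextRowGroup()) != null) {
  ColumnReadStoreImpl crstore =
      new ColumnReadStoreImpl(pages, new DumpGroupConverter(), schema, createdBy);
  // call dump(out, crstore, column, rowGroup, totalRowGroups, offset) per column here
}
reader.close();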
Use of org.apache.parquet.column.ColumnReader in project parquet-mr by apache.
From the class RecordReaderImplementation, method read().
/**
 * @see org.apache.parquet.io.RecordReader#read()
 */
@Override
public T read() {
  int currentLevel = 0;
  recordRootConverter.start();
  State currentState = states[0];
  do {
    ColumnReader columnReader = currentState.column;
    int d = columnReader.getCurrentDefinitionLevel();
    // creating needed nested groups until the current field (opening tags)
    int depth = currentState.definitionLevelToDepth[d];
    for (; currentLevel <= depth; ++currentLevel) {
      currentState.groupConverterPath[currentLevel].start();
    }
    // set the current value
    if (d >= currentState.maxDefinitionLevel) {
      // not null
      columnReader.writeCurrentValueToConverter();
    }
    columnReader.consume();
    int nextR = currentState.maxRepetitionLevel == 0 ? 0 : columnReader.getCurrentRepetitionLevel();
    // level to go to close current groups
    int next = currentState.nextLevel[nextR];
    for (; currentLevel > next; currentLevel--) {
      currentState.groupConverterPath[currentLevel - 1].end();
    }
    currentState = currentState.nextState[nextR];
  } while (currentState != null);
  recordRootConverter.end();
  T record = recordMaterializer.getCurrentRecord();
  shouldSkipCurrentRecord = record == null;
  if (shouldSkipCurrentRecord) {
    recordMaterializer.skipCurrentRecord();
  }
  return record;
}
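read() is normally not called directly; it is driven through MessageColumnIO, which builds the states table from the schema. A sketch using the example Group object model, assuming schema and pages come from a ParquetFileReader as in the previous sketch:

// Sketch: obtaining and driving a RecordReader via MessageColumnIO.
MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);
RecordReader<Group> recordReader = columnIO.getRecordReader(pages, new GroupRecordConverter(schema));
for (long i = 0, rows = pages.getRowCount(); i < rows; i++) {
  Group record = recordReader.read();  // null when the materializer skipped the record
  if (record != null) {
    System.out.println(record);
  }
}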