Use of io.prestosql.orc.metadata.OrcColumnId in project hetu-core by openlookeng.
The class PresentOutputStream, method getStreamDataOutput:
public Optional<StreamDataOutput> getStreamDataOutput(OrcColumnId columnId)
{
    checkArgument(closed);
    if (booleanOutputStream == null) {
        return Optional.empty();
    }
    StreamDataOutput streamDataOutput = booleanOutputStream.getStreamDataOutput(columnId);
    // rewrite the DATA stream created by the boolean output stream to a PRESENT stream
    Stream stream = new Stream(columnId, PRESENT, toIntExact(streamDataOutput.size()), streamDataOutput.getStream().isUseVInts());
    return Optional.of(new StreamDataOutput(
            sliceOutput -> {
                streamDataOutput.writeData(sliceOutput);
                return stream.getLength();
            },
            stream));
}
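A PRESENT stream only exists when the column actually contained nulls, which is why the method returns Optional.empty() when no boolean output stream was ever created. A minimal caller sketch, assuming a column writer with a presentStream field collecting its finished streams (the variable names here are hypothetical):

    // Gather this column's output streams; the PRESENT stream is appended
    // only when at least one null was recorded for the column.
    List<StreamDataOutput> outputDataStreams = new ArrayList<>();
    presentStream.getStreamDataOutput(columnId).ifPresent(outputDataStreams::add);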
Use of io.prestosql.orc.metadata.OrcColumnId in project hetu-core by openlookeng.
The class TestOrcReaderPositions, method testRowGroupSkipping:
@Test
public void testRowGroupSkipping()
        throws Exception
{
    try (TempFile tempFile = new TempFile()) {
        // create a single-stripe file with multiple row groups
        int rowCount = 142_000;
        createSequentialFile(tempFile.getFile(), rowCount);

        // test reading two row groups from the middle of the file
        OrcPredicate predicate = (numberOfRows, allColumnStatistics) -> {
            if (numberOfRows == rowCount) {
                return true;
            }
            IntegerStatistics stats = allColumnStatistics.get(new OrcColumnId(1)).getIntegerStatistics();
            return (stats.getMin() == 50_000) || (stats.getMin() == 60_000);
        };

        try (OrcRecordReader reader = createCustomOrcRecordReader(tempFile, predicate, BIGINT, MAX_BATCH_SIZE)) {
            assertEquals(reader.getFileRowCount(), rowCount);
            assertEquals(reader.getReaderRowCount(), rowCount);
            assertEquals(reader.getFilePosition(), 0);
            assertEquals(reader.getReaderPosition(), 0);

            long position = 50_000;
            while (true) {
                Page page = reader.nextPage();
                if (page == null) {
                    break;
                }
                page = page.getLoadedPage();

                Block block = page.getBlock(0);
                for (int i = 0; i < block.getPositionCount(); i++) {
                    assertEquals(BIGINT.getLong(block, i), position + i);
                }

                assertEquals(reader.getFilePosition(), position);
                assertEquals(reader.getReaderPosition(), position);
                position += page.getPositionCount();
            }

            assertEquals(position, 70_000);
            assertEquals(reader.getFilePosition(), rowCount);
            assertEquals(reader.getReaderPosition(), rowCount);
        }
    }
}
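The predicate works because createSequentialFile writes row i with value i and ORC keeps one index entry per row group (10,000 rows by default), so the two row groups whose IntegerStatistics report a min of 50,000 or 60,000 together cover exactly rows 50,000 through 69,999. A reusable variant of the same predicate shape, as a hedged sketch (the helper name and the single-stripe assumption are mine, not the project's):

    // Keep reading at file level (numberOfRows == fileRowCount) and select
    // row groups by the min recorded in their integer statistics. With a
    // single-stripe file, the stripe-level callback also matches fileRowCount.
    static OrcPredicate rowGroupMinIn(long fileRowCount, Set<Long> mins)
    {
        return (numberOfRows, allColumnStatistics) -> {
            if (numberOfRows == fileRowCount) {
                return true;
            }
            IntegerStatistics stats = allColumnStatistics.get(new OrcColumnId(1)).getIntegerStatistics();
            return stats != null && mins.contains(stats.getMin());
        };
    }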
Use of io.prestosql.orc.metadata.OrcColumnId in project hetu-core by openlookeng.
The class TestOrcReaderPositions, method testStripeSkipping:
@Test
public void testStripeSkipping()
        throws Exception
{
    try (TempFile tempFile = new TempFile()) {
        createMultiStripeFile(tempFile.getFile());

        // test reading the second and fourth stripes
        OrcPredicate predicate = (numberOfRows, allColumnStatistics) -> {
            if (numberOfRows == 100) {
                return true;
            }
            IntegerStatistics stats = allColumnStatistics.get(new OrcColumnId(1)).getIntegerStatistics();
            return ((stats.getMin() == 60) && (stats.getMax() == 117)) ||
                    ((stats.getMin() == 180) && (stats.getMax() == 237));
        };

        try (OrcRecordReader reader = createCustomOrcRecordReader(tempFile, predicate, BIGINT, MAX_BATCH_SIZE)) {
            assertEquals(reader.getFileRowCount(), 100);
            assertEquals(reader.getReaderRowCount(), 40);
            assertEquals(reader.getFilePosition(), 0);
            assertEquals(reader.getReaderPosition(), 0);

            // second stripe
            Page page = reader.nextPage().getLoadedPage();
            assertEquals(page.getPositionCount(), 20);
            assertEquals(reader.getReaderPosition(), 0);
            assertEquals(reader.getFilePosition(), 20);
            assertCurrentBatch(page, 1);

            // fourth stripe
            page = reader.nextPage().getLoadedPage();
            assertEquals(page.getPositionCount(), 20);
            assertEquals(reader.getReaderPosition(), 20);
            assertEquals(reader.getFilePosition(), 60);
            assertCurrentBatch(page, 3);

            page = reader.nextPage();
            assertNull(page);
            assertEquals(reader.getReaderPosition(), 40);
            assertEquals(reader.getFilePosition(), 100);
        }
    }
}
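The assertCurrentBatch helper is not part of this excerpt. The statistics matched by the predicate (min 60/max 117 for the second stripe, min 180/max 237 for the fourth) imply that createMultiStripeFile writes 20 rows per stripe with value = rowIndex * 3, so a plausible reconstruction, offered as an assumption rather than the project's actual source, is:

    private static void assertCurrentBatch(Page page, int stripe)
    {
        // assumes 20 rows per stripe and value = rowIndex * 3, as implied
        // by the stripe statistics matched in the predicate above
        Block block = page.getBlock(0);
        for (int i = 0; i < 20; i++) {
            assertEquals(BIGINT.getLong(block, i), ((stripe * 20L) + i) * 3);
        }
    }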
Use of io.prestosql.orc.metadata.OrcColumnId in project hetu-core by openlookeng.
The class AbstractTestValueStream, method testWriteValue:
protected void testWriteValue(List<List<T>> groups)
        throws IOException
{
    W outputStream = createValueOutputStream();
    // write and verify three times to exercise reset()
    for (int i = 0; i < 3; i++) {
        outputStream.reset();
        long retainedBytes = 0;
        for (List<T> group : groups) {
            outputStream.recordCheckpoint();
            group.forEach(value -> writeValue(outputStream, value));
            assertTrue(outputStream.getRetainedBytes() >= retainedBytes);
            retainedBytes = outputStream.getRetainedBytes();
        }
        outputStream.close();

        DynamicSliceOutput sliceOutput = new DynamicSliceOutput(1000);
        StreamDataOutput streamDataOutput = outputStream.getStreamDataOutput(new OrcColumnId(33));
        streamDataOutput.writeData(sliceOutput);
        Stream stream = streamDataOutput.getStream();
        assertEquals(stream.getStreamKind(), StreamKind.DATA);
        assertEquals(stream.getColumnId(), new OrcColumnId(33));
        assertEquals(stream.getLength(), sliceOutput.size());

        List<C> checkpoints = outputStream.getCheckpoints();
        assertEquals(checkpoints.size(), groups.size());

        R valueStream = createValueStream(sliceOutput.slice());

        // read the values back sequentially
        for (List<T> group : groups) {
            int index = 0;
            for (T expectedValue : group) {
                index++;
                T actualValue = readValue(valueStream);
                if (!actualValue.equals(expectedValue)) {
                    assertEquals(actualValue, expectedValue, "index=" + index);
                }
            }
        }

        // seek to each checkpoint in reverse order and replay its group
        for (int groupIndex = groups.size() - 1; groupIndex >= 0; groupIndex--) {
            valueStream.seekToCheckpoint(checkpoints.get(groupIndex));
            for (T expectedValue : groups.get(groupIndex)) {
                T actualValue = readValue(valueStream);
                if (!actualValue.equals(expectedValue)) {
                    assertEquals(actualValue, expectedValue);
                }
            }
        }
    }
}
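The assertions on stream.getColumnId() rely on OrcColumnId having value semantics, i.e. two instances wrapping the same id compare equal. A tiny sketch of that contract, using only calls already shown in this section:

    OrcColumnId id = new OrcColumnId(33);
    assertEquals(id, new OrcColumnId(33));  // equality is by the numeric id, not identity
    Stream stream = outputStream.getStreamDataOutput(id).getStream();
    assertEquals(stream.getColumnId(), id); // the stream carries the id it was built with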
Use of io.prestosql.orc.metadata.OrcColumnId in project hetu-core by openlookeng.
The class ColumnWriters, method createColumnWriter:
public static ColumnWriter createColumnWriter(OrcColumnId columnId, ColumnMetadata<OrcType> orcTypes, Type type, CompressionKind compression, int bufferSize, DataSize stringStatisticsLimit)
{
    requireNonNull(type, "type is null");
    OrcType orcType = orcTypes.get(columnId);
    switch (orcType.getOrcTypeKind()) {
        case BOOLEAN:
            return new BooleanColumnWriter(columnId, type, compression, bufferSize);
        case FLOAT:
            return new FloatColumnWriter(columnId, type, compression, bufferSize);
        case DOUBLE:
            return new DoubleColumnWriter(columnId, type, compression, bufferSize);
        case BYTE:
            return new ByteColumnWriter(columnId, type, compression, bufferSize);
        case DATE:
            return new LongColumnWriter(columnId, type, compression, bufferSize, DateStatisticsBuilder::new);
        case SHORT:
        case INT:
        case LONG:
            return new LongColumnWriter(columnId, type, compression, bufferSize, IntegerStatisticsBuilder::new);
        case DECIMAL:
            return new DecimalColumnWriter(columnId, type, compression, bufferSize);
        case TIMESTAMP:
            return new TimestampColumnWriter(columnId, type, compression, bufferSize);
        case BINARY:
            return new SliceDirectColumnWriter(columnId, type, compression, bufferSize, BinaryStatisticsBuilder::new);
        case CHAR:
        case VARCHAR:
        case STRING:
            return new SliceDictionaryColumnWriter(columnId, type, compression, bufferSize, stringStatisticsLimit);
        case LIST: {
            OrcColumnId fieldColumnIndex = orcType.getFieldTypeIndex(0);
            Type fieldType = type.getTypeParameters().get(0);
            ColumnWriter elementWriter = createColumnWriter(fieldColumnIndex, orcTypes, fieldType, compression, bufferSize, stringStatisticsLimit);
            return new ListColumnWriter(columnId, compression, bufferSize, elementWriter);
        }
        case MAP: {
            ColumnWriter keyWriter = createColumnWriter(orcType.getFieldTypeIndex(0), orcTypes, type.getTypeParameters().get(0), compression, bufferSize, stringStatisticsLimit);
            ColumnWriter valueWriter = createColumnWriter(orcType.getFieldTypeIndex(1), orcTypes, type.getTypeParameters().get(1), compression, bufferSize, stringStatisticsLimit);
            return new MapColumnWriter(columnId, compression, bufferSize, keyWriter, valueWriter);
        }
        case STRUCT: {
            ImmutableList.Builder<ColumnWriter> fieldWriters = ImmutableList.builder();
            for (int fieldId = 0; fieldId < orcType.getFieldCount(); fieldId++) {
                OrcColumnId fieldColumnIndex = orcType.getFieldTypeIndex(fieldId);
                Type fieldType = type.getTypeParameters().get(fieldId);
                fieldWriters.add(createColumnWriter(fieldColumnIndex, orcTypes, fieldType, compression, bufferSize, stringStatisticsLimit));
            }
            return new StructColumnWriter(columnId, compression, bufferSize, fieldWriters.build());
        }
    }
    throw new IllegalArgumentException("Unsupported type: " + type);
}
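Since createColumnWriter recurses through the ORC type tree via getFieldTypeIndex, a caller only needs to start it at the right column ids. One plausible driver, sketched under the assumptions that column id 0 is the root STRUCT, that fieldTypes holds the matching Presto types, and that the compression, buffer size, and statistics limit values shown are reasonable defaults:

    // Create one writer per top-level field of the root STRUCT (column id 0);
    // nested LIST/MAP/STRUCT writers are built by the recursive calls above.
    OrcType rootType = orcTypes.get(new OrcColumnId(0));
    List<ColumnWriter> fieldWriters = new ArrayList<>();
    for (int fieldId = 0; fieldId < rootType.getFieldCount(); fieldId++) {
        fieldWriters.add(ColumnWriters.createColumnWriter(
                rootType.getFieldTypeIndex(fieldId),
                orcTypes,
                fieldTypes.get(fieldId),
                CompressionKind.ZLIB,
                262_144,                                    // buffer size in bytes (assumed)
                new DataSize(64, DataSize.Unit.KILOBYTE))); // string min/max truncation limit (assumed)
    }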