use of com.facebook.presto.orc.metadata.statistics.IntegerStatistics in project presto by prestodb.
the class TestStripeReader method testRowSize.
/**
 * Checks the minimum average row size reported by a row group built from
 * row-index statistics for one integer column (stream column 1) and one map
 * column (stream column 2) whose key and value streams are columns 3 and 4.
 */
@Test
public void testRowSize() {
    int numberOfEntries = 10_000;
    long numRowsInGroup = MILLION;

    // The same integer statistics instance is shared by every integer column.
    IntegerStatistics integerStatistics = new IntegerStatistics(0L, 0L, 0L);
    ColumnStatistics intColumnStatistics = new IntegerColumnStatistics(numRowsInGroup, null, integerStatistics);
    ColumnStatistics mapColumnStatistics = new ColumnStatistics(numRowsInGroup, null);
    // Key and value columns hold numberOfEntries values for every row of the group.
    ColumnStatistics mapKeyColumnStatistics = new IntegerColumnStatistics(numRowsInGroup * numberOfEntries, null, integerStatistics);
    ColumnStatistics mapValueColumnStatistics = new IntegerColumnStatistics(numRowsInGroup * numberOfEntries, null, integerStatistics);

    StreamId intStreamId = new StreamId(1, 0, Stream.StreamKind.ROW_INDEX);
    StreamId mapStreamId = new StreamId(2, 0, Stream.StreamKind.ROW_INDEX);
    StreamId mapKeyStreamId = new StreamId(3, 0, Stream.StreamKind.ROW_INDEX);
    StreamId mapValueStreamId = new StreamId(4, 0, Stream.StreamKind.ROW_INDEX);

    Map<StreamId, List<RowGroupIndex>> columnIndexes = ImmutableMap.of(
            intStreamId, createRowGroupIndex(intColumnStatistics),
            mapStreamId, createRowGroupIndex(mapColumnStatistics),
            mapKeyStreamId, createRowGroupIndex(mapKeyColumnStatistics),
            mapValueStreamId, createRowGroupIndex(mapValueColumnStatistics));

    // Each row contains 1 integer, 2 * numberOfEntries * integer (2 is for key and value).
    long expectedRowSize = INTEGER_VALUE_BYTES + 2 * numberOfEntries * INTEGER_VALUE_BYTES;

    RowGroup rowGroup = StripeReader.createRowGroup(0, Long.MAX_VALUE, numRowsInGroup, columnIndexes, ImmutableMap.of(), ImmutableMap.of());

    // Actual value first, expected second, so that a failure message labels
    // the two numbers correctly (same argument order as the other tests).
    assertEquals(rowGroup.getMinAverageRowBytes(), expectedRowSize);
}
use of com.facebook.presto.orc.metadata.statistics.IntegerStatistics in project presto by prestodb.
the class IcebergOrcFileWriter method toIcebergMinMax.
/**
 * Builds an {@code IcebergMinMax} for the given Iceberg type from whichever
 * kind of ORC statistics is present on the column.
 *
 * <p>The statistics kinds are probed in a fixed order: integer, double,
 * string, date, decimal. The first kind that is non-null decides the result;
 * if its min or max is null the result is empty and later kinds are not
 * consulted.
 *
 * @param orcColumnStats ORC statistics of a single column
 * @param icebergType Iceberg type of that column
 * @return the min/max pair, or {@code Optional.empty()} when no supported
 *         statistics are present or a bound is missing
 */
private static Optional<IcebergMinMax> toIcebergMinMax(ColumnStatistics orcColumnStats, org.apache.iceberg.types.Type icebergType) {
    IntegerStatistics integerStats = orcColumnStats.getIntegerStatistics();
    if (integerStats != null) {
        Object lower = integerStats.getMin();
        Object upper = integerStats.getMax();
        if (lower != null && upper != null) {
            // For Iceberg INTEGER columns the bounds are narrowed to int;
            // toIntExact throws if a bound does not fit.
            if (icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.INTEGER) {
                lower = toIntExact((Long) lower);
                upper = toIntExact((Long) upper);
            }
            return Optional.of(new IcebergMinMax(icebergType, lower, upper));
        }
        return Optional.empty();
    }

    DoubleStatistics doubleStats = orcColumnStats.getDoubleStatistics();
    if (doubleStats != null) {
        Object lower = doubleStats.getMin();
        Object upper = doubleStats.getMax();
        if (lower != null && upper != null) {
            // For Iceberg FLOAT columns the bounds are narrowed to float.
            if (icebergType.typeId() == org.apache.iceberg.types.Type.TypeID.FLOAT) {
                lower = ((Double) lower).floatValue();
                upper = ((Double) upper).floatValue();
            }
            return Optional.of(new IcebergMinMax(icebergType, lower, upper));
        }
        return Optional.empty();
    }

    StringStatistics stringStats = orcColumnStats.getStringStatistics();
    if (stringStats != null) {
        Slice lower = stringStats.getMin();
        Slice upper = stringStats.getMax();
        if (lower != null && upper != null) {
            String lowerText = lower.toStringUtf8();
            String upperText = upper.toStringUtf8();
            return Optional.of(new IcebergMinMax(icebergType, lowerText, upperText));
        }
        return Optional.empty();
    }

    DateStatistics dateStats = orcColumnStats.getDateStatistics();
    if (dateStats != null) {
        Integer lower = dateStats.getMin();
        Integer upper = dateStats.getMax();
        if (lower != null && upper != null) {
            return Optional.of(new IcebergMinMax(icebergType, lower, upper));
        }
        return Optional.empty();
    }

    DecimalStatistics decimalStats = orcColumnStats.getDecimalStatistics();
    if (decimalStats != null) {
        BigDecimal lower = decimalStats.getMin();
        BigDecimal upper = decimalStats.getMax();
        if (lower != null && upper != null) {
            // Both bounds are rescaled to the scale declared by the Iceberg decimal type.
            // setScale without a rounding mode throws ArithmeticException if rounding would be needed.
            int targetScale = ((DecimalType) icebergType).scale();
            BigDecimal scaledLower = lower.setScale(targetScale);
            BigDecimal scaledUpper = upper.setScale(targetScale);
            return Optional.of(new IcebergMinMax(icebergType, scaledLower, scaledUpper));
        }
        return Optional.empty();
    }

    // None of the supported statistics kinds is available for this column.
    return Optional.empty();
}
use of com.facebook.presto.orc.metadata.statistics.IntegerStatistics in project presto by prestodb.
the class TestOrcReaderPositions method testRowGroupSkipping.
/**
 * Reads a sequential file through a predicate that only accepts statistics
 * whose minimum is 50_000 or 60_000, and checks that the values and the
 * reported file/reader positions cover exactly rows 50_000 up to 70_000.
 */
@Test
public void testRowGroupSkipping() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        // create single stripe file with multiple row groups
        int rowCount = 142_000;
        createSequentialFile(tempFile.getFile(), rowCount);

        // test reading two row groups from middle of file
        OrcPredicate predicate = (rows, columnStatistics) -> {
            // Statistics that span every row of the file are always accepted.
            if (rows == rowCount) {
                return true;
            }
            long minimum = columnStatistics.get(0).getIntegerStatistics().getMin();
            return minimum == 50_000 || minimum == 60_000;
        };

        try (OrcBatchRecordReader reader = createCustomOrcRecordReader(tempFile, ORC, predicate, BIGINT, MAX_BATCH_SIZE, false, false)) {
            // Before the first batch the reader reports the full row count and position zero.
            assertEquals(reader.getFileRowCount(), rowCount);
            assertEquals(reader.getReaderRowCount(), rowCount);
            assertEquals(reader.getFilePosition(), 0);
            assertEquals(reader.getReaderPosition(), 0);

            long position = 50_000;
            // nextBatch() returns -1 once no more rows are available.
            for (int batchSize = reader.nextBatch(); batchSize != -1; batchSize = reader.nextBatch()) {
                Block block = reader.readBlock(0);
                for (int offset = 0; offset < batchSize; offset++) {
                    assertEquals(BIGINT.getLong(block, offset), position + offset);
                }
                // Positions point at the first row of the batch that was just read.
                assertEquals(reader.getFilePosition(), position);
                assertEquals(reader.getReaderPosition(), position);
                position += batchSize;
            }

            assertEquals(position, 70_000);
            // After the last batch the positions equal the total row count.
            assertEquals(reader.getFilePosition(), rowCount);
            assertEquals(reader.getReaderPosition(), rowCount);
        }
    }
}
use of com.facebook.presto.orc.metadata.statistics.IntegerStatistics in project presto by prestodb.
the class TestOrcReaderPositions method testRowGroupSkippingWithAppendRowNumber.
/**
 * Reads a sequential file through a predicate that only accepts statistics
 * whose minimum is 50_000 or 70_000, with row-number appending enabled, and
 * delegates to {@code verifyAppendNumber} to compare the reader's output with
 * the expected values 50_000..59_999 and 70_000..79_999.
 */
@Test
public void testRowGroupSkippingWithAppendRowNumber() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        // create single stripe file with multiple row groups
        int rowCount = 142_000;
        createSequentialFile(tempFile.getFile(), rowCount);

        // test reading two row groups from middle of file
        OrcPredicate predicate = (numberOfRows, statisticsByColumnIndex) -> {
            // Statistics that span every row of the file are always accepted.
            if (numberOfRows == rowCount) {
                return true;
            }
            IntegerStatistics stats = statisticsByColumnIndex.get(0).getIntegerStatistics();
            return (stats.getMin() == 50_000) || (stats.getMin() == 70_000);
        };

        List<Long> expectedValues = new ArrayList<>();
        expectedValues.addAll(LongStream.range(50_000, 60_000).collect(ArrayList::new, List::add, List::addAll));
        expectedValues.addAll(LongStream.range(70_000, 80_000).collect(ArrayList::new, List::add, List::addAll));

        // Close the reader when done (it was previously leaked); it is closed
        // before the enclosing try releases the temp file.
        try (OrcSelectiveRecordReader reader = createCustomOrcSelectiveRecordReader(tempFile, ORC, predicate, BIGINT, MAX_BATCH_SIZE, false, true)) {
            verifyAppendNumber(expectedValues, reader);
        }
    }
}
use of com.facebook.presto.orc.metadata.statistics.IntegerStatistics in project presto by prestodb.
the class TestOrcReaderPositions method testStripeSkippingWithAppendNumber.
/**
 * Reads a multi-stripe file through a predicate that only accepts statistics
 * with (min 60, max 117) or (min 180, max 237), with row-number appending
 * enabled, and checks that block 1 of the returned pages contains exactly the
 * row numbers 20..39 followed by 60..79.
 */
@Test
public void testStripeSkippingWithAppendNumber() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        createMultiStripeFile(tempFile.getFile());

        // Every stripe has 20 rows and there are a total of 5 stripes
        // test reading second and fourth stripes
        OrcPredicate predicate = (numberOfRows, statisticsByColumnIndex) -> {
            // Statistics that span all 100 rows of the file are always accepted.
            if (numberOfRows == 100) {
                return true;
            }
            IntegerStatistics stats = statisticsByColumnIndex.get(0).getIntegerStatistics();
            return ((stats.getMin() == 60) && (stats.getMax() == 117)) || ((stats.getMin() == 180) && (stats.getMax() == 237));
        };

        List<Long> expectedValues = new ArrayList<>();
        expectedValues.addAll(LongStream.range(20, 40).collect(ArrayList::new, List::add, List::addAll));
        expectedValues.addAll(LongStream.range(60, 80).collect(ArrayList::new, List::add, List::addAll));

        List<Long> actualValues = new ArrayList<>();
        // Close the reader when done (it was previously leaked); it is closed
        // before the enclosing try releases the temp file.
        try (OrcSelectiveRecordReader reader = createCustomOrcSelectiveRecordReader(tempFile, ORC, predicate, BIGINT, MAX_BATCH_SIZE, false, true)) {
            assertNotNull(reader);
            // getNextPage() returns null once the reader is exhausted.
            for (Page page = reader.getNextPage(); page != null; page = reader.getNextPage()) {
                // Block 1 is read as the appended row-number column.
                Block rowNumberBlock = page.getBlock(1);
                for (int i = 0; i < page.getPositionCount(); i++) {
                    actualValues.add(rowNumberBlock.getLong(i));
                }
            }
        }
        assertEquals(actualValues, expectedValues);
    }
}
Aggregations