use of com.facebook.presto.orc.OrcRecordReader in project presto by prestodb.
the class TestOrcStorageManager method testWriter.
@Test
public void testWriter() throws Exception {
OrcStorageManager manager = createOrcStorageManager();
List<Long> columnIds = ImmutableList.of(3L, 7L);
List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(10));
StoragePageSink sink = createStoragePageSink(manager, columnIds, columnTypes);
List<Page> pages = rowPagesBuilder(columnTypes).row(123L, "hello").row(456L, "bye").build();
sink.appendPages(pages);
// shard is not recorded until flush
assertEquals(shardRecorder.getShards().size(), 0);
sink.flush();
// shard is recorded after flush
List<RecordedShard> recordedShards = shardRecorder.getShards();
assertEquals(recordedShards.size(), 1);
List<ShardInfo> shards = getFutureValue(sink.commit());
assertEquals(shards.size(), 1);
ShardInfo shardInfo = Iterables.getOnlyElement(shards);
UUID shardUuid = shardInfo.getShardUuid();
File file = storageService.getStorageFile(shardUuid);
File backupFile = fileBackupStore.getBackupFile(shardUuid);
assertEquals(recordedShards.get(0).getTransactionId(), TRANSACTION_ID);
assertEquals(recordedShards.get(0).getShardUuid(), shardUuid);
assertEquals(shardInfo.getRowCount(), 2);
assertEquals(shardInfo.getCompressedSize(), file.length());
// verify primary and backup shard exist
assertFile(file, "primary shard");
assertFile(backupFile, "backup shard");
assertFileEquals(file, backupFile);
// remove primary shard to force recovery from backup
assertTrue(file.delete());
assertTrue(file.getParentFile().delete());
assertFalse(file.exists());
recoveryManager.restoreFromBackup(shardUuid, OptionalLong.empty());
try (OrcDataSource dataSource = manager.openShard(shardUuid, READER_ATTRIBUTES)) {
OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
assertEquals(reader.nextBatch(), 2);
Block column0 = reader.readBlock(BIGINT, 0);
assertEquals(column0.isNull(0), false);
assertEquals(column0.isNull(1), false);
assertEquals(BIGINT.getLong(column0, 0), 123L);
assertEquals(BIGINT.getLong(column0, 1), 456L);
Block column1 = reader.readBlock(createVarcharType(10), 1);
assertEquals(createVarcharType(10).getSlice(column1, 0), utf8Slice("hello"));
assertEquals(createVarcharType(10).getSlice(column1, 1), utf8Slice("bye"));
assertEquals(reader.nextBatch(), -1);
}
}
use of com.facebook.presto.orc.OrcRecordReader in project presto by prestodb.
the class TestShardWriter method testWriter.
@Test
public void testWriter() throws Exception {
List<Long> columnIds = ImmutableList.of(1L, 2L, 4L, 6L, 7L, 8L, 9L, 10L);
ArrayType arrayType = new ArrayType(BIGINT);
ArrayType arrayOfArrayType = new ArrayType(arrayType);
MapType mapType = new MapType(createVarcharType(10), BOOLEAN);
List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(10), VARBINARY, DOUBLE, BOOLEAN, arrayType, mapType, arrayOfArrayType);
File file = new File(directory, System.nanoTime() + ".orc");
byte[] bytes1 = octets(0x00, 0xFE, 0xFF);
byte[] bytes3 = octets(0x01, 0x02, 0x19, 0x80);
RowPagesBuilder rowPagesBuilder = RowPagesBuilder.rowPagesBuilder(columnTypes).row(123L, "hello", wrappedBuffer(bytes1), 123.456, true, arrayBlockOf(BIGINT, 1, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))).row(null, "world", null, Double.POSITIVE_INFINITY, null, arrayBlockOf(BIGINT, 3, null), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7))).row(456L, "bye ☃", wrappedBuffer(bytes3), Double.NaN, false, arrayBlockOf(BIGINT), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false), arrayBlockOf(arrayType, arrayBlockOf(BIGINT)));
try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(new EmptyClassLoader());
OrcFileWriter writer = new OrcFileWriter(columnIds, columnTypes, file)) {
writer.appendPages(rowPagesBuilder.build());
}
try (OrcDataSource dataSource = fileOrcDataSource(file)) {
OrcRecordReader reader = createReader(dataSource, columnIds, columnTypes);
assertEquals(reader.getReaderRowCount(), 3);
assertEquals(reader.getReaderPosition(), 0);
assertEquals(reader.getFileRowCount(), reader.getReaderRowCount());
assertEquals(reader.getFilePosition(), reader.getFilePosition());
assertEquals(reader.nextBatch(), 3);
assertEquals(reader.getReaderPosition(), 0);
assertEquals(reader.getFilePosition(), reader.getFilePosition());
Block column0 = reader.readBlock(BIGINT, 0);
assertEquals(column0.isNull(0), false);
assertEquals(column0.isNull(1), true);
assertEquals(column0.isNull(2), false);
assertEquals(BIGINT.getLong(column0, 0), 123L);
assertEquals(BIGINT.getLong(column0, 2), 456L);
Block column1 = reader.readBlock(createVarcharType(10), 1);
assertEquals(createVarcharType(10).getSlice(column1, 0), utf8Slice("hello"));
assertEquals(createVarcharType(10).getSlice(column1, 1), utf8Slice("world"));
assertEquals(createVarcharType(10).getSlice(column1, 2), utf8Slice("bye ☃"));
Block column2 = reader.readBlock(VARBINARY, 2);
assertEquals(VARBINARY.getSlice(column2, 0), wrappedBuffer(bytes1));
assertEquals(column2.isNull(1), true);
assertEquals(VARBINARY.getSlice(column2, 2), wrappedBuffer(bytes3));
Block column3 = reader.readBlock(DOUBLE, 3);
assertEquals(column3.isNull(0), false);
assertEquals(column3.isNull(1), false);
assertEquals(column3.isNull(2), false);
assertEquals(DOUBLE.getDouble(column3, 0), 123.456);
assertEquals(DOUBLE.getDouble(column3, 1), Double.POSITIVE_INFINITY);
assertEquals(DOUBLE.getDouble(column3, 2), Double.NaN);
Block column4 = reader.readBlock(BOOLEAN, 4);
assertEquals(column4.isNull(0), false);
assertEquals(column4.isNull(1), true);
assertEquals(column4.isNull(2), false);
assertEquals(BOOLEAN.getBoolean(column4, 0), true);
assertEquals(BOOLEAN.getBoolean(column4, 2), false);
Block column5 = reader.readBlock(arrayType, 5);
assertEquals(column5.getPositionCount(), 3);
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 0), arrayBlockOf(BIGINT, 1, 2)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 1), arrayBlockOf(BIGINT, 3, null)));
assertTrue(arrayBlocksEqual(BIGINT, arrayType.getObject(column5, 2), arrayBlockOf(BIGINT)));
Block column6 = reader.readBlock(mapType, 6);
assertEquals(column6.getPositionCount(), 3);
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column6, 0), mapBlockOf(createVarcharType(5), BOOLEAN, "k1", true)));
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column6, 1), mapBlockOf(createVarcharType(5), BOOLEAN, "k2", null)));
assertTrue(mapBlocksEqual(createVarcharType(5), BOOLEAN, arrayType.getObject(column6, 2), mapBlockOf(createVarcharType(5), BOOLEAN, "k3", false)));
Block column7 = reader.readBlock(arrayOfArrayType, 7);
assertEquals(column7.getPositionCount(), 3);
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 0), arrayBlockOf(arrayType, arrayBlockOf(BIGINT, 5))));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 1), arrayBlockOf(arrayType, null, arrayBlockOf(BIGINT, 6, 7))));
assertTrue(arrayBlocksEqual(arrayType, arrayOfArrayType.getObject(column7, 2), arrayBlockOf(arrayType, arrayBlockOf(BIGINT))));
assertEquals(reader.nextBatch(), -1);
assertEquals(reader.getReaderPosition(), 3);
assertEquals(reader.getFilePosition(), reader.getFilePosition());
OrcFileMetadata orcFileMetadata = METADATA_CODEC.fromJson(reader.getUserMetadata().get(OrcFileMetadata.KEY).getBytes());
assertEquals(orcFileMetadata, new OrcFileMetadata(ImmutableMap.<Long, TypeSignature>builder().put(1L, BIGINT.getTypeSignature()).put(2L, createVarcharType(10).getTypeSignature()).put(4L, VARBINARY.getTypeSignature()).put(6L, DOUBLE.getTypeSignature()).put(7L, BOOLEAN.getTypeSignature()).put(8L, arrayType.getTypeSignature()).put(9L, mapType.getTypeSignature()).put(10L, arrayOfArrayType.getTypeSignature()).build()));
}
File crcFile = new File(file.getParentFile(), "." + file.getName() + ".crc");
assertFalse(crcFile.exists());
}
use of com.facebook.presto.orc.OrcRecordReader in project presto by prestodb.
the class ShardStats method doComputeColumnStats.
private static ColumnStats doComputeColumnStats(OrcReader orcReader, long columnId, Type type) throws IOException {
int columnIndex = columnIndex(orcReader.getColumnNames(), columnId);
OrcRecordReader reader = orcReader.createRecordReader(ImmutableMap.of(columnIndex, type), OrcPredicate.TRUE, UTC, new AggregatedMemoryContext());
if (type.equals(BooleanType.BOOLEAN)) {
return indexBoolean(type, reader, columnIndex, columnId);
}
if (type.equals(BigintType.BIGINT) || type.equals(DateType.DATE) || type.equals(TimestampType.TIMESTAMP)) {
return indexLong(type, reader, columnIndex, columnId);
}
if (type.equals(DoubleType.DOUBLE)) {
return indexDouble(type, reader, columnIndex, columnId);
}
if (type instanceof VarcharType) {
return indexString(type, reader, columnIndex, columnId);
}
return null;
}
Aggregations