Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
In class ParquetTester, method testSingleRead:
public static void testSingleRead(
        Iterable<?>[] readValues,
        List<String> columnNames,
        List<Type> columnTypes,
        ParquetMetadataSource parquetMetadataSource,
        File dataFile)
{
    HiveClientConfig config = new HiveClientConfig()
            .setHiveStorageFormat(HiveStorageFormat.PARQUET)
            .setUseParquetColumnNames(false)
            .setParquetMaxReadBlockSize(new DataSize(1_000, DataSize.Unit.BYTE));
    ConnectorSession session = new TestingConnectorSession(
            new HiveSessionProperties(config, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig()).getSessionProperties());

    HiveBatchPageSourceFactory pageSourceFactory = new ParquetPageSourceFactory(
            FUNCTION_AND_TYPE_MANAGER,
            FUNCTION_RESOLUTION,
            HDFS_ENVIRONMENT,
            new FileFormatDataSourceStats(),
            parquetMetadataSource);
    ConnectorPageSource connectorPageSource = createPageSource(pageSourceFactory, session, dataFile, columnNames, columnTypes, HiveStorageFormat.PARQUET);

    Iterator<?>[] expectedValues = stream(readValues).map(Iterable::iterator).toArray(size -> new Iterator<?>[size]);
    if (connectorPageSource instanceof RecordPageSource) {
        assertRecordCursor(columnTypes, expectedValues, ((RecordPageSource) connectorPageSource).getCursor());
    }
    else {
        assertPageSource(columnTypes, expectedValues, connectorPageSource);
    }
    assertFalse(stream(expectedValues).allMatch(Iterator::hasNext));
}
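Under the hood, assertPageSource presumably drains the page source page by page and compares each position against the expected iterators. A minimal sketch of that consumption loop, as a hypothetical helper that is not part of ParquetTester, assuming pageSource was created as above:

static long countRows(ConnectorPageSource pageSource)
        throws IOException
{
    long rows = 0;
    while (!pageSource.isFinished()) {
        // getNextPage() may return null while the source is still producing
        Page page = pageSource.getNextPage();
        if (page != null) {
            rows += page.getPositionCount();
        }
    }
    pageSource.close();
    return rows;
}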
Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
In class AbstractTestHiveClient, method readTable:
private MaterializedResult readTable(
        Transaction transaction,
        ConnectorTableHandle hiveTableHandle,
        ConnectorTableLayoutHandle hiveTableLayoutHandle,
        List<ColumnHandle> columnHandles,
        ConnectorSession session,
        OptionalInt expectedSplitCount,
        Optional<HiveStorageFormat> expectedStorageFormat)
        throws Exception
{
    List<ConnectorSplit> splits = getAllSplits(session, transaction, hiveTableLayoutHandle);
    if (expectedSplitCount.isPresent()) {
        assertEquals(splits.size(), expectedSplitCount.getAsInt());
    }

    TableHandle tableHandle = toTableHandle(transaction, hiveTableHandle, hiveTableLayoutHandle);
    ImmutableList.Builder<MaterializedRow> allRows = ImmutableList.builder();
    for (ConnectorSplit split : splits) {
        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, tableHandle.getLayout().get(), columnHandles, NON_CACHEABLE)) {
            expectedStorageFormat.ifPresent(format -> assertPageSourceType(pageSource, format));
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            allRows.addAll(result.getMaterializedRows());
        }
    }
    return new MaterializedResult(allRows.build(), getTypes(columnHandles));
}
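A hedged sketch of how a caller might invoke readTable, reusing fixture helpers (newTransaction, newSession, getTableHandle, getLayout) that appear in testGetRecords below; expectedRowCount is illustrative:

try (Transaction transaction = newTransaction()) {
    ConnectorSession session = newSession();
    ConnectorTableHandle table = getTableHandle(transaction.getMetadata(), tablePartitionFormat);
    ConnectorTableLayoutHandle layout = getLayout(session, transaction, table, TupleDomain.all());
    List<ColumnHandle> columns = ImmutableList.copyOf(transaction.getMetadata().getColumnHandles(session, table).values());

    // no expected split count, no expected storage format
    MaterializedResult result = readTable(transaction, table, layout, columns, session, OptionalInt.empty(), Optional.empty());
    assertEquals(result.getRowCount(), expectedRowCount); // expectedRowCount is a placeholder
}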
Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
In class AbstractTestHiveClient, method testGetRecords:
@Test
public void testGetRecords()
        throws Exception
{
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();

        ConnectorTableHandle hiveTableHandle = getTableHandle(metadata, tablePartitionFormat);
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, hiveTableHandle);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, hiveTableHandle).values());
        Map<String, Integer> columnIndex = indexColumns(columnHandles);

        ConnectorTableLayoutHandle layoutHandle = getLayout(session, transaction, hiveTableHandle, TupleDomain.all());
        List<ConnectorSplit> splits = getAllSplits(session, transaction, layoutHandle);
        assertEquals(splits.size(), partitionCount);

        for (ConnectorSplit split : splits) {
            HiveSplit hiveSplit = (HiveSplit) split;

            // the table is partitioned on (ds, file_format, dummy)
            List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
            String ds = partitionKeys.get(0).getValue().orElse(null);
            String fileFormat = partitionKeys.get(1).getValue().orElse(null);
            HiveStorageFormat fileType = HiveStorageFormat.valueOf(fileFormat.toUpperCase());
            int dummyPartition = Integer.parseInt(partitionKeys.get(2).getValue().orElse(null));

            long rowNumber = 0;
            long completedBytes = 0;
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, hiveSplit, layoutHandle, columnHandles, NON_CACHEABLE)) {
                MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
                assertPageSourceType(pageSource, fileType);

                for (MaterializedRow row : result) {
                    try {
                        assertValueTypes(row, tableMetadata.getColumns());
                    }
                    catch (RuntimeException e) {
                        throw new RuntimeException("row " + rowNumber, e);
                    }
                    rowNumber++;

                    // t_string cycles through null, empty, and "test" with period 19
                    Object value = row.getField(columnIndex.get("t_string"));
                    if (rowNumber % 19 == 0) {
                        assertNull(value);
                    }
                    else if (rowNumber % 19 == 1) {
                        assertEquals(value, "");
                    }
                    else {
                        assertEquals(value, "test");
                    }

                    assertEquals(row.getField(columnIndex.get("t_tinyint")), (byte) (1 + rowNumber));
                    assertEquals(row.getField(columnIndex.get("t_smallint")), (short) (2 + rowNumber));
                    assertEquals(row.getField(columnIndex.get("t_int")), 3 + (int) rowNumber);

                    if (rowNumber % 13 == 0) {
                        assertNull(row.getField(columnIndex.get("t_bigint")));
                    }
                    else {
                        assertEquals(row.getField(columnIndex.get("t_bigint")), 4 + rowNumber);
                    }

                    assertEquals((Float) row.getField(columnIndex.get("t_float")), 5.1f + rowNumber, 0.001);
                    assertEquals(row.getField(columnIndex.get("t_double")), 6.2 + rowNumber);

                    if (rowNumber % 3 == 2) {
                        assertNull(row.getField(columnIndex.get("t_boolean")));
                    }
                    else {
                        assertEquals(row.getField(columnIndex.get("t_boolean")), rowNumber % 3 != 0);
                    }

                    // partition columns must match the split's partition keys
                    assertEquals(row.getField(columnIndex.get("ds")), ds);
                    assertEquals(row.getField(columnIndex.get("file_format")), fileFormat);
                    assertEquals(row.getField(columnIndex.get("dummy")), dummyPartition);

                    // completed bytes grow monotonically and never exceed the split length
                    long newCompletedBytes = pageSource.getCompletedBytes();
                    assertTrue(newCompletedBytes >= completedBytes);
                    assertTrue(newCompletedBytes <= hiveSplit.getLength());
                    completedBytes = newCompletedBytes;
                }
                assertTrue(completedBytes <= hiveSplit.getLength());
                assertEquals(rowNumber, 100);
            }
        }
    }
}
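The indexColumns helper referenced above is not shown on this page. A plausible sketch, assuming each handle is a HiveColumnHandle whose getName() returns the column name:

// Hedged sketch of indexColumns: map each column name to its ordinal
// position in the handle list, so assertions can look fields up by name.
private static Map<String, Integer> indexColumns(List<ColumnHandle> columnHandles)
{
    ImmutableMap.Builder<String, Integer> index = ImmutableMap.builder();
    int i = 0;
    for (ColumnHandle columnHandle : columnHandles) {
        index.put(((HiveColumnHandle) columnHandle).getName(), i);
        i++;
    }
    return index.build();
}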
Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
In class TestDynamicPruning, method testDynamicPartitionPruning:
@Test
public void testDynamicPartitionPruning()
{
    HiveClientConfig config = new HiveClientConfig();
    MetastoreClientConfig metastoreClientConfig = new MetastoreClientConfig();
    HiveTransactionHandle transaction = new HiveTransactionHandle();
    try (TempFile tempFile = new TempFile()) {
        ConnectorPageSource emptyPageSource = createTestingPageSource(transaction, config, new SplitContext(false, getToSkipTupleDomain()), metastoreClientConfig, tempFile.file());
        assertEquals(emptyPageSource.getClass(), HiveEmptySplitPageSource.class);

        ConnectorPageSource nonEmptyPageSource = createTestingPageSource(transaction, config, new SplitContext(false, getToKeepTupleDomain()), metastoreClientConfig, tempFile.file());
        assertEquals(nonEmptyPageSource.getClass(), HivePageSource.class);
    }
    catch (IOException e) {
        e.printStackTrace();
        fail();
    }
}
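getToSkipTupleDomain and getToKeepTupleDomain are not shown here. A hedged sketch of what such helpers could look like, assuming the SplitContext dynamic filter is keyed by Subfield (as in recent Presto versions) and the split carries a VARCHAR partition column ds; the column name and values are illustrative, not the actual test's:

// Hedged sketch: a dynamic filter that excludes the split's partition value
// (pruned to HiveEmptySplitPageSource) versus one that covers it.
private static TupleDomain<Subfield> toSkipTupleDomain()
{
    return TupleDomain.withColumnDomains(ImmutableMap.of(
            new Subfield("ds"),
            Domain.singleValue(VARCHAR, utf8Slice("2020-01-02"))));
}

private static TupleDomain<Subfield> toKeepTupleDomain()
{
    return TupleDomain.withColumnDomains(ImmutableMap.of(
            new Subfield("ds"),
            Domain.singleValue(VARCHAR, utf8Slice("2020-01-01"))));
}

The pruning check then reduces to whether the split's partition value falls outside the filter's domain; if it does, createPageSource can short-circuit to a HiveEmptySplitPageSource without opening the file.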
Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
In class TestHiveFileFormats, method testPageSourceFactory:
private void testPageSourceFactory(
        HiveBatchPageSourceFactory sourceFactory,
        FileSplit split,
        HiveStorageFormat storageFormat,
        List<TestColumn> testColumns,
        ConnectorSession session,
        int rowCount)
        throws IOException
{
    List<HivePartitionKey> partitionKeys = testColumns.stream()
            .filter(TestColumn::isPartitionKey)
            .map(TestColumn::toHivePartitionKey)
            .collect(toList());
    List<HiveColumnHandle> partitionKeyColumnHandles = getColumnHandles(testColumns.stream()
            .filter(TestColumn::isPartitionKey)
            .collect(toImmutableList()));
    List<Column> tableDataColumns = testColumns.stream()
            .filter(column -> !column.isPartitionKey())
            .map(column -> new Column(column.getName(), HiveType.valueOf(column.getType()), Optional.empty(), Optional.empty()))
            .collect(toImmutableList());
    List<HiveColumnHandle> columnHandles = getColumnHandles(testColumns);

    Optional<ConnectorPageSource> pageSource = HivePageSourceProvider.createHivePageSource(
            ImmutableSet.of(),
            ImmutableSet.of(sourceFactory),
            new Configuration(),
            session,
            split.getPath(),
            OptionalInt.empty(),
            split.getStart(),
            split.getLength(),
            split.getLength(),
            Instant.now().toEpochMilli(),
            new Storage(StorageFormat.create(storageFormat.getSerDe(), storageFormat.getInputFormat(), storageFormat.getOutputFormat()), "location", Optional.empty(), false, ImmutableMap.of(), ImmutableMap.of()),
            TupleDomain.all(),
            columnHandles,
            ImmutableMap.of(),
            partitionKeys,
            DateTimeZone.getDefault(),
            FUNCTION_AND_TYPE_MANAGER,
            new SchemaTableName("schema", "table"),
            partitionKeyColumnHandles,
            tableDataColumns,
            ImmutableMap.of(),
            tableDataColumns.size(),
            TableToPartitionMapping.empty(),
            Optional.empty(),
            false,
            DEFAULT_HIVE_FILE_CONTEXT,
            TRUE_CONSTANT,
            false,
            ROW_EXPRESSION_SERVICE,
            Optional.empty(),
            ImmutableMap.of());
    assertTrue(pageSource.isPresent());
    checkPageSource(pageSource.get(), testColumns, getTypes(columnHandles), rowCount);
}
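A hypothetical invocation of this helper, reusing the ParquetPageSourceFactory construction from testSingleRead at the top of this page; dataFile, parquetMetadataSource, testColumns, session, and rowCount are assumed to come from the surrounding fixture:

// Illustrative call: a FileSplit covering the whole test file, fed to the
// Parquet page source factory shown earlier.
HiveBatchPageSourceFactory factory = new ParquetPageSourceFactory(
        FUNCTION_AND_TYPE_MANAGER,
        FUNCTION_RESOLUTION,
        HDFS_ENVIRONMENT,
        new FileFormatDataSourceStats(),
        parquetMetadataSource);
FileSplit split = new FileSplit(new Path(dataFile.getAbsolutePath()), 0, dataFile.length(), new String[0]);
testPageSourceFactory(factory, split, HiveStorageFormat.PARQUET, testColumns, session, rowCount);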