Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
The class TestHivePageSink, method writeTestFile:
private static long writeTestFile(HiveClientConfig config, ExtendedHiveMetastore metastore, String outputPath)
{
    HiveTransactionHandle transaction = new HiveTransactionHandle();
    ConnectorPageSink pageSink = createPageSink(transaction, config, metastore, new Path("file:///" + outputPath));
    List<LineItemColumn> columns = getTestColumns();
    List<Type> columnTypes = columns.stream()
            .map(LineItemColumn::getType)
            .map(TestHivePageSink::getHiveType)
            .map(hiveType -> hiveType.getType(TYPE_MANAGER))
            .collect(toList());
    PageBuilder pageBuilder = new PageBuilder(columnTypes);
    int rows = 0;
    for (LineItem lineItem : new LineItemGenerator(0.01, 1, 1)) {
        rows++;
        if (rows >= NUM_ROWS) {
            break;
        }
        pageBuilder.declarePosition();
        for (int i = 0; i < columns.size(); i++) {
            LineItemColumn column = columns.get(i);
            BlockBuilder blockBuilder = pageBuilder.getBlockBuilder(i);
            switch (column.getType().getBase()) {
                case IDENTIFIER:
                    BIGINT.writeLong(blockBuilder, column.getIdentifier(lineItem));
                    break;
                case INTEGER:
                    INTEGER.writeLong(blockBuilder, column.getInteger(lineItem));
                    break;
                case DATE:
                    DATE.writeLong(blockBuilder, column.getDate(lineItem));
                    break;
                case DOUBLE:
                    DOUBLE.writeDouble(blockBuilder, column.getDouble(lineItem));
                    break;
                case VARCHAR:
                    createUnboundedVarcharType().writeSlice(blockBuilder, Slices.utf8Slice(column.getString(lineItem)));
                    break;
                default:
                    throw new IllegalArgumentException("Unsupported type " + column.getType());
            }
        }
    }
    Page page = pageBuilder.build();
    pageSink.appendPage(page);
    getFutureValue(pageSink.finish());
    File outputDir = new File(outputPath);
    List<File> files = ImmutableList.copyOf(outputDir.listFiles((dir, name) -> !name.endsWith(".crc")));
    File outputFile = getOnlyElement(files);
    long length = outputFile.length();
    ConnectorPageSource pageSource = createPageSource(transaction, config, outputFile);
    List<Page> pages = new ArrayList<>();
    while (!pageSource.isFinished()) {
        Page nextPage = pageSource.getNextPage();
        if (nextPage != null) {
            nextPage.assureLoaded();
            pages.add(nextPage);
        }
    }
    MaterializedResult expectedResults = toMaterializedResult(getSession(config), columnTypes, ImmutableList.of(page));
    MaterializedResult results = toMaterializedResult(getSession(config), columnTypes, pages);
    assertEquals(results, expectedResults);
    return length;
}
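The read-back loop above is the canonical way to drain a ConnectorPageSource: poll getNextPage until isFinished, skipping the nulls that signal no data is available yet. A minimal standalone sketch of that pattern, assuming a page source obtained from any provider; drainPages is an illustrative helper name, not Presto API:

import com.facebook.presto.spi.ConnectorPageSource;
import com.facebook.presto.spi.Page;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

// Minimal sketch: drain a ConnectorPageSource into a list of pages.
static List<Page> drainPages(ConnectorPageSource pageSource)
        throws IOException
{
    List<Page> pages = new ArrayList<>();
    try (ConnectorPageSource source = pageSource) {
        while (!source.isFinished()) {
            Page page = source.getNextPage();
            // getNextPage may return null while data is still pending
            if (page != null) {
                page.assureLoaded(); // force lazy blocks to materialize (older Presto API)
                pages.add(page);
            }
        }
    }
    return pages;
}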
Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
The class HivePageSourceProvider, method createHivePageSource:
public static Optional<ConnectorPageSource> createHivePageSource(
        Set<HiveRecordCursorProvider> cursorProviders,
        Set<HivePageSourceFactory> pageSourceFactories,
        String clientId,
        Configuration configuration,
        ConnectorSession session,
        Path path,
        OptionalInt bucketNumber,
        long start,
        long length,
        Properties schema,
        TupleDomain<HiveColumnHandle> effectivePredicate,
        List<HiveColumnHandle> hiveColumns,
        List<HivePartitionKey> partitionKeys,
        DateTimeZone hiveStorageTimeZone,
        TypeManager typeManager,
        Map<Integer, HiveType> columnCoercions)
{
    List<ColumnMapping> columnMappings = ColumnMapping.buildColumnMappings(partitionKeys, hiveColumns, columnCoercions, path, bucketNumber);
    List<ColumnMapping> regularColumnMappings = ColumnMapping.extractRegularColumnMappings(columnMappings);
    for (HivePageSourceFactory pageSourceFactory : pageSourceFactories) {
        Optional<? extends ConnectorPageSource> pageSource = pageSourceFactory.createPageSource(configuration, session, path, start, length, schema, extractRegularColumnHandles(regularColumnMappings, true), effectivePredicate, hiveStorageTimeZone);
        if (pageSource.isPresent()) {
            return Optional.of(new HivePageSource(columnMappings, hiveStorageTimeZone, typeManager, pageSource.get()));
        }
    }
    for (HiveRecordCursorProvider provider : cursorProviders) {
        // GenericHiveRecordCursor will automatically do the coercion without HiveCoercionRecordCursor
        boolean doCoercion = !(provider instanceof GenericHiveRecordCursorProvider);
        Optional<RecordCursor> cursor = provider.createRecordCursor(clientId, configuration, session, path, start, length, schema, extractRegularColumnHandles(regularColumnMappings, doCoercion), effectivePredicate, hiveStorageTimeZone, typeManager);
        if (cursor.isPresent()) {
            RecordCursor delegate = cursor.get();
            // Need to wrap RcText and RcBinary into a wrapper, which will do the coercion for mismatched columns
            if (doCoercion) {
                delegate = new HiveCoercionRecordCursor(regularColumnMappings, typeManager, delegate);
            }
            HiveRecordCursor hiveRecordCursor = new HiveRecordCursor(columnMappings, hiveStorageTimeZone, typeManager, delegate);
            List<Type> columnTypes = hiveColumns.stream()
                    .map(input -> typeManager.getType(input.getTypeSignature()))
                    .collect(toList());
            return Optional.of(new RecordPageSource(columnTypes, hiveRecordCursor));
        }
    }
    return Optional.empty();
}
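The method tries the vectorized page-source factories first and only falls back to row-oriented record cursors, which it adapts to the page interface with RecordPageSource. A reduced sketch of that two-tier dispatch, with hypothetical PageSourceFactory and CursorProvider interfaces standing in for the Hive-specific ones:

import com.facebook.presto.spi.ConnectorPageSource;
import com.facebook.presto.spi.RecordCursor;
import com.facebook.presto.spi.RecordPageSource;
import com.facebook.presto.spi.type.Type;
import java.util.List;
import java.util.Optional;
import java.util.Set;

final class PageSourceDispatch
{
    // Hypothetical stand-ins for HivePageSourceFactory and HiveRecordCursorProvider;
    // only the two-tier fallback shape is the point here.
    interface PageSourceFactory
    {
        Optional<? extends ConnectorPageSource> create();
    }

    interface CursorProvider
    {
        Optional<RecordCursor> create();
    }

    static Optional<ConnectorPageSource> dispatch(
            Set<PageSourceFactory> pageSourceFactories,
            Set<CursorProvider> cursorProviders,
            List<Type> columnTypes)
    {
        // Prefer vectorized, page-at-a-time readers when a factory accepts the file
        for (PageSourceFactory factory : pageSourceFactories) {
            Optional<? extends ConnectorPageSource> pageSource = factory.create();
            if (pageSource.isPresent()) {
                return Optional.of(pageSource.get());
            }
        }
        // Fall back to row-at-a-time cursors, adapted via RecordPageSource
        for (CursorProvider provider : cursorProviders) {
            Optional<RecordCursor> cursor = provider.create();
            if (cursor.isPresent()) {
                return Optional.of(new RecordPageSource(columnTypes, cursor.get()));
            }
        }
        return Optional.empty();
    }

    private PageSourceDispatch() {}
}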
Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
The class ScanFilterAndProjectOperator, method getOutput:
@Override
public Page getOutput()
{
    if (split == null) {
        return null;
    }
    if (!finishing) {
        if ((pageSource == null) && (cursor == null)) {
            ConnectorPageSource source = pageSourceProvider.createPageSource(operatorContext.getSession(), split, columns);
            if (source instanceof RecordPageSource) {
                cursor = ((RecordPageSource) source).getCursor();
            }
            else {
                pageSource = source;
            }
        }
        if (cursor != null) {
            int rowsProcessed = cursorProcessor.process(operatorContext.getSession().toConnectorSession(), cursor, ROWS_PER_PAGE, pageBuilder);
            pageSourceMemoryContext.setBytes(cursor.getSystemMemoryUsage());
            long bytesProcessed = cursor.getCompletedBytes() - completedBytes;
            long elapsedNanos = cursor.getReadTimeNanos() - readTimeNanos;
            operatorContext.recordGeneratedInput(bytesProcessed, rowsProcessed, elapsedNanos);
            completedBytes = cursor.getCompletedBytes();
            readTimeNanos = cursor.getReadTimeNanos();
            if (rowsProcessed == 0) {
                finishing = true;
            }
        }
        else {
            if (currentPage == null) {
                currentPage = pageSource.getNextPage();
                if (currentPage != null) {
                    // update operator stats
                    long endCompletedBytes = pageSource.getCompletedBytes();
                    long endReadTimeNanos = pageSource.getReadTimeNanos();
                    operatorContext.recordGeneratedInput(endCompletedBytes - completedBytes, currentPage.getPositionCount(), endReadTimeNanos - readTimeNanos);
                    completedBytes = endCompletedBytes;
                    readTimeNanos = endReadTimeNanos;
                }
                currentPosition = 0;
            }
            if (currentPage != null) {
                switch (processingOptimization) {
                    case COLUMNAR: {
                        Page page = pageProcessor.processColumnar(operatorContext.getSession().toConnectorSession(), currentPage, getTypes());
                        currentPage = null;
                        currentPosition = 0;
                        return page;
                    }
                    case COLUMNAR_DICTIONARY: {
                        Page page = pageProcessor.processColumnarDictionary(operatorContext.getSession().toConnectorSession(), currentPage, getTypes());
                        currentPage = null;
                        currentPosition = 0;
                        return page;
                    }
                    case DISABLED: {
                        currentPosition = pageProcessor.process(operatorContext.getSession().toConnectorSession(), currentPage, currentPosition, currentPage.getPositionCount(), pageBuilder);
                        if (currentPosition == currentPage.getPositionCount()) {
                            currentPage = null;
                            currentPosition = 0;
                        }
                        break;
                    }
                    default:
                        throw new IllegalStateException(String.format("Found unexpected value %s for processingOptimization", processingOptimization));
                }
            }
            pageSourceMemoryContext.setBytes(pageSource.getSystemMemoryUsage());
        }
    }
    // only return a full page if buffer is full or we are finishing
    if (pageBuilder.isEmpty() || (!finishing && !pageBuilder.isFull())) {
        pageBuilderMemoryContext.setBytes(pageBuilder.getRetainedSizeInBytes());
        return null;
    }
    Page page = pageBuilder.build();
    pageBuilder.reset();
    pageBuilderMemoryContext.setBytes(pageBuilder.getRetainedSizeInBytes());
    return page;
}
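The final lines show the standard operator batching idiom: a page is emitted only when the builder is full or the operator is finishing, so downstream operators see full pages rather than many small ones. A minimal sketch of that flush condition in isolation, assuming a page builder and finishing flag like the ones above; flushIfReady is an illustrative name:

import com.facebook.presto.spi.Page;
import com.facebook.presto.spi.PageBuilder;

// Minimal sketch of the flush idiom used above.
static Page flushIfReady(PageBuilder pageBuilder, boolean finishing)
{
    // only return a full page if the buffer is full or we are finishing
    if (pageBuilder.isEmpty() || (!finishing && !pageBuilder.isFull())) {
        return null; // keep accumulating rows
    }
    Page page = pageBuilder.build();
    pageBuilder.reset(); // the builder is reused for the next batch
    return page;
}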
Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
The class AbstractTestHiveClient, method testGetRecords:
@Test
public void testGetRecords()
        throws Exception
{
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tablePartitionFormat);
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, tableHandle);
        List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
        Map<String, Integer> columnIndex = indexColumns(columnHandles);
        List<ConnectorSplit> splits = getAllSplits(tableHandle, TupleDomain.all());
        assertEquals(splits.size(), partitionCount);
        for (ConnectorSplit split : splits) {
            HiveSplit hiveSplit = (HiveSplit) split;
            List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
            String ds = partitionKeys.get(0).getValue();
            String fileFormat = partitionKeys.get(1).getValue();
            HiveStorageFormat fileType = HiveStorageFormat.valueOf(fileFormat.toUpperCase());
            int dummyPartition = Integer.parseInt(partitionKeys.get(2).getValue());
            long rowNumber = 0;
            long completedBytes = 0;
            try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, hiveSplit, columnHandles)) {
                MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
                assertPageSourceType(pageSource, fileType);
                for (MaterializedRow row : result) {
                    try {
                        assertValueTypes(row, tableMetadata.getColumns());
                    }
                    catch (RuntimeException e) {
                        throw new RuntimeException("row " + rowNumber, e);
                    }
                    rowNumber++;
                    Object value;
                    value = row.getField(columnIndex.get("t_string"));
                    if (rowNumber % 19 == 0) {
                        assertNull(value);
                    }
                    else if (rowNumber % 19 == 1) {
                        assertEquals(value, "");
                    }
                    else {
                        assertEquals(value, "test");
                    }
                    assertEquals(row.getField(columnIndex.get("t_tinyint")), (byte) (1 + rowNumber));
                    assertEquals(row.getField(columnIndex.get("t_smallint")), (short) (2 + rowNumber));
                    assertEquals(row.getField(columnIndex.get("t_int")), 3 + (int) rowNumber);
                    if (rowNumber % 13 == 0) {
                        assertNull(row.getField(columnIndex.get("t_bigint")));
                    }
                    else {
                        assertEquals(row.getField(columnIndex.get("t_bigint")), 4 + rowNumber);
                    }
                    assertEquals((Float) row.getField(columnIndex.get("t_float")), 5.1f + rowNumber, 0.001);
                    assertEquals(row.getField(columnIndex.get("t_double")), 6.2 + rowNumber);
                    if (rowNumber % 3 == 2) {
                        assertNull(row.getField(columnIndex.get("t_boolean")));
                    }
                    else {
                        assertEquals(row.getField(columnIndex.get("t_boolean")), rowNumber % 3 != 0);
                    }
                    assertEquals(row.getField(columnIndex.get("ds")), ds);
                    assertEquals(row.getField(columnIndex.get("file_format")), fileFormat);
                    assertEquals(row.getField(columnIndex.get("dummy")), dummyPartition);
                    long newCompletedBytes = pageSource.getCompletedBytes();
                    assertTrue(newCompletedBytes >= completedBytes);
                    assertTrue(newCompletedBytes <= hiveSplit.getLength());
                    completedBytes = newCompletedBytes;
                }
                assertTrue(completedBytes <= hiveSplit.getLength());
                assertEquals(rowNumber, 100);
            }
        }
    }
}
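The completed-bytes assertions at the end of the loop encode a useful invariant of ConnectorPageSource: getCompletedBytes is monotonically non-decreasing and bounded by the split length. A small helper capturing that check; the method name is illustrative, and the TestNG assertions match those used in the test above:

import com.facebook.presto.spi.ConnectorPageSource;
import static org.testng.Assert.assertTrue;

// Illustrative helper: verify the progress invariant asserted in the loop above.
// Returns the new count so the caller can carry it into the next iteration.
static long assertProgress(ConnectorPageSource pageSource, long previousCompletedBytes, long splitLength)
{
    long completedBytes = pageSource.getCompletedBytes();
    assertTrue(completedBytes >= previousCompletedBytes, "completed bytes must not decrease");
    assertTrue(completedBytes <= splitLength, "completed bytes must not exceed the split length");
    return completedBytes;
}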
Use of com.facebook.presto.spi.ConnectorPageSource in project presto by prestodb.
The class AbstractTestHiveClientS3, method doCreateTable:
private void doCreateTable(SchemaTableName tableName, HiveStorageFormat storageFormat)
        throws Exception
{
    List<ColumnMetadata> columns = ImmutableList.<ColumnMetadata>builder()
            .add(new ColumnMetadata("id", BIGINT))
            .build();
    MaterializedResult data = MaterializedResult.resultBuilder(newSession(), BIGINT)
            .row(1L)
            .row(3L)
            .row(2L)
            .build();
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // begin creating the table
        ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, columns, createTableProperties(storageFormat));
        ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(session, tableMetadata, Optional.empty());
        // write the records
        ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, outputHandle);
        sink.appendPage(data.toPage());
        Collection<Slice> fragments = getFutureValue(sink.finish());
        // commit the table
        metadata.finishCreateTable(session, outputHandle, fragments);
        transaction.commit();
        // Hack to work around the metastore not being configured for S3.
        // The metastore tries to validate the location when creating the
        // table, which fails without explicit configuration for S3.
        // We work around that by using a dummy location when creating the
        // table and updating it here to the correct S3 location.
        metastoreClient.updateTableLocation(database, tableName.getTableName(), locationService.writePathRoot(((HiveOutputTableHandle) outputHandle).getLocationHandle()).get().toString());
    }
    try (Transaction transaction = newTransaction()) {
        ConnectorMetadata metadata = transaction.getMetadata();
        ConnectorSession session = newSession();
        // load the new table
        ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
        List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
        // verify the metadata
        ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
        assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), columns);
        // verify the data
        List<ConnectorTableLayoutResult> tableLayoutResults = metadata.getTableLayouts(session, tableHandle, new Constraint<>(TupleDomain.all(), bindings -> true), Optional.empty());
        HiveTableLayoutHandle layoutHandle = (HiveTableLayoutHandle) getOnlyElement(tableLayoutResults).getTableLayout().getHandle();
        assertEquals(layoutHandle.getPartitions().get().size(), 1);
        ConnectorSplitSource splitSource = splitManager.getSplits(transaction.getTransactionHandle(), session, layoutHandle);
        ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));
        try (ConnectorPageSource pageSource = pageSourceProvider.createPageSource(transaction.getTransactionHandle(), session, split, columnHandles)) {
            MaterializedResult result = materializeSourceDataStream(session, pageSource, getTypes(columnHandles));
            assertEqualsIgnoreOrder(result.getMaterializedRows(), data.getMaterializedRows());
        }
    }
}
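The test follows the standard write-then-read round trip: beginCreateTable, createPageSink, appendPage, finish, finishCreateTable, then read the data back through a ConnectorPageSource. A condensed sketch of just the sink half, assuming handles like those above; writePages is a hypothetical helper name:

import com.facebook.presto.spi.ConnectorPageSink;
import com.facebook.presto.spi.Page;
import io.airlift.slice.Slice;
import java.util.Collection;
import static io.airlift.concurrent.MoreFutures.getFutureValue;

// Condensed sketch of the write half of the round trip above. finish() is
// asynchronous, so we block on it to obtain the fragments that
// finishCreateTable needs to commit the write.
static Collection<Slice> writePages(ConnectorPageSink sink, Iterable<Page> pages)
{
    for (Page page : pages) {
        sink.appendPage(page);
    }
    return getFutureValue(sink.finish());
}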