use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
the class TableStatisticsMaker method makeTableStatistics.
private TableStatistics makeTableStatistics(IcebergTableHandle tableHandle, Constraint constraint) {
    if (!tableHandle.getSnapshotId().isPresent() || constraint.getSummary().isNone()) {
        return TableStatistics.empty();
    }
    TupleDomain<IcebergColumnHandle> intersection = constraint.getSummary()
            .transform(IcebergColumnHandle.class::cast)
            .intersect(tableHandle.getPredicate());
    if (intersection.isNone()) {
        return TableStatistics.empty();
    }
    List<Types.NestedField> columns = icebergTable.schema().columns();
    Map<Integer, Type.PrimitiveType> idToTypeMapping = columns.stream()
            .filter(column -> column.type().isPrimitiveType())
            .collect(Collectors.toMap(Types.NestedField::fieldId, column -> column.type().asPrimitiveType()));
    List<PartitionField> partitionFields = icebergTable.spec().fields();
    Set<Integer> identityPartitionIds = getIdentityPartitions(icebergTable.spec()).keySet().stream()
            .map(PartitionField::sourceId)
            .collect(toSet());
    List<Types.NestedField> nonPartitionPrimitiveColumns = columns.stream()
            .filter(column -> !identityPartitionIds.contains(column.fieldId()) && column.type().isPrimitiveType())
            .collect(toImmutableList());
    List<Type> icebergPartitionTypes = partitionTypes(partitionFields, idToTypeMapping);
    List<IcebergColumnHandle> columnHandles = getColumns(icebergTable.schema(), typeManager);
    Map<Integer, IcebergColumnHandle> idToColumnHandle = columnHandles.stream()
            .collect(toImmutableMap(IcebergColumnHandle::getId, identity()));
    ImmutableMap.Builder<Integer, ColumnFieldDetails> idToDetailsBuilder = ImmutableMap.builder();
    for (int index = 0; index < partitionFields.size(); index++) {
        PartitionField field = partitionFields.get(index);
        Type type = icebergPartitionTypes.get(index);
        idToDetailsBuilder.put(field.sourceId(), new ColumnFieldDetails(
                field,
                idToColumnHandle.get(field.sourceId()),
                type,
                toPrestoType(type, typeManager),
                type.typeId().javaClass()));
    }
    Map<Integer, ColumnFieldDetails> idToDetails = idToDetailsBuilder.build();
    TableScan tableScan = icebergTable.newScan()
            .filter(toIcebergExpression(intersection))
            .useSnapshot(tableHandle.getSnapshotId().get())
            .includeColumnStats();
    Partition summary = null;
    try (CloseableIterable<FileScanTask> fileScanTasks = tableScan.planFiles()) {
        for (FileScanTask fileScanTask : fileScanTasks) {
            DataFile dataFile = fileScanTask.file();
            if (!dataFileMatches(dataFile, constraint, idToTypeMapping, partitionFields, idToDetails)) {
                continue;
            }
            if (summary == null) {
                summary = new Partition(
                        idToTypeMapping,
                        nonPartitionPrimitiveColumns,
                        dataFile.partition(),
                        dataFile.recordCount(),
                        dataFile.fileSizeInBytes(),
                        toMap(idToTypeMapping, dataFile.lowerBounds()),
                        toMap(idToTypeMapping, dataFile.upperBounds()),
                        dataFile.nullValueCounts(),
                        dataFile.columnSizes());
            }
            else {
                summary.incrementFileCount();
                summary.incrementRecordCount(dataFile.recordCount());
                summary.incrementSize(dataFile.fileSizeInBytes());
                updateSummaryMin(summary, partitionFields, toMap(idToTypeMapping, dataFile.lowerBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
                updateSummaryMax(summary, partitionFields, toMap(idToTypeMapping, dataFile.upperBounds()), dataFile.nullValueCounts(), dataFile.recordCount());
                summary.updateNullCount(dataFile.nullValueCounts());
                updateColumnSizes(summary, dataFile.columnSizes());
            }
        }
    }
    catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    if (summary == null) {
        return TableStatistics.empty();
    }
    double recordCount = summary.getRecordCount();
    TableStatistics.Builder result = TableStatistics.builder();
    result.setRowCount(Estimate.of(recordCount));
    result.setTotalSize(Estimate.of(summary.getSize()));
    for (IcebergColumnHandle columnHandle : idToColumnHandle.values()) {
        int fieldId = columnHandle.getId();
        ColumnStatistics.Builder columnBuilder = new ColumnStatistics.Builder();
        Long nullCount = summary.getNullCounts().get(fieldId);
        if (nullCount != null) {
            columnBuilder.setNullsFraction(Estimate.of(nullCount / recordCount));
        }
        if (summary.getColumnSizes() != null) {
            Long columnSize = summary.getColumnSizes().get(fieldId);
            if (columnSize != null) {
                columnBuilder.setDataSize(Estimate.of(columnSize));
            }
        }
        Object min = summary.getMinValues().get(fieldId);
        Object max = summary.getMaxValues().get(fieldId);
        if (min instanceof Number && max instanceof Number) {
            columnBuilder.setRange(Optional.of(new DoubleRange(((Number) min).doubleValue(), ((Number) max).doubleValue())));
        }
        result.setColumnStatistics(columnHandle, columnBuilder.build());
    }
    return result.build();
}
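The lower and upper bound maps above arrive from Iceberg as raw ByteBuffers keyed by field id. A minimal sketch of what a toMap helper like the one referenced here typically does, assuming Iceberg's Conversions utility; names are illustrative, not necessarily the exact Presto implementation.

// Illustrative sketch: decode Iceberg's per-column bound metrics
// (field id -> serialized ByteBuffer) into plain Java values.
private static Map<Integer, Object> toMap(Map<Integer, Type.PrimitiveType> idToTypeMapping, Map<Integer, ByteBuffer> idToMetricMap) {
    if (idToMetricMap == null) {
        return ImmutableMap.of();
    }
    ImmutableMap.Builder<Integer, Object> map = ImmutableMap.builder();
    idToMetricMap.forEach((id, value) -> {
        Type.PrimitiveType type = idToTypeMapping.get(id);
        if (type != null) {
            // Conversions.fromByteBuffer deserializes a single-value metric
            map.put(id, Conversions.fromByteBuffer(type, value));
        }
    });
    return map.build();
}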
use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
the class IcebergFileWriterFactory method createParquetWriter.
private IcebergFileWriter createParquetWriter(Path outputPath, Schema icebergSchema, JobConf jobConf, ConnectorSession session, HdfsContext hdfsContext) {
    List<String> fileColumnNames = icebergSchema.columns().stream()
            .map(Types.NestedField::name)
            .collect(toImmutableList());
    List<Type> fileColumnTypes = icebergSchema.columns().stream()
            .map(column -> toPrestoType(column.type(), typeManager))
            .collect(toImmutableList());
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), outputPath, jobConf);
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(outputPath, false);
            return null;
        };
        // page size and row-group (block) size come from session properties
        ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
                .setMaxPageSize(getParquetWriterPageSize(session))
                .setMaxBlockSize(getParquetWriterBlockSize(session))
                .build();
        return new IcebergParquetFileWriter(
                hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.create(outputPath)),
                rollbackAction,
                fileColumnNames,
                fileColumnTypes,
                convert(icebergSchema, "table"),
                makeTypeMap(fileColumnTypes, fileColumnNames),
                parquetWriterOptions,
                IntStream.range(0, fileColumnNames.size()).toArray(),
                getCompressionCodec(session).getParquetCompressionCodec().get(),
                outputPath,
                hdfsEnvironment,
                hdfsContext);
    }
    catch (IOException e) {
        throw new PrestoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}
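A writer factory like this is usually selected by the table's file format. A hedged sketch of the dispatching method such a createParquetWriter typically hangs off; the method name and the exact switch are assumptions, not the verbatim Presto code.

// Illustrative dispatch on the Iceberg table's file format; only the
// PARQUET branch corresponds to the method above.
public IcebergFileWriter createFileWriter(Path outputPath, Schema icebergSchema, JobConf jobConf, ConnectorSession session, HdfsContext hdfsContext, FileFormat fileFormat) {
    switch (fileFormat) {
        case PARQUET:
            return createParquetWriter(outputPath, icebergSchema, jobConf, session, hdfsContext);
        default:
            throw new PrestoException(NOT_SUPPORTED, "File format not supported for Iceberg: " + fileFormat);
    }
}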
use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
the class AccumuloModule method configure.
@Override
public void configure(Binder binder) {
    // Create an appender and attach it to the Log4J root logger
    JulAppender appender = new JulAppender();
    appender.setLayout(new PatternLayout("%d %-5p %c - %m%n"));
    appender.setThreshold(Level.INFO);
    appender.activateOptions();
    org.apache.log4j.Logger.getRootLogger().addAppender(appender);

    binder.bind(TypeManager.class).toInstance(typeManager);
    binder.bind(AccumuloConnector.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloConnectorId.class).toInstance(new AccumuloConnectorId(connectorId));
    binder.bind(AccumuloMetadata.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloMetadataFactory.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloClient.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloSplitManager.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloRecordSetProvider.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloPageSinkProvider.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloHandleResolver.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloSessionProperties.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloTableProperties.class).in(Scopes.SINGLETON);
    binder.bind(ZooKeeperMetadataManager.class).in(Scopes.SINGLETON);
    binder.bind(AccumuloTableManager.class).in(Scopes.SINGLETON);
    binder.bind(IndexLookup.class).in(Scopes.SINGLETON);
    binder.bind(ColumnCardinalityCache.class).in(Scopes.SINGLETON);
    binder.bind(Connector.class).toProvider(ConnectorProvider.class);
    configBinder(binder).bindConfig(AccumuloConfig.class);
    jsonBinder(binder).addDeserializerBinding(Type.class).to(TypeDeserializer.class);
    jsonCodecBinder(binder).bindMapJsonCodec(String.class, JsonCodec.listJsonCodec(AccumuloTable.class));
}
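The TypeManager instance bound here is what the TypeDeserializer registered on the jsonBinder line uses to turn serialized type signatures back into Type objects. A minimal sketch of that pattern, assuming Jackson's FromStringDeserializer; illustrative, not necessarily the exact presto-main class.

// Sketch: resolve a type's textual signature into a Type through the
// injected TypeManager.
public class TypeDeserializer extends FromStringDeserializer<Type> {
    private final TypeManager typeManager;

    @Inject
    public TypeDeserializer(TypeManager typeManager) {
        super(Type.class);
        this.typeManager = requireNonNull(typeManager, "typeManager is null");
    }

    @Override
    protected Type _deserialize(String value, DeserializationContext context) {
        return typeManager.getType(parseTypeSignature(value));
    }
}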
use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
the class DeltaExpressionUtils method iterateWithPartitionPruning.
/**
 * Utility method that takes an iterator of {@link AddFile}s and a predicate and returns an iterator of
 * {@link AddFile}s that may satisfy the predicate. Files whose partition values evaluate the predicate
 * to a deterministic NO are pruned.
 */
public static CloseableIterator<AddFile> iterateWithPartitionPruning(CloseableIterator<AddFile> inputIterator, TupleDomain<DeltaColumnHandle> predicate, TypeManager typeManager) {
    TupleDomain<String> partitionPredicate = extractPartitionColumnsPredicate(predicate);
    if (partitionPredicate.isAll()) {
        // there is no partition filter; return the input iterator as is
        return inputIterator;
    }
    if (partitionPredicate.isNone()) {
        // nothing passes the partition predicate; return an empty iterator
        return new CloseableIterator<AddFile>() {
            @Override
            public boolean hasNext() {
                return false;
            }

            @Override
            public AddFile next() {
                throw new NoSuchElementException();
            }

            @Override
            public void close() throws IOException {
                inputIterator.close();
            }
        };
    }
    List<DeltaColumnHandle> partitionColumns = predicate.getColumnDomains().get().stream()
            .filter(entry -> entry.getColumn().getColumnType() == PARTITION)
            .map(entry -> entry.getColumn())
            .collect(Collectors.toList());
    return new CloseableIterator<AddFile>() {
        private AddFile nextItem;

        @Override
        public boolean hasNext() {
            if (nextItem != null) {
                return true;
            }
            while (inputIterator.hasNext()) {
                AddFile nextFile = inputIterator.next();
                if (evaluatePartitionPredicate(partitionPredicate, partitionColumns, typeManager, nextFile)) {
                    nextItem = nextFile;
                    break;
                }
            }
            return nextItem != null;
        }

        @Override
        public AddFile next() {
            if (!hasNext()) {
                throw new NoSuchElementException("there are no more files");
            }
            AddFile toReturn = nextItem;
            nextItem = null;
            return toReturn;
        }

        @Override
        public void close() throws IOException {
            inputIterator.close();
        }
    };
}
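A hedged sketch of what the evaluatePartitionPredicate helper referenced above might look like: each partition column's string value from the AddFile is parsed into the column's Presto type, resolved through the TypeManager, and tested against its Domain. The getDataType accessor and the parsePartitionValue helper are assumptions for illustration, not the verified Presto signatures.

private static boolean evaluatePartitionPredicate(TupleDomain<String> partitionPredicate, List<DeltaColumnHandle> partitionColumns, TypeManager typeManager, AddFile addFile) {
    Map<String, Domain> domains = partitionPredicate.getDomains().get();
    for (DeltaColumnHandle column : partitionColumns) {
        Domain domain = domains.get(column.getName());
        if (domain == null) {
            continue; // no constraint on this partition column
        }
        String value = addFile.getPartitionValues().get(column.getName());
        if (value == null) {
            // a missing partition value only passes if NULL is allowed
            if (!domain.isNullAllowed()) {
                return false;
            }
            continue;
        }
        Type type = typeManager.getType(column.getDataType()); // getDataType: assumed accessor
        // parsePartitionValue: hypothetical helper converting the string to
        // the type's native Presto representation
        if (!domain.includesNullableValue(parsePartitionValue(value, type))) {
            return false;
        }
    }
    return true;
}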
use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
the class DeltaPageSourceProvider method createPageSource.
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, ConnectorTableLayoutHandle layout, List<ColumnHandle> columns, SplitContext splitContext) {
    DeltaSplit deltaSplit = (DeltaSplit) split;
    DeltaTableLayoutHandle deltaTableLayoutHandle = (DeltaTableLayoutHandle) layout;
    DeltaTableHandle deltaTableHandle = deltaTableLayoutHandle.getTable();
    HdfsContext hdfsContext = new HdfsContext(session, deltaSplit.getSchema(), deltaSplit.getTable(), deltaSplit.getFilePath(), false);
    Path filePath = new Path(deltaSplit.getFilePath());
    List<DeltaColumnHandle> deltaColumnHandles = columns.stream()
            .map(DeltaColumnHandle.class::cast)
            .collect(Collectors.toList());
    List<DeltaColumnHandle> regularColumnHandles = deltaColumnHandles.stream()
            .filter(columnHandle -> columnHandle.getColumnType() != PARTITION)
            .collect(Collectors.toList());
    ConnectorPageSource dataPageSource = createParquetPageSource(
            hdfsEnvironment,
            session.getUser(),
            hdfsEnvironment.getConfiguration(hdfsContext, filePath),
            filePath,
            deltaSplit.getStart(),
            deltaSplit.getLength(),
            deltaSplit.getFileSize(),
            regularColumnHandles,
            deltaTableHandle.toSchemaTableName(),
            getParquetMaxReadBlockSize(session),
            isParquetBatchReadsEnabled(session),
            isParquetBatchReaderVerificationEnabled(session),
            typeManager,
            deltaTableLayoutHandle.getPredicate(),
            fileFormatDataSourceStats,
            false);
    return new DeltaPageSource(deltaColumnHandles, convertPartitionValues(deltaColumnHandles, deltaSplit.getPartitionValues()), dataPageSource);
}
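convertPartitionValues pairs each PARTITION column with its value from the split so DeltaPageSource can supply those values without reading them from the Parquet file. A minimal sketch under assumed signatures; the real helper's return type, value representation, and the deserializePartitionValue and getDataType names are illustrations, not the verified Presto code.

// Hypothetical sketch: resolve each partition column's Presto type via the
// TypeManager and deserialize its string value from the split.
private Map<String, Object> convertPartitionValues(List<DeltaColumnHandle> columnHandles, Map<String, String> partitionValues) {
    return columnHandles.stream()
            .filter(columnHandle -> columnHandle.getColumnType() == PARTITION)
            .collect(toImmutableMap(
                    DeltaColumnHandle::getName,
                    columnHandle -> deserializePartitionValue( // hypothetical helper
                            typeManager.getType(columnHandle.getDataType()), // getDataType: assumed accessor
                            partitionValues.get(columnHandle.getName()))));
}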