Use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
The class DeltaPageSourceProvider, method createParquetPageSource:
private static ConnectorPageSource createParquetPageSource(
        HdfsEnvironment hdfsEnvironment,
        String user,
        Configuration configuration,
        Path path,
        long start,
        long length,
        long fileSize,
        List<DeltaColumnHandle> columns,
        SchemaTableName tableName,
        DataSize maxReadBlockSize,
        boolean batchReaderEnabled,
        boolean verificationEnabled,
        TypeManager typeManager,
        TupleDomain<DeltaColumnHandle> effectivePredicate,
        FileFormatDataSourceStats stats,
        boolean columnIndexFilterEnabled)
{
    AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    try {
        FSDataInputStream inputStream = hdfsEnvironment.getFileSystem(user, path, configuration).open(path);
        dataSource = buildHdfsParquetDataSource(inputStream, path, stats);
        ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource, fileSize).getParquetMetadata();
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();

        // Project the file schema down to the requested columns, resolving each
        // handle's serialized type signature through the TypeManager.
        Optional<MessageType> message = columns.stream()
                .filter(column -> column.getColumnType() == REGULAR || isPushedDownSubfield(column))
                .map(column -> getColumnType(typeManager.getType(column.getDataType()), fileSchema, column, tableName, path))
                .filter(Optional::isPresent)
                .map(Optional::get)
                .map(type -> new MessageType(fileSchema.getName(), type))
                .reduce(MessageType::union);
        MessageType requestedSchema = message.orElse(new MessageType(fileSchema.getName(), ImmutableList.of()));

        // Keep only the row groups whose first data page falls inside this split's byte range.
        ImmutableList.Builder<BlockMetaData> footerBlocks = ImmutableList.builder();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            if (firstDataPage >= start && firstDataPage < start + length) {
                footerBlocks.add(block);
            }
        }

        Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
        TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
        Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath);
        final ParquetDataSource finalDataSource = dataSource;

        // Prune row groups whose statistics (and, when enabled, column indexes)
        // cannot possibly match the predicate.
        ImmutableList.Builder<BlockMetaData> blocks = ImmutableList.builder();
        List<ColumnIndexStore> blockIndexStores = new ArrayList<>();
        for (BlockMetaData block : footerBlocks.build()) {
            Optional<ColumnIndexStore> columnIndexStore = ColumnIndexFilterUtils.getColumnIndexStore(parquetPredicate, finalDataSource, block, descriptorsByPath, columnIndexFilterEnabled);
            if (predicateMatches(parquetPredicate, block, finalDataSource, descriptorsByPath, parquetTupleDomain, columnIndexStore, columnIndexFilterEnabled)) {
                blocks.add(block);
                blockIndexStores.add(columnIndexStore.orElse(null));
            }
        }

        MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
        ParquetReader parquetReader = new ParquetReader(
                messageColumnIO,
                blocks.build(),
                dataSource,
                systemMemoryContext,
                maxReadBlockSize,
                batchReaderEnabled,
                verificationEnabled,
                parquetPredicate,
                blockIndexStores,
                columnIndexFilterEnabled);

        // Build the output column names, types, and Parquet field bindings.
        ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Optional<Field>> fieldsBuilder = ImmutableList.builder();
        for (DeltaColumnHandle column : columns) {
            checkArgument(column.getColumnType() == REGULAR || column.getColumnType() == SUBFIELD, "column type must be regular or subfield column");
            String name = column.getName();
            Type type = typeManager.getType(column.getDataType());
            namesBuilder.add(name);
            typesBuilder.add(type);
            if (isPushedDownSubfield(column)) {
                Subfield pushedDownSubfield = getPushedDownSubfield(column);
                List<String> nestedColumnPath = nestedColumnPath(pushedDownSubfield);
                Optional<ColumnIO> columnIO = findNestedColumnIO(lookupColumnByName(messageColumnIO, pushedDownSubfield.getRootName()), nestedColumnPath);
                if (columnIO.isPresent()) {
                    fieldsBuilder.add(constructField(type, columnIO.get()));
                }
                else {
                    fieldsBuilder.add(Optional.empty());
                }
            }
            else if (getParquetType(type, fileSchema, column, tableName, path).isPresent()) {
                fieldsBuilder.add(constructField(type, lookupColumnByName(messageColumnIO, name)));
            }
            else {
                fieldsBuilder.add(Optional.empty());
            }
        }
        return new ParquetPageSource(parquetReader, typesBuilder.build(), fieldsBuilder.build(), namesBuilder.build(), new RuntimeStats());
    }
    catch (Exception exception) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        }
        catch (IOException ignored) {
        }
        if (exception instanceof PrestoException) {
            throw (PrestoException) exception;
        }
        if (exception instanceof ParquetCorruptionException) {
            throw new PrestoException(DELTA_BAD_DATA, exception);
        }
        if (exception instanceof AccessControlException) {
            throw new PrestoException(PERMISSION_DENIED, exception.getMessage(), exception);
        }
        if (nullToEmpty(exception.getMessage()).trim().equals("Filesystem closed") || exception instanceof FileNotFoundException) {
            throw new PrestoException(DELTA_CANNOT_OPEN_SPLIT, exception);
        }
        String message = format("Error opening Hive split %s (offset=%s, length=%s): %s", path, start, length, exception.getMessage());
        if (exception.getClass().getSimpleName().equals("BlockMissingException")) {
            throw new PrestoException(DELTA_MISSING_DATA, message, exception);
        }
        throw new PrestoException(DELTA_CANNOT_OPEN_SPLIT, message, exception);
    }
}
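The TypeManager's role in this page source is narrow but central: it turns each DeltaColumnHandle's serialized type signature back into a concrete engine Type, both when projecting the Parquet schema and when building the output column lists. A minimal sketch of that resolution step, assuming (as above) that getDataType() returns the handle's TypeSignature; the helper name resolveColumnTypes is illustrative, not part of DeltaPageSourceProvider:

// Illustrative helper: resolves the engine Type for each column handle via
// the TypeManager, exactly as the loop over columns above does inline.
private static List<Type> resolveColumnTypes(List<DeltaColumnHandle> columns, TypeManager typeManager)
{
    ImmutableList.Builder<Type> types = ImmutableList.builder();
    for (DeltaColumnHandle column : columns) {
        // Maps a serialized signature such as "bigint" or "array(varchar)"
        // back to a concrete Type instance.
        types.add(typeManager.getType(column.getDataType()));
    }
    return types.build();
}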
Use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
The class RcFileFileWriterFactory, method createFileWriter:
@Override
public Optional<HiveFileWriter> createFileWriter(
        Path path,
        List<String> inputColumnNames,
        StorageFormat storageFormat,
        Properties schema,
        JobConf configuration,
        ConnectorSession session,
        Optional<EncryptionInformation> encryptionInformation)
{
    if (!HiveSessionProperties.isRcfileOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    RcFileEncoding rcFileEncoding;
    if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = new BinaryRcFileEncoding();
    }
    else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
        rcFileEncoding = createTextVectorEncoding(schema, hiveStorageTimeZone);
    }
    else {
        return Optional.empty();
    }
    Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));

    // Existing tables and partitions may have columns in a different order than the
    // writer is providing, so build an index to rearrange columns in the proper order.
    List<String> fileColumnNames = Splitter.on(',').trimResults().omitEmptyStrings().splitToList(schema.getProperty(META_TABLE_COLUMNS, ""));
    List<Type> fileColumnTypes = toHiveTypes(schema.getProperty(META_TABLE_COLUMN_TYPES, "")).stream()
            .map(hiveType -> hiveType.getType(typeManager))
            .collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream()
            .mapToInt(inputColumnNames::indexOf)
            .toArray();
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
        OutputStream outputStream = fileSystem.create(path);
        Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
        if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen(), stats);
                }
                catch (IOException e) {
                    throw new PrestoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        return Optional.of(new RcFileFileWriter(
                outputStream,
                rollbackAction,
                rcFileEncoding,
                fileColumnTypes,
                codecName,
                fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(MetastoreUtil.PRESTO_QUERY_ID_NAME, session.getQueryId())
                        .build(),
                validationInputFactory));
    }
    catch (Exception e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
    }
}
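The fileInputColumnIndexes array built above is worth a concrete illustration: indexOf returns -1 for any file column that the writer's input does not supply, signalling a missing column. A self-contained sketch with made-up column names:

import com.google.common.collect.ImmutableList;
import java.util.Arrays;
import java.util.List;

public class ReorderIndexExample
{
    public static void main(String[] args)
    {
        // File's declared column order, as read from table metadata.
        List<String> fileColumnNames = ImmutableList.of("ds", "user_id", "value");
        // Columns the writer actually receives, in a different order.
        List<String> inputColumnNames = ImmutableList.of("user_id", "value");
        // Same expression as in createFileWriter above.
        int[] fileInputColumnIndexes = fileColumnNames.stream()
                .mapToInt(inputColumnNames::indexOf)
                .toArray();
        // Prints [-1, 0, 1]: "ds" is absent from the input, so its slot is -1.
        System.out.println(Arrays.toString(fileInputColumnIndexes));
    }
}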
Use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
The class DeterminismAnalyzer, method analyze:
private DeterminismAnalysis analyze(QueryObjectBundle control, ChecksumResult controlChecksum, DeterminismAnalysisDetails.Builder determinismAnalysisDetails)
{
    // Handle mutable catalogs
    if (isNonDeterministicCatalogReferenced(control.getQuery())) {
        return NON_DETERMINISTIC_CATALOG;
    }
    // Handle limit query
    LimitQueryDeterminismAnalysis limitQueryAnalysis = new LimitQueryDeterminismAnalyzer(
            prestoAction,
            handleLimitQuery,
            control.getQuery(),
            controlChecksum.getRowCount(),
            determinismAnalysisDetails).analyze();
    switch (limitQueryAnalysis) {
        case NOT_RUN:
        case FAILED_QUERY_FAILURE:
        case DETERMINISTIC:
            // try the next analysis
            break;
        case NON_DETERMINISTIC:
            return NON_DETERMINISTIC_LIMIT_CLAUSE;
        case FAILED_DATA_CHANGED:
            return ANALYSIS_FAILED_DATA_CHANGED;
        default:
            throw new IllegalArgumentException(format("Invalid limitQueryAnalysis: %s", limitQueryAnalysis));
    }
    // Rerun control query multiple times
    List<Column> columns = getColumns(prestoAction, typeManager, control.getObjectName());
    Map<QueryBundle, DeterminismAnalysisRun.Builder> queryRuns = new HashMap<>();
    try {
        for (int i = 0; i < maxAnalysisRuns; i++) {
            QueryObjectBundle queryBundle = queryRewriter.rewriteQuery(sourceQuery.getQuery(CONTROL), CONTROL);
            DeterminismAnalysisRun.Builder run = determinismAnalysisDetails.addRun().setTableName(queryBundle.getObjectName().toString());
            queryRuns.put(queryBundle, run);
            // Rerun setup and main query
            queryBundle.getSetupQueries().forEach(query -> runAndConsume(
                    () -> prestoAction.execute(query, DETERMINISM_ANALYSIS_SETUP),
                    stats -> stats.getQueryStats().map(QueryStats::getQueryId).ifPresent(run::addSetupQueryId)));
            runAndConsume(
                    () -> prestoAction.execute(queryBundle.getQuery(), DETERMINISM_ANALYSIS_MAIN),
                    stats -> stats.getQueryStats().map(QueryStats::getQueryId).ifPresent(run::setQueryId));
            // Run checksum query
            Query checksumQuery = checksumValidator.generateChecksumQuery(queryBundle.getObjectName(), columns);
            ChecksumResult testChecksum = getOnlyElement(callAndConsume(
                    () -> prestoAction.execute(checksumQuery, DETERMINISM_ANALYSIS_CHECKSUM, ChecksumResult::fromResultSet),
                    stats -> stats.getQueryStats().map(QueryStats::getQueryId).ifPresent(run::setChecksumQueryId)).getResults());
            DeterminismAnalysis analysis = matchResultToDeterminism(match(checksumValidator, columns, columns, controlChecksum, testChecksum));
            if (analysis != DETERMINISTIC) {
                return analysis;
            }
        }
        return DETERMINISTIC;
    }
    catch (QueryException qe) {
        return ANALYSIS_FAILED_QUERY_FAILURE;
    }
    finally {
        if (runTeardown) {
            queryRuns.forEach((queryBundle, run) -> teardownSafely(
                    prestoAction,
                    Optional.of(queryBundle),
                    queryStats -> queryStats.getQueryStats().map(QueryStats::getQueryId).ifPresent(run::addTeardownQueryId)));
        }
    }
}
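Stripped of the verifier plumbing, the rerun loop follows a simple pattern: rewrite the control query against a fresh table, rerun it, checksum the result, compare against the control checksum, and stop at the first non-deterministic verdict. A simplified sketch of that control flow; rewriteRunAndChecksum and compareChecksums are hypothetical stand-ins, not verifier API:

// Hypothetical condensation of the loop above; helper names are placeholders.
DeterminismAnalysis rerunAndCompare(ChecksumResult controlChecksum, int maxAnalysisRuns)
{
    for (int i = 0; i < maxAnalysisRuns; i++) {
        ChecksumResult testChecksum = rewriteRunAndChecksum(); // rewrite, rerun, checksum
        DeterminismAnalysis analysis = compareChecksums(controlChecksum, testChecksum);
        if (analysis != DETERMINISTIC) {
            // A single differing rerun decides the outcome.
            return analysis;
        }
    }
    return DETERMINISTIC;
}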
Use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
The class AbstractVerificationTest, method verify:
private Optional<VerifierQueryEvent> verify(SourceQuery sourceQuery, boolean explain, Optional<PrestoAction> mockPrestoAction)
{
    VerifierConfig verifierConfig = new VerifierConfig().setTestId(TEST_ID).setExplain(explain);
    TypeManager typeManager = createTypeManager();
    PrestoAction prestoAction = mockPrestoAction.orElseGet(() -> getPrestoAction(Optional.of(sourceQuery.getControlConfiguration())));
    QueryRewriterFactory queryRewriterFactory = new VerificationQueryRewriterFactory(
            sqlParser,
            typeManager,
            new QueryRewriteConfig().setTablePrefix(CONTROL_TABLE_PREFIX),
            new QueryRewriteConfig().setTablePrefix(TEST_TABLE_PREFIX));
    VerificationFactory verificationFactory = new VerificationFactory(
            sqlParser,
            (source, context) -> new QueryActions(prestoAction, prestoAction, prestoAction),
            queryRewriterFactory,
            failureResolverManagerFactory,
            createChecksumValidator(verifierConfig),
            exceptionClassifier,
            verifierConfig,
            typeManager,
            determinismAnalyzerConfig);
    return verificationFactory.get(sourceQuery, Optional.empty()).run().getEvent();
}
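A typical invocation from a test then looks like this (the sourceQuery fixture is assumed to be supplied by the test):

// Hypothetical call: verify without EXPLAIN mode and without a mocked PrestoAction.
Optional<VerifierQueryEvent> event = verify(sourceQuery, false, Optional.empty());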
Use of com.facebook.presto.common.type.TypeManager in project presto by prestodb.
The class TestDeterminismAnalyzer, method createDeterminismAnalyzer:
private static DeterminismAnalyzer createDeterminismAnalyzer(String mutableCatalogPattern)
{
    QueryConfiguration configuration = new QueryConfiguration(CATALOG, SCHEMA, Optional.of("user"), Optional.empty(), Optional.empty());
    VerificationContext verificationContext = VerificationContext.create(SUITE, NAME);
    VerifierConfig verifierConfig = new VerifierConfig().setTestId("test-id");
    RetryConfig retryConfig = new RetryConfig();
    QueryActionsConfig queryActionsConfig = new QueryActionsConfig();
    TypeManager typeManager = createTypeManager();
    PrestoAction prestoAction = new JdbcPrestoAction(
            PrestoExceptionClassifier.defaultBuilder().build(),
            configuration,
            verificationContext,
            new JdbcUrlSelector(ImmutableList.of()),
            new PrestoActionConfig(),
            queryActionsConfig.getMetadataTimeout(),
            queryActionsConfig.getChecksumTimeout(),
            retryConfig,
            retryConfig,
            verifierConfig);
    QueryRewriter queryRewriter = new QueryRewriter(
            sqlParser,
            typeManager,
            prestoAction,
            ImmutableMap.of(CONTROL, QualifiedName.of("tmp_verifier_c"), TEST, QualifiedName.of("tmp_verifier_t")),
            ImmutableMap.of());
    ChecksumValidator checksumValidator = createChecksumValidator(verifierConfig);
    SourceQuery sourceQuery = new SourceQuery("test", "", "", "", configuration, configuration);
    return new DeterminismAnalyzer(sourceQuery, prestoAction, queryRewriter, checksumValidator, typeManager, new DeterminismAnalyzerConfig().setNonDeterministicCatalogs(mutableCatalogPattern));
}
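The mutableCatalogPattern argument flows into DeterminismAnalyzerConfig.setNonDeterministicCatalogs, letting a test mark specific catalogs as mutable. A hypothetical call (the pattern value is illustrative; the exact matching syntax is defined by the config class):

// Hypothetical usage: queries referencing the "mysql" catalog are classified
// NON_DETERMINISTIC_CATALOG before any reruns are attempted.
DeterminismAnalyzer analyzer = createDeterminismAnalyzer("mysql");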