Use of io.prestosql.spi.connector.ConnectorSession in project hetu-core by openlookeng.
The class TpchIndexMetadata, method resolveIndex:
@Override
public Optional<ConnectorResolvedIndex> resolveIndex(ConnectorSession session, ConnectorTableHandle tableHandle, Set<ColumnHandle> indexableColumns, Set<ColumnHandle> outputColumns, TupleDomain<ColumnHandle> tupleDomain) {
TpchTableHandle tpchTableHandle = (TpchTableHandle) tableHandle;
// Keep the fixed values that don't overlap with the indexableColumns
// Note: technically we could more efficiently utilize the overlapped columns, but this way is simpler for now
Map<ColumnHandle, NullableValue> fixedValues = TupleDomain.extractFixedValues(tupleDomain).orElse(ImmutableMap.of())
        .entrySet().stream()
        .filter(entry -> !indexableColumns.contains(entry.getKey()))
        // strip nulls since meaningless in index join lookups
        .filter(entry -> !entry.getValue().isNull())
        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
// determine all columns available for index lookup
Set<String> lookupColumnNames = ImmutableSet.<String>builder().addAll(handleToNames(ImmutableList.copyOf(indexableColumns))).addAll(handleToNames(ImmutableList.copyOf(fixedValues.keySet()))).build();
// do we have an index?
if (!indexedData.getIndexedTable(tpchTableHandle.getTableName(), tpchTableHandle.getScaleFactor(), lookupColumnNames).isPresent()) {
return Optional.empty();
}
TupleDomain<ColumnHandle> filteredTupleDomain = tupleDomain;
if (!tupleDomain.isNone()) {
filteredTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(tupleDomain.getDomains().get(), not(in(fixedValues.keySet()))));
}
TpchIndexHandle indexHandle = new TpchIndexHandle(tpchTableHandle.getTableName(), tpchTableHandle.getScaleFactor(), lookupColumnNames, TupleDomain.fromFixedValues(fixedValues));
return Optional.of(new ConnectorResolvedIndex(indexHandle, filteredTupleDomain));
}
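For readers unfamiliar with the predicate API used above, the following is a minimal standalone sketch (not hetu-core code) of how TupleDomain.extractFixedValues pulls out single-value constraints and how fromFixedValues packs them back into a TupleDomain, as resolveIndex does for the index handle. The String column keys and the orderkey = 42 example are assumptions for illustration.

import com.google.common.collect.ImmutableMap;
import io.prestosql.spi.predicate.Domain;
import io.prestosql.spi.predicate.NullableValue;
import io.prestosql.spi.predicate.TupleDomain;

import java.util.Map;

import static io.prestosql.spi.type.BigintType.BIGINT;

public class FixedValueSplitSketch
{
    public static void main(String[] args)
    {
        // orderkey = 42 is a "fixed value": the column's domain admits exactly one non-null value
        TupleDomain<String> predicate = TupleDomain.withColumnDomains(
                ImmutableMap.of("orderkey", Domain.singleValue(BIGINT, 42L)));

        // extractFixedValues keeps only the columns constrained to a single value
        Map<String, NullableValue> fixedValues = TupleDomain.extractFixedValues(predicate)
                .orElse(ImmutableMap.of());
        fixedValues.forEach((column, value) -> System.out.println(column + " = " + value.getValue()));

        // the fixed values can be turned back into a TupleDomain, as done for TpchIndexHandle
        TupleDomain<String> rebuilt = TupleDomain.fromFixedValues(fixedValues);
        System.out.println(rebuilt);
    }
}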
Use of io.prestosql.spi.connector.ConnectorSession in project hetu-core by openlookeng.
The class CarbondataPageSourceProvider, method createPageSource:
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorSplit split, ConnectorTableHandle table, List<ColumnHandle> columns) {
HiveSplit carbonSplit = Types.checkType(((HiveSplitWrapper) (split)).getSplits().get(0), HiveSplit.class, "split is not class HiveSplit");
this.queryId = carbonSplit.getSchema().getProperty("queryId");
if (this.queryId == null) {
// Fall back to hive pagesource.
return super.createPageSource(transactionHandle, session, split, table, columns);
}
try {
hdfsEnvironment.getFileSystem(new HdfsEnvironment.HdfsContext(session, carbonSplit.getDatabase()), new Path(carbonSplit.getSchema().getProperty("tablePath")));
} catch (IOException e) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, "Failed to get file system: " + e.getMessage());
}
return hdfsEnvironment.doAs(session.getUser(), () -> {
Configuration configuration = this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, carbonSplit.getDatabase(), carbonSplit.getTable()), new Path(carbonSplit.getSchema().getProperty("tablePath")));
CarbonTable carbonTable = getCarbonTable(carbonSplit, configuration);
/* So that CarbonTLS can access it */
ThreadLocalSessionInfo.setConfigurationToCurrentThread(configuration);
boolean isFullACID = isFullAcidTable(Maps.fromProperties(carbonSplit.getSchema()));
boolean isDirectVectorFill = (carbonTableReader.config.getPushRowFilter() == null)
        || carbonTableReader.config.getPushRowFilter().equalsIgnoreCase("false")
        || columns.stream().anyMatch(c -> c.getColumnName().equalsIgnoreCase(CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID));
return new CarbondataPageSource(carbonTable, queryId, carbonSplit, columns, table, configuration, isDirectVectorFill, isFullACID, session.getUser(), hdfsEnvironment);
});
}
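The hdfsEnvironment.doAs call above runs page-source creation under the session user's identity. The sketch below shows the general Hadoop impersonation pattern such a wrapper is typically built on; it is an assumption about the mechanism for illustration, not the HdfsEnvironment implementation itself, and the user name is made up.

import org.apache.hadoop.security.UserGroupInformation;

import java.security.PrivilegedAction;

public class DoAsSketch
{
    static <R> R doAs(String user, PrivilegedAction<R> action)
    {
        // impersonate the Presto session user so file-system calls carry that identity
        UserGroupInformation proxy = UserGroupInformation.createRemoteUser(user);
        return proxy.doAs(action);
    }

    public static void main(String[] args)
    {
        String result = doAs("analyst", () -> "page source created as analyst");
        System.out.println(result);
    }
}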
Use of io.prestosql.spi.connector.ConnectorSession in project hetu-core by openlookeng.
The class RcFileFileWriterFactory, method createFileWriter:
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, Optional<AcidOutputFormat.Options> acidOptions, Optional<HiveACIDWriteType> acidWriteType) {
if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
return Optional.empty();
}
RcFileEncoding rcFileEncoding;
if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
rcFileEncoding = new BinaryRcFileEncoding(timeZone);
} else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
rcFileEncoding = RcFilePageSourceFactory.createTextVectorEncoding(schema);
} else {
return Optional.empty();
}
Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
// existing tables and partitions may have columns in a different order than the writer is providing, so build
// an index to rearrange columns in the proper order
List<String> fileColumnNames = getColumnNames(schema);
List<Type> fileColumnTypes = getColumnTypes(schema).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
OutputStream outputStream = fileSystem.create(path);
Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
validationInputFactory = Optional.of(() -> {
try {
return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen(), stats);
} catch (IOException e) {
throw new PrestoException(HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED, e);
}
});
}
Callable<Void> rollbackAction = () -> {
fileSystem.delete(path, false);
return null;
};
return Optional.of(new RcFileFileWriter(
        outputStream,
        rollbackAction,
        rcFileEncoding,
        fileColumnTypes,
        codecName,
        fileInputColumnIndexes,
        ImmutableMap.<String, String>builder()
                .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId())
                .build(),
        validationInputFactory));
} catch (Exception e) {
throw new PrestoException(HiveErrorCode.HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
}
}
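The fileInputColumnIndexes array built above maps each column of the file schema to its position in the writer's input, using -1 for columns the input does not supply. A minimal sketch of that mapping, with made-up column names:

import com.google.common.collect.ImmutableList;

import java.util.Arrays;
import java.util.List;

public class ColumnIndexSketch
{
    public static void main(String[] args)
    {
        // column order as stored in the table/partition schema
        List<String> fileColumnNames = ImmutableList.of("orderkey", "custkey", "comment");
        // column order the writer happens to receive
        List<String> inputColumnNames = ImmutableList.of("custkey", "orderkey");

        // for each file column: its index in the input, or -1 if the input lacks it
        int[] fileInputColumnIndexes = fileColumnNames.stream()
                .mapToInt(inputColumnNames::indexOf)
                .toArray();

        // prints [1, 0, -1]; the writer typically substitutes nulls for the missing column
        System.out.println(Arrays.toString(fileInputColumnIndexes));
    }
}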
Use of io.prestosql.spi.connector.ConnectorSession in project hetu-core by openlookeng.
The class OrcFileWriterFactory, method createFileWriter:
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, Optional<AcidOutputFormat.Options> acidOptions, Optional<HiveACIDWriteType> acidWriteType) {
if (!OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
return Optional.empty();
}
CompressionKind compression = getCompression(schema, configuration);
// existing tables and partitions may have columns in a different order than the writer is providing, so build
// an index to rearrange columns in the proper order
List<String> fileColumnNames = getColumnNames(schema);
List<Type> fileColumnTypes = getColumnTypes(schema).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
List<Type> dataFileColumnTypes = fileColumnTypes;
int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
Optional<HiveFileWriter> deleteDeltaWriter = Optional.empty();
if (AcidUtils.isTablePropertyTransactional(schema) && !AcidUtils.isInsertOnlyTable(schema)) {
ImmutableList<String> orcFileColumnNames = ImmutableList.of(
        OrcPageSourceFactory.ACID_COLUMN_OPERATION,
        OrcPageSourceFactory.ACID_COLUMN_ORIGINAL_TRANSACTION,
        OrcPageSourceFactory.ACID_COLUMN_BUCKET,
        OrcPageSourceFactory.ACID_COLUMN_ROW_ID,
        OrcPageSourceFactory.ACID_COLUMN_CURRENT_TRANSACTION,
        OrcPageSourceFactory.ACID_COLUMN_ROW_STRUCT);
ImmutableList.Builder<RowType.Field> fieldsBuilder = ImmutableList.builder();
for (int i = 0; i < fileColumnNames.size(); i++) {
fieldsBuilder.add(new RowType.Field(Optional.of(fileColumnNames.get(i)), fileColumnTypes.get(i)));
}
ImmutableList<Type> orcFileColumnTypes = ImmutableList.of(INTEGER, BIGINT, INTEGER, BIGINT, BIGINT, RowType.from(fieldsBuilder.build()));
fileColumnNames = orcFileColumnNames;
fileColumnTypes = orcFileColumnTypes;
if (acidWriteType.isPresent() && acidWriteType.get() == HiveACIDWriteType.UPDATE) {
AcidOutputFormat.Options deleteOptions = acidOptions.get().clone().writingDeleteDelta(true);
Path deleteDeltaPath = AcidUtils.createFilename(path.getParent().getParent(), deleteOptions);
deleteDeltaWriter = createFileWriter(deleteDeltaPath, inputColumnNames, storageFormat, schema, configuration, session, Optional.of(deleteOptions), Optional.of(HiveACIDWriteType.DELETE));
}
}
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
OrcDataSink orcDataSink = createOrcDataSink(session, fileSystem, path);
Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
if (HiveSessionProperties.isOrcOptimizedWriterValidate(session)) {
validationInputFactory = Optional.of(() -> {
try {
FileStatus fileStatus = fileSystem.getFileStatus(path);
return new HdfsOrcDataSource(
        new OrcDataSourceId(path.toString()),
        fileStatus.getLen(),
        HiveSessionProperties.getOrcMaxMergeDistance(session),
        HiveSessionProperties.getOrcMaxBufferSize(session),
        HiveSessionProperties.getOrcStreamBufferSize(session),
        false,
        fileSystem.open(path),
        readStats,
        fileStatus.getModificationTime());
} catch (IOException e) {
throw new PrestoException(HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED, e);
}
});
}
Callable<Void> rollbackAction = () -> {
fileSystem.delete(path, false);
return null;
};
return Optional.of(new OrcFileWriter(
        orcDataSink,
        rollbackAction,
        fileColumnNames,
        fileColumnTypes,
        dataFileColumnTypes,
        compression,
        orcWriterOptions
                .withStripeMinSize(HiveSessionProperties.getOrcOptimizedWriterMinStripeSize(session))
                .withStripeMaxSize(HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize(session))
                .withStripeMaxRowCount(HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows(session))
                .withDictionaryMaxMemory(HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory(session))
                .withMaxStringStatisticsLimit(HiveSessionProperties.getOrcStringStatisticsLimit(session)),
        writeLegacyVersion,
        fileInputColumnIndexes,
        ImmutableMap.<String, String>builder()
                .put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString())
                .put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId())
                .put("hive.acid.version", String.valueOf(AcidUtils.OrcAcidVersion.ORC_ACID_VERSION))
                .build(),
        validationInputFactory,
        HiveSessionProperties.getOrcOptimizedWriterValidateMode(session),
        stats,
        acidOptions,
        acidWriteType,
        deleteDeltaWriter,
        path));
} catch (IOException e) {
throw new PrestoException(HiveErrorCode.HIVE_WRITER_OPEN_ERROR, "Error creating ORC file", e);
}
}
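For transactional tables the factory above replaces the file schema with the ORC ACID layout: five metadata columns plus a row struct wrapping the original table columns. The sketch below is a standalone illustration of that type construction using the io.prestosql.spi.type API; the table columns (orderkey, comment) are made up.

import com.google.common.collect.ImmutableList;
import io.prestosql.spi.type.RowType;
import io.prestosql.spi.type.Type;

import java.util.List;
import java.util.Optional;

import static io.prestosql.spi.type.BigintType.BIGINT;
import static io.prestosql.spi.type.IntegerType.INTEGER;
import static io.prestosql.spi.type.VarcharType.VARCHAR;

public class AcidSchemaSketch
{
    public static void main(String[] args)
    {
        // the original table columns are wrapped into a single "row" struct
        RowType rowStruct = RowType.from(ImmutableList.of(
                new RowType.Field(Optional.of("orderkey"), BIGINT),
                new RowType.Field(Optional.of("comment"), VARCHAR)));

        // operation, originalTransaction, bucket, rowId, currentTransaction, row
        List<Type> acidColumnTypes = ImmutableList.of(INTEGER, BIGINT, INTEGER, BIGINT, BIGINT, rowStruct);
        acidColumnTypes.forEach(type -> System.out.println(type.getDisplayName()));
    }
}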
Use of io.prestosql.spi.connector.ConnectorSession in project hetu-core by openlookeng.
The class SyncPartitionMetadataProcedure, method doSyncPartitionMetadata:
private void doSyncPartitionMetadata(ConnectorSession session, String schemaName, String tableName, String mode) {
SyncMode syncMode = toSyncMode(mode);
HdfsContext hdfsContext = new HdfsContext(session, schemaName, tableName);
HiveIdentity identity = new HiveIdentity(session);
SemiTransactionalHiveMetastore metastore = ((HiveMetadata) hiveMetadataFactory.get()).getMetastore();
SchemaTableName schemaTableName = new SchemaTableName(schemaName, tableName);
Table table = metastore.getTable(identity, schemaName, tableName).orElseThrow(() -> new TableNotFoundException(schemaTableName));
if (table.getPartitionColumns().isEmpty()) {
throw new PrestoException(INVALID_PROCEDURE_ARGUMENT, "Table is not partitioned: " + schemaTableName);
}
Path tableLocation = new Path(table.getStorage().getLocation());
Set<String> partitionsToAdd;
Set<String> partitionsToDrop;
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(hdfsContext, tableLocation);
List<String> partitionsInMetastore = metastore.getPartitionNames(identity, schemaName, tableName).orElseThrow(() -> new TableNotFoundException(schemaTableName));
List<String> partitionsInFileSystem = listDirectory(fileSystem, fileSystem.getFileStatus(tableLocation), table.getPartitionColumns(), table.getPartitionColumns().size()).stream()
        .map(fileStatus -> fileStatus.getPath().toUri())
        .map(uri -> tableLocation.toUri().relativize(uri).getPath())
        .collect(toImmutableList());
// partitions in file system but not in metastore
partitionsToAdd = difference(partitionsInFileSystem, partitionsInMetastore);
// partitions in metastore but not in file system
partitionsToDrop = difference(partitionsInMetastore, partitionsInFileSystem);
} catch (IOException e) {
throw new PrestoException(HIVE_FILESYSTEM_ERROR, e);
}
syncPartitions(partitionsToAdd, partitionsToDrop, syncMode, metastore, session, table);
}
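The difference helper used above reduces partition reconciliation to two set differences between the partition names found on the file system and those recorded in the metastore. A minimal sketch of that logic with Guava, using made-up partition names; the real procedure additionally applies the requested sync mode before acting on either set.

import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;

import java.util.List;
import java.util.Set;

public class PartitionDiffSketch
{
    public static void main(String[] args)
    {
        List<String> partitionsInFileSystem = ImmutableSet.of("ds=2021-01-01", "ds=2021-01-02", "ds=2021-01-03").asList();
        List<String> partitionsInMetastore = ImmutableSet.of("ds=2021-01-01", "ds=2020-12-31").asList();

        // on the file system but not registered: candidates to add
        Set<String> partitionsToAdd = Sets.difference(
                ImmutableSet.copyOf(partitionsInFileSystem), ImmutableSet.copyOf(partitionsInMetastore));
        // registered but missing on the file system: candidates to drop
        Set<String> partitionsToDrop = Sets.difference(
                ImmutableSet.copyOf(partitionsInMetastore), ImmutableSet.copyOf(partitionsInFileSystem));

        System.out.println("add: " + partitionsToAdd);
        System.out.println("drop: " + partitionsToDrop);
    }
}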