Use of io.prestosql.plugin.hive.HiveSplit in the Apache CarbonData project.
Class CarbondataPageSourceProvider, method createPageSource.
/**
 * Creates the page source that reads one split.
 *
 * <p>If the split's schema properties contain no {@code "queryId"} entry, the call is delegated
 * to the parent implementation (the Hive page source). Otherwise a Hadoop configuration is built
 * for the split's {@code "tablePath"}, every schema property of the split is copied into it, and
 * a {@link CarbondataPageSource} is returned.
 *
 * @param transaction   transaction handle, forwarded to the parent implementation on fallback
 * @param session       connector session, used to build the HDFS context
 * @param split         split to read; must be a {@link HiveSplit}
 * @param table         table handle, passed through to the page source
 * @param columns       columns to read, passed through to the page source
 * @param dynamicFilter dynamic filter; currently only forwarded on the fallback path
 * @return the page source for the split
 */
@Override
public ConnectorPageSource createPageSource(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorSplit split, ConnectorTableHandle table, List<ColumnHandle> columns, TupleDomain<ColumnHandle> dynamicFilter) {
  HiveSplit carbonSplit = checkType(split, HiveSplit.class, "split is not class HiveSplit");

  // Work with a local copy of the query id. The field is still assigned in case other code in
  // this class reads it, but it is never read back here: reading the shared field after writing
  // it would let a concurrent call for a different split replace the value in between.
  // NOTE(review): assumes this provider instance can be invoked concurrently - confirm.
  String splitQueryId = carbonSplit.getSchema().getProperty("queryId");
  this.queryId = splitQueryId;
  if (splitQueryId == null) {
    // Fall back to hive pagesource.
    return super.createPageSource(transaction, session, split, table, columns, dynamicFilter);
  }

  // TODO: check and use dynamicFilter in CarbondataPageSource
  Configuration configuration = this.hdfsEnvironment.getConfiguration(
      new HdfsEnvironment.HdfsContext(session, carbonSplit.getDatabase(), carbonSplit.getTable()),
      new Path(carbonSplit.getSchema().getProperty("tablePath")));
  configuration = carbonTableReader.updateS3Properties(configuration);
  // Copy every schema property of the split into the Hadoop configuration.
  for (Map.Entry<Object, Object> entry : carbonSplit.getSchema().entrySet()) {
    configuration.set(entry.getKey().toString(), entry.getValue().toString());
  }

  CarbonTable carbonTable = getCarbonTable(carbonSplit, configuration);

  // Direct vector fill is used unless a push-row-filter value other than "false" is configured.
  String pushRowFilter = carbonTableReader.config.getPushRowFilter();
  boolean isDirectVectorFill = pushRowFilter == null || pushRowFilter.equalsIgnoreCase("false");

  return new CarbondataPageSource(carbonTable, splitQueryId, carbonSplit, columns, table, configuration, isDirectVectorFill);
}
Use of io.prestosql.plugin.hive.HiveSplit in the Apache CarbonData project.
Class CarbondataSplitManager, method getSplits.
/**
 * Plans the splits for a scan of the given table.
 *
 * <p>Tables whose storage input format does not contain {@code "carbon"} are handed to the
 * parent implementation. For all other tables the input splits are obtained from
 * {@code carbonTableReader} and each one is wrapped in a {@link HiveSplit} whose schema
 * properties carry the table's serde parameters plus the {@code "tablePath"},
 * {@code "carbonSplit"} (JSON form of the split), {@code "queryId"} and {@code "index"} entries.
 *
 * @param transactionHandle       transaction used to obtain the metastore
 * @param session                 connector session
 * @param tableHandle             table to scan; cast to {@link HiveTableHandle}
 * @param splitSchedulingStrategy forwarded to the parent implementation on fallback
 * @return a fixed split source holding one {@link HiveSplit} per input split
 * @throws TableNotFoundException if the metastore does not know the table
 * @throws RuntimeException       wrapping any exception raised while building the splits
 */
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transactionHandle, ConnectorSession session, ConnectorTableHandle tableHandle, SplitSchedulingStrategy splitSchedulingStrategy) {
  HiveTableHandle carbonHandle = (HiveTableHandle) tableHandle;
  SchemaTableName tableName = carbonHandle.getSchemaTableName();
  carbonTableReader.setPrestoQueryId(session.getQueryId());

  // Resolve the table metadata from the metastore.
  SemiTransactionalHiveMetastore metastore = metastoreProvider.apply((HiveTransactionHandle) transactionHandle);
  Table table = metastore
      .getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName())
      .orElseThrow(() -> new TableNotFoundException(tableName));

  boolean carbonFormat = table.getStorage().getStorageFormat().getInputFormat().contains("carbon");
  if (!carbonFormat) {
    return super.getSplits(transactionHandle, session, tableHandle, splitSchedulingStrategy);
  }

  Map<String, String> serdeParameters = table.getStorage().getSerdeParameters();

  // With a hive metastore the table location comes from the "tablePath" serde parameter;
  // with a file metastore that entry can be missing, so the storage location is used instead.
  String location = serdeParameters.get("tablePath");
  if (StringUtils.isEmpty(location)) {
    location = table.getStorage().getLocation();
  }

  // One PartitionSpec per partition on the handle, rooted under the table location.
  List<PartitionSpec> filteredPartitions = new ArrayList<>();
  List<HiveColumnHandle> partitionColumns = carbonHandle.getPartitionColumns();
  if (!partitionColumns.isEmpty() && carbonHandle.getPartitions().isPresent()) {
    List<String> partitionColumnNames = new ArrayList<>();
    for (HiveColumnHandle partitionColumn : partitionColumns) {
      partitionColumnNames.add(partitionColumn.getName());
    }
    String partitionRoot = location + CarbonCommonConstants.FILE_SEPARATOR;
    for (HivePartition partition : carbonHandle.getPartitions().get()) {
      filteredPartitions.add(new PartitionSpec(partitionColumnNames, partitionRoot + partition.getPartitionId()));
    }
  }

  // Driver-side statistics are keyed by a locally generated id.
  String queryId = String.valueOf(System.nanoTime());
  QueryStatistic allocationStatistic = new QueryStatistic();
  QueryStatisticsRecorder statisticRecorder = CarbonTimeStatisticsFactory.createDriverRecorder();
  allocationStatistic.addStatistics(QueryStatisticsConstants.BLOCK_ALLOCATION, System.currentTimeMillis());
  statisticRecorder.recordStatisticsForDriver(allocationStatistic, queryId);
  // NOTE(review): created at this point, before split planning, exactly as before -
  // presumably the object captures its start time on construction; confirm before moving it.
  QueryStatistic identificationStatistic = new QueryStatistic();
  carbonTableReader.setQueryId(queryId);

  TupleDomain<HiveColumnHandle> predicate = carbonHandle.getCompactEffectivePredicate();
  Configuration hdfsConfiguration = this.hdfsEnvironment.getConfiguration(
      new HdfsEnvironment.HdfsContext(session, tableName.getSchemaName(), tableName.getTableName()),
      new Path(location));
  Configuration configuration = carbonTableReader.updateS3Properties(hdfsConfiguration);
  serdeParameters.forEach(configuration::set);

  // set the hadoop configuration to thread local, so that FileFactory can use it.
  ThreadLocalSessionInfo.setConfigurationToCurrentThread(configuration);

  CarbonTableCacheModel cache = carbonTableReader.getCarbonCache(tableName, location, configuration);
  Expression filters = PrestoFilterUtil.parseFilterExpression(predicate);

  try {
    List<CarbonLocalMultiBlockSplit> carbonSplits = carbonTableReader.getInputSplits(cache, filters, filteredPartitions, configuration);
    ImmutableList.Builder<ConnectorSplit> connectorSplits = ImmutableList.builder();
    long splitIndex = 0;
    for (CarbonLocalMultiBlockSplit carbonSplit : carbonSplits) {
      // The "index" property is 1-based.
      splitIndex++;
      String tablePath = cache.getCarbonTable().getTablePath();

      // Serde parameters first, so the split-specific entries below override equal keys.
      Properties splitSchema = new Properties();
      for (Map.Entry<String, String> parameter : serdeParameters.entrySet()) {
        splitSchema.setProperty(parameter.getKey(), parameter.getValue());
      }
      splitSchema.setProperty("tablePath", tablePath);
      splitSchema.setProperty("carbonSplit", carbonSplit.getJsonString());
      splitSchema.setProperty("queryId", queryId);
      splitSchema.setProperty("index", String.valueOf(splitIndex));

      connectorSplits.add(new HiveSplit(
          tableName.getSchemaName(),
          tableName.getTableName(),
          tableName.getTableName(),
          tablePath,
          0,
          0,
          0,
          0,
          splitSchema,
          new ArrayList<>(),
          getHostAddresses(carbonSplit.getLocations()),
          OptionalInt.empty(),
          false,
          TableToPartitionMapping.empty(),
          Optional.empty(),
          false,
          Optional.empty()));
    }

    statisticRecorder.logStatisticsAsTableDriver();
    identificationStatistic.addStatistics(QueryStatisticsConstants.BLOCK_IDENTIFICATION, System.currentTimeMillis());
    statisticRecorder.recordStatisticsForDriver(identificationStatistic, queryId);
    statisticRecorder.logStatisticsAsTableDriver();
    return new FixedSplitSource(connectorSplits.build());
  } catch (Exception ex) {
    throw new RuntimeException(ex.getMessage(), ex);
  }
}
Aggregations